summaryrefslogtreecommitdiffstats
path: root/abs/core/iproute
diff options
context:
space:
mode:
authorJames Meyer <James.meyer@operamail.com>2008-10-02 03:19:12 (GMT)
committerJames Meyer <James.meyer@operamail.com>2008-10-02 03:19:12 (GMT)
commit0e2532d4e8f4eed5e047f1db54d5c03ba849ec0a (patch)
treec0aa2c0b53c317be87eacfcb77b63f53f1f415e7 /abs/core/iproute
downloadlinhes_pkgbuild-0e2532d4e8f4eed5e047f1db54d5c03ba849ec0a.zip
linhes_pkgbuild-0e2532d4e8f4eed5e047f1db54d5c03ba849ec0a.tar.gz
linhes_pkgbuild-0e2532d4e8f4eed5e047f1db54d5c03ba849ec0a.tar.bz2
initial import
Diffstat (limited to 'abs/core/iproute')
-rw-r--r--abs/core/iproute/PKGBUILD26
-rw-r--r--abs/core/iproute/iproute2-2.4.7-now-ss020116.patch9823
2 files changed, 9849 insertions, 0 deletions
diff --git a/abs/core/iproute/PKGBUILD b/abs/core/iproute/PKGBUILD
new file mode 100644
index 0000000..ae0f9f0
--- /dev/null
+++ b/abs/core/iproute/PKGBUILD
@@ -0,0 +1,26 @@
+# $Id: PKGBUILD 3746 2008-06-30 20:09:01Z andyrtr $
+# Maintainer: Judd Vinet <jvinet@zeroflux.org>
+pkgname=iproute
+#_dlver=2.6.24-rc7
+#pkgver=$(echo ${_dlver} | sed 's|-|_|g')
+pkgver=2.6.25
+pkgrel=1
+pkgdesc="IP Routing Utilities"
+arch=(i686 x86_64)
+license=('GPL')
+url="http://www.linux-foundation.org/en/Net:Iproute2"
+depends=('db>=4.7')
+source=(#http://devresources.linux-foundation.org/dev/iproute2/download/iproute2-${_dlver}.tar.bz2
+ http://devresources.linux-foundation.org/dev/iproute2/download/iproute2-${pkgver}.tar.bz2)
+options=('force')
+md5sums=('5737bade2f5e03fad0e2c81da91e551e')
+# build(): point iptables.h at /usr/lib/iptables, compile, stage into $pkgdir; paths are quoted so directories with spaces work.
+build() {
+ #cd $srcdir/iproute2-${_dlver}
+ cd "$srcdir/iproute2-${pkgver}" || return 1
+ sed -i 's|/usr/local/lib/iptables|/usr/lib/iptables|' include/iptables.h || return 1
+ ./configure
+ make || return 1
+ make DESTDIR="$pkgdir" install || return 1
+ chmod 755 "$pkgdir/usr/sbin/ifcfg" || return 1
+}
diff --git a/abs/core/iproute/iproute2-2.4.7-now-ss020116.patch b/abs/core/iproute/iproute2-2.4.7-now-ss020116.patch
new file mode 100644
index 0000000..0e37865
--- /dev/null
+++ b/abs/core/iproute/iproute2-2.4.7-now-ss020116.patch
@@ -0,0 +1,9823 @@
+diff -Naur iproute2-orig/Makefile iproute2/Makefile
+--- iproute2-orig/Makefile 2002-01-15 15:30:32.000000000 -0800
++++ iproute2/Makefile 2004-05-21 00:16:36.000000000 -0700
+@@ -4,8 +4,6 @@
+ CONFDIR=/etc/iproute2
+ DOCDIR=/usr/doc/iproute2
+
+-KERNEL_INCLUDE=/usr/src/linux/include
+-LIBC_INCLUDE=/usr/include
+
+ DEFINES= -DRESOLVE_HOSTNAMES
+
+@@ -23,19 +21,11 @@
+ #options for ipx
+ ADDLIB+=ipx_ntop.o ipx_pton.o
+
+-ifeq ($(LIBC_INCLUDE)/socketbits.h,$(wildcard $(LIBC_INCLUDE)/socketbits.h))
+- ifeq ($(LIBC_INCLUDE)/net/if_packet.h,$(wildcard $(LIBC_INCLUDE)/net/if_packet.h))
+- GLIBCFIX=-I../include-glibc -include ../include-glibc/glibc-bugs.h
+- endif
+-endif
+-ifeq ($(LIBC_INCLUDE)/bits/socket.h,$(wildcard $(LIBC_INCLUDE)/bits/socket.h))
+- GLIBCFIX=-I../include-glibc -I/usr/include/db3 -include ../include-glibc/glibc-bugs.h
+-endif
+
+
+ CC = gcc
+ CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall -g
+-CFLAGS = $(CCOPTS) $(GLIBCFIX) -I$(KERNEL_INCLUDE) -I../include $(DEFINES)
++CFLAGS = $(CCOPTS) -I../include $(DEFINES)
+
+ LDLIBS += -L../lib -lnetlink -lutil
+
+@@ -43,19 +33,11 @@
+
+ LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
+
+-all: check-kernel
++all:
+ @set -e; \
+ for i in $(SUBDIRS); \
+ do $(MAKE) -C $$i; done
+
+-check-kernel:
+-ifeq ($(KERNEL_INCLUDE),)
+- @echo "Please, set correct KERNEL_INCLUDE"; false
+-else
+- @set -e; \
+- if [ ! -r $(KERNEL_INCLUDE)/linux/autoconf.h ]; then \
+- echo "Please, compile the kernel first"; false; fi
+-endif
+
+ install: all
+ install -m 0755 -d $(DESTDIR)$(SBINDIR)
+diff -Naur iproute2-orig/Makefile~ iproute2/Makefile~
+--- iproute2-orig/Makefile~ 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/Makefile~ 2002-01-15 15:30:32.000000000 -0800
+@@ -0,0 +1,77 @@
++# Install locations; the install rule prefixes each of them with DESTDIR
++DESTDIR=
++SBINDIR=/sbin
++CONFDIR=/etc/iproute2
++DOCDIR=/usr/doc/iproute2
++# Path to parent kernel include files directory (check-kernel looks for linux/autoconf.h in it)
++KERNEL_INCLUDE=/usr/src/linux/include
++LIBC_INCLUDE=/usr/include
++
++DEFINES= -DRESOLVE_HOSTNAMES
++
++#options if you have a bind>=4.9.4 libresolv (or, maybe, glibc)
++LDLIBS=-lresolv
++ADDLIB=
++
++#options if you compile with libc5, and without a bind>=4.9.4 libresolv
++#LDLIBS=
++#ADDLIB=inet_ntop.o inet_pton.o
++
++#options for decnet
++ADDLIB+=dnet_ntop.o dnet_pton.o
++
++#options for ipx
++ADDLIB+=ipx_ntop.o ipx_pton.o
++# GLIBCFIX: extra include flags, set only when the probed headers exist under LIBC_INCLUDE
++ifeq ($(LIBC_INCLUDE)/socketbits.h,$(wildcard $(LIBC_INCLUDE)/socketbits.h))
++ ifeq ($(LIBC_INCLUDE)/net/if_packet.h,$(wildcard $(LIBC_INCLUDE)/net/if_packet.h))
++ GLIBCFIX=-I../include-glibc -include ../include-glibc/glibc-bugs.h
++ endif
++endif
++ifeq ($(LIBC_INCLUDE)/bits/socket.h,$(wildcard $(LIBC_INCLUDE)/bits/socket.h))
++ GLIBCFIX=-I../include-glibc -I/usr/include/db3 -include ../include-glibc/glibc-bugs.h
++endif
++
++# Compiler settings; CFLAGS combines CCOPTS, GLIBCFIX, the kernel and local include paths, and DEFINES
++CC = gcc
++CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall -g
++CFLAGS = $(CCOPTS) $(GLIBCFIX) -I$(KERNEL_INCLUDE) -I../include $(DEFINES)
++
++LDLIBS += -L../lib -lnetlink -lutil
++# Sub-directories built by "all"; install and clean additionally visit doc
++SUBDIRS=lib ip tc misc
++
++LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
++# Default target: after check-kernel, run make in every SUBDIRS entry; set -e stops at the first failure
++all: check-kernel
++ @set -e; \
++ for i in $(SUBDIRS); \
++ do $(MAKE) -C $$i; done
++# Fail early unless KERNEL_INCLUDE is set and contains a readable linux/autoconf.h
++check-kernel:
++ifeq ($(KERNEL_INCLUDE),)
++ @echo "Please, set correct KERNEL_INCLUDE"; false
++else
++ @set -e; \
++ if [ ! -r $(KERNEL_INCLUDE)/linux/autoconf.h ]; then \
++ echo "Please, compile the kernel first"; false; fi
++endif
++# Install docs and examples, recurse into SUBDIRS and doc, then copy etc/iproute2 files not already present in CONFDIR
++install: all
++ install -m 0755 -d $(DESTDIR)$(SBINDIR)
++ install -m 0755 -d $(DESTDIR)$(CONFDIR)
++ install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
++ install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
++ install -m 0644 README.iproute2+tc $(shell find examples -type f -maxdepth 1) $(DESTDIR)$(DOCDIR)/examples
++ install -m 0644 $(shell echo examples/diffserv/*) $(DESTDIR)$(DOCDIR)/examples/diffserv
++ @for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done
++ @cd etc/iproute2; for i in *; do \
++ if [ ! -e $(DESTDIR)$(CONFDIR)/$$i ]; then \
++ echo install -m 0644 $$i $(DESTDIR)$(CONFDIR); \
++ install -m 0644 $$i $(DESTDIR)$(CONFDIR); fi; done
++# Run the clean target in every SUBDIRS entry and in doc
++clean:
++ for i in $(SUBDIRS) doc; \
++ do $(MAKE) -C $$i clean; done
++# Export all variables defined above to the sub-makes
++.EXPORT_ALL_VARIABLES:
+diff -Naur iproute2-orig/debian/README.Debian iproute2/debian/README.Debian
+--- iproute2-orig/debian/README.Debian 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/README.Debian 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,4 @@
++This version of "iproute" includes the HTB Linux queuing discipline
++explained in http://luxik.cdi.cz/~devik/qos/htb/
++
++You need kernel version 2.4.21 or newer in order to use it.
+diff -Naur iproute2-orig/debian/changelog iproute2/debian/changelog
+--- iproute2-orig/debian/changelog 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/changelog 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,207 @@
++iproute (20010824-13) unstable; urgency=low
++
++ * debian/rules: Run dpkg-shlibdeps with all the executables,
++ to fix dependency problem (closes: Bug#224063)
++ * Really removed references to obsolete include files
++ (Bug#223165 was not fixed properly)
++
++ -- Juan Cespedes <cespedes@debian.org> Sun, 25 Jan 2004 23:04:20 +0100
++
++iproute (20010824-12) unstable; urgency=low
++
++ * Updated README.Debian and copyright file
++ * Added two new manpages from http://lartc.org/manpages/:
++ ip(8) and tc-cbq-details(8).
++ * Removed references to obsolete include files which made
++ compilation fail (closes: Bug#223165)
++
++ -- Juan Cespedes <cespedes@debian.org> Sun, 14 Dec 2003 00:40:10 +0100
++
++iproute (20010824-11) unstable; urgency=low
++
++ * Changed priority to "optional"
++ * Fixed "tc -s qdisc" on sparc (patch by "Nicolas S. Dade"
++ <ndade@nsd.dyndns.org>) (closes: Bug#194128)
++
++ -- Juan Cespedes <cespedes@debian.org> Sun, 17 Aug 2003 00:22:47 +0200
++
++iproute (20010824-10) unstable; urgency=low
++
++ * Updated manual pages from http://www.lartc.org/manpages/
++ (closes: Bug#156353, Bug#175313, Bug#176989, Bug#189095)
++ * New Standards-Version
++ * Don't "rm -rf /etc/iproute2" on purge (closes: Bug#202862)
++ * Include "iproute2" in the description (closes: Bug#182999)
++
++ -- Juan Cespedes <cespedes@debian.org> Sat, 16 Aug 2003 18:29:27 +0200
++
++iproute (20010824-9) unstable; urgency=medium
++
++ * Added patch for HTB v3.6 to be able to work with kernel 2.4.20
++ (from http://luxik.cdi.cz/~devik/qos/htb/v3/htb3.6-020525.tgz)
++ (closes: Bug#147550, Bug#167149, Bug#167597, Bug#171277)
++
++ -- Juan Cespedes <cespedes@debian.org> Thu, 05 Dec 2002 13:44:10 +0100
++
++iproute (20010824-8) unstable; urgency=medium
++
++ * Added support for HTB queuing discipline (closes: Bug#133381)
++ NOTE: you need a patched kernel in order to use it
++
++ -- Juan Cespedes <cespedes@debian.org> Tue, 2 Apr 2002 20:29:40 +0200
++
++iproute (20010824-7) unstable; urgency=medium
++
++ * Move `ip' binary to /bin to fix FHS violation (closes: Bug#134812)
++
++ -- Juan Cespedes <cespedes@debian.org> Mon, 4 Mar 2002 00:20:30 +0100
++
++iproute (20010824-6) unstable; urgency=low
++
++ * Added a couple of #ifdef's to be able to compile with older
++ kernel headers (needed for arm) (closes: Bug#131695)
++
++ -- Juan Cespedes <cespedes@debian.org> Sat, 16 Feb 2002 19:27:15 +0100
++
++iproute (20010824-5) unstable; urgency=low
++
++ * Really fix Bug#121589 (dead gateway bug); apparently I
++ forgot to include the patch in 20010824-2
++
++ -- Juan Cespedes <cespedes@debian.org> Tue, 29 Jan 2002 23:22:24 +0100
++
++iproute (20010824-4) unstable; urgency=low
++
++ * Added support for DIFFSERV and ATM in tc
++
++ -- Juan Cespedes <cespedes@debian.org> Sun, 13 Jan 2002 03:01:47 +0100
++
++iproute (20010824-3) unstable; urgency=low
++
++ * Updated tc* man pages (thanks to bert hubert <ahu@ds9a.nl>)
++ * Fixed spurious space in `tc -s qdisc' output (closes: Bug#128501)
++
++ -- Juan Cespedes <cespedes@debian.org> Thu, 10 Jan 2002 22:18:25 +0100
++
++iproute (20010824-2) unstable; urgency=low
++
++ * Fixed the following important and serious bugs:
++ + iproute doesn't compile on Alpha (closes: Bug#118113, Bug#123224)
++ + iproute doesn't compile on MIPS (closes: Bug#118424)
++ + iproute doesn't compile on powerpc (closes: Bug#119601)
++ * Added man pages for tc (closes: Bug#124230), tc-cbq, tc-red, tc-tbf,
++ tc-prio and tc-sfq
++ * Removed references to old programs from iproute(7) (closes: Bug#99536)
++ * Fixed bug which presented first hop as dead in equal cost multipath
++ (closes: Bug#121589)
++ * Do not process .ps with through `psnup' (closes: Bug#119820)
++
++ -- Juan Cespedes <cespedes@debian.org> Tue, 8 Jan 2002 16:07:27 +0100
++
++iproute (20010824-1) unstable; urgency=low
++
++ * New upstream version
++ * Make ingress qdisc work again with tc (closes: Bug#84444)
++ * Make it compile properly with new include files (closes: Bug#113112)
++
++ -- Juan Cespedes <cespedes@debian.org> Sun, 28 Oct 2001 16:38:00 +0100
++
++iproute (20001007-1) unstable; urgency=low
++
++ * New upstream version (closes: Bug#63701)
++ * Remove /etc/iproute2 on purge (closes: Bug#72743)
++ * Fixed Lintian warnings (no-priority-field and no-section-field)
++
++ -- Juan Cespedes <cespedes@debian.org> Sat, 14 Oct 2000 19:27:12 +0200
++
++iproute (991023-2) unstable; urgency=low
++
++ * New Standards-Version (3.1.1) (closes: Bug#47923)
++ * Modified description of package to show which kernel options are
++ necessary to use the package (closes: Bug#47922)
++ * Updated manual page to point at /usr/share/doc/iproute (closes: Bug#47924)
++
++ -- Juan Cespedes <cespedes@debian.org> Sun, 19 Dec 1999 04:00:21 +0100
++
++iproute (991023-1) unstable; urgency=low
++
++ * New upstream version (closes: Bug#48733)
++
++ -- Juan Cespedes <cespedes@debian.org> Tue, 2 Nov 1999 16:29:37 +0100
++
++iproute (990824-1) unstable; urgency=low
++
++ * New maintainer
++ * New upstream version
++ * New Standards-Version: 3.1.0
++ * Minor fix in "ip rule list": mask in "from" address was not shown
++ correctly
++ * Removed obsoleted documentation from "debian/" directory
++
++ -- Juan Cespedes <cespedes@debian.org> Sun, 24 Oct 1999 19:02:56 +0200
++
++iproute (990630-1) unstable; urgency=low
++
++ * New upstream version.
++ * FHS and standards 3.0.1.0.
++
++ -- Roberto Lumbreras <rover@debian.org> Tue, 3 Aug 1999 02:49:28 +0200
++
++iproute (990530-1) unstable; urgency=low
++
++ * New upstream version.
++ * Build with 2.2.10 kernel headers.
++ * Install new scripts ip/routef ip/routel, but not ip/ifcfg ip/rtpr by
++ now, I don't know who/what needs rtpr; ifcfg uses arping, and it isn't
++ available in debian for now.
++
++ -- Roberto Lumbreras <rover@debian.org> Tue, 22 Jun 1999 02:28:53 +0200
++
++iproute (990329-1) unstable; urgency=low
++
++ * New upstream version.
++ * Build with 2.2.5 kernel headers.
++
++ -- Roberto Lumbreras <rover@debian.org> Sun, 4 Apr 1999 18:50:39 +0200
++
++iproute (980630-1) unstable; urgency=low
++
++ * New upstream version.
++ * Build with 2.1.112 kernel headers.
++ * Rewrote the rules file.
++
++ -- Roberto Lumbreras <rover@debian.org> Wed, 29 Jul 1998 23:37:52 +0200
++
++iproute (980119-1) unstable; urgency=low
++
++ * Outdated documentation. Upstream docs are scarce.
++ * Non-Maintainer release
++ * This package has no correct copyright file!
++ * Include all the README.* docs from the upstream site.
++ * Modified to build under glibc
++ * Build with 2.1.85 kernel headers.
++ * produce a correct diff.
++ * Reworked the rules file to utilize debmake fully
++ * Newest upstream release
++ * glibc compilation
++
++ -- Christoph Lameter <christoph@lameter.com> Wed, 4 Feb 1998 13:37:28 -0800
++
++iproute (961225-2) unstable frozen; urgency=low
++
++ * Added a man page for iproute. (Fixes #8080).
++ * Removed out-of-date patches.
++ * Added routing.txt from /usr/src/linux/Documentation/networking/routing.txt
++ * Newer version of debmake.
++
++ -- Tom Lees <tom@lpsg.demon.co.uk> Mon, 17 Apr 1997 17:00:36 +0100
++
++iproute (961225-1) unstable; urgency=low
++
++ * Initial Release.
++
++ -- Tom Lees <tom@lpsg.demon.co.uk> Mon, 30 Dec 1996 11:12:23 +0000
++
++Local variables:
++mode: debian-changelog
++End:
+diff -Naur iproute2-orig/debian/conffiles iproute2/debian/conffiles
+--- iproute2-orig/debian/conffiles 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/conffiles 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,5 @@
++/etc/iproute2/rt_dsfield
++/etc/iproute2/rt_protos
++/etc/iproute2/rt_realms
++/etc/iproute2/rt_scopes
++/etc/iproute2/rt_tables
+diff -Naur iproute2-orig/debian/control iproute2/debian/control
+--- iproute2-orig/debian/control 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/control 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,19 @@
++Source: iproute
++Section: net
++Priority: optional
++Maintainer: Juan Cespedes <cespedes@debian.org>
++Standards-Version: 3.6.0
++Build-Depends: tetex-bin, atm-dev
++
++Package: iproute
++Architecture: any
++Depends: ${shlibs:Depends}
++Description: Professional tools to control the networking in Linux kernels
++ This is `iproute', the professional set of tools to control the
++ networking behavior in kernels 2.2.x and later.
++ .
++ At least, the options CONFIG_NETLINK and CONFIG_RTNETLINK must
++ be compiled in the running kernel
++ .
++ This package is also known as iproute2 upstream and in some
++ documentation.
+diff -Naur iproute2-orig/debian/copyright iproute2/debian/copyright
+--- iproute2-orig/debian/copyright 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/copyright 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,42 @@
++This is the Debian GNU/Linux's prepackaged version of the
++Linux Traffic Control engine and related utils, "iproute"
++
++This package was put together from sources obtained from:
++ ftp://ftp.inr.ac.ru/ip-routing/iproute2-2.4.7-now-ss010824.tar.gz
++
++Changes for Debian:
++ * added Debian GNU/Linux package maintenance system files
++ * Added HTB v3.6 from
++ <http://luxik.cdi.cz/~devik/qos/htb/v3/htb3.6-020525.tgz>
++
++
++Copyrights
++----------
++Copyright (C) 1996-2001 Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
++
++Modifications for Debian:
++ Copyright (C) 1996 Tom Lees <tom@lpsg.demon.co.uk>
++ Copyright (C) 1998 Christoph Lameter <christoph@lameter.com>
++ Copyright (C) 1998-1999 Roberto Lumbreras <rover@debian.org>
++ Copyright (C) 1999-2003 Juan Cespedes <cespedes@debian.org>
++
++
++License
++-------
++
++This program is free software; you can redistribute it and/or modify
++it under the terms of the GNU General Public License as published by
++the Free Software Foundation; either version 2, or (at your option)
++any later version.
++
++This program is distributed in the hope that it will be useful, but
++WITHOUT ANY WARRANTY; without even the implied warranty of
++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++General Public License for more details.
++
++A copy of the GNU General Public License is available as
++`/usr/share/common-licenses/GPL' in the Debian GNU/Linux distribution
++or on the World Wide Web at `http://www.gnu.org/copyleft/gpl.html'.
++You can also obtain it by writing to the Free Software Foundation,
++Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
++
+diff -Naur iproute2-orig/debian/manpages/ip.8 iproute2/debian/manpages/ip.8
+--- iproute2-orig/debian/manpages/ip.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/ip.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,1809 @@
++.TH IP 8 "17 January 2002" "iproute2" "Linux"
++.SH NAME
++ip \- show / manipulate routing, devices, policy routing and tunnels
++.SH SYNOPSIS
++
++.ad l
++.in +8
++.ti -8
++.B ip
++.RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | "
++.BR help " }"
++.sp
++
++.ti -8
++.IR OBJECT " := { "
++.BR link " | " addr " | " route " | " rule " | " neigh " | " tunnel " | "\
++maddr " | " mroute " | " monitor " }"
++.sp
++
++.ti -8
++.IR OPTIONS " := { "
++\fB\-V\fR[\fIersion\fR] |
++\fB\-s\fR[\fItatistics\fR] |
++\fB\-r\fR[\fIesolve\fR] |
++\fB\-f\fR[\fIamily\fR] {
++.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | "
++\fB\-o\fR[\fIneline\fR] }
++
++.ti -8
++.BI "ip link set " DEVICE
++.RB "{ " up " | " down " | " arp " { " on " | " off " } |"
++.br
++.BR promisc " { " on " | " off " } |"
++.br
++.BR allmulti " { " on " | " off " } |"
++.br
++.BR dynamic " { " on " | " off " } |"
++.br
++.BR multicast " { " on " | " off " } |"
++.br
++.B txqueuelen
++.IR PACKETS " |"
++.br
++.B name
++.IR NEWNAME " |"
++.br
++.B address
++.IR LLADDR " |"
++.B broadcast
++.IR LLADDR " |"
++.br
++.B mtu
++.IR MTU " }"
++
++.ti -8
++.B ip link show
++.RI "[ " DEVICE " ]"
++
++.ti -8
++.BR "ip addr" " { " add " | " del " } "
++.IB IFADDR " dev " STRING
++
++.ti -8
++.BR "ip addr" " { " show " | " flush " } [ " dev
++.IR STRING " ] [ "
++.B scope
++.IR SCOPE-ID " ] [ "
++.B to
++.IR PREFIX " ] [ " FLAG-LIST " ] [ "
++.B label
++.IR PATTERN " ]"
++
++.ti -8
++.IR IFADDR " := " PREFIX " | " ADDR
++.B peer
++.IR PREFIX " [ "
++.B broadcast
++.IR ADDR " ] [ "
++.B anycast
++.IR ADDR " ] [ "
++.B label
++.IR STRING " ] [ "
++.B scope
++.IR SCOPE-ID " ]"
++
++.ti -8
++.IR SCOPE-ID " := "
++.RB "[ " host " | " link " | " global " | "
++.IR NUMBER " ]"
++
++.ti -8
++.IR FLAG-LIST " := [ " FLAG-LIST " ] " FLAG
++
++.ti -8
++.IR FLAG " := "
++.RB "[ " permanent " | " dynamic " | " secondary " | " primary " | "\
++tentative " | " deprecated " ]"
++
++.ti -8
++.BR "ip route" " { "
++.BR list " | " flush " } "
++.I SELECTOR
++
++.ti -8
++.B ip route get
++.IR ADDRESS " [ "
++.BI from " ADDRESS " iif " STRING"
++.RB " ] [ " oif
++.IR STRING " ] [ "
++.B tos
++.IR TOS " ]"
++
++.ti -8
++.BR "ip route" " { " add " | " del " | " change " | " append " | "\
++replace " | " monitor " } "
++.I ROUTE
++
++.ti -8
++.IR SELECTOR " := "
++.RB "[ " root
++.IR PREFIX " ] [ "
++.B match
++.IR PREFIX " ] [ "
++.B exact
++.IR PREFIX " ] [ "
++.B table
++.IR TABLE_ID " ] [ "
++.B proto
++.IR RTPROTO " ] [ "
++.B type
++.IR TYPE " ] [ "
++.B scope
++.IR SCOPE " ]"
++
++.ti -8
++.IR ROUTE " := " NODE_SPEC " [ " INFO_SPEC " ]"
++
++.ti -8
++.IR NODE_SPEC " := [ " TYPE " ] " PREFIX " ["
++.B tos
++.IR TOS " ] [ "
++.B table
++.IR TABLE_ID " ] [ "
++.B proto
++.IR RTPROTO " ] [ "
++.B scope
++.IR SCOPE " ] [ "
++.B metric
++.IR METRIC " ]"
++
++.ti -8
++.IR INFO_SPEC " := " "NH OPTIONS FLAGS" " ["
++.B nexthop
++.IR NH " ] ..."
++
++.ti -8
++.IR NH " := [ "
++.B via
++.IR ADDRESS " ] [ "
++.B dev
++.IR STRING " ] [ "
++.B weight
++.IR NUMBER " ] " NHFLAGS
++
++.ti -8
++.IR OPTIONS " := " FLAGS " [ "
++.B mtu
++.IR NUMBER " ] [ "
++.B advmss
++.IR NUMBER " ] [ "
++.B rtt
++.IR NUMBER " ] [ "
++.B rttvar
++.IR NUMBER " ] [ "
++.B window
++.IR NUMBER " ] [ "
++.B cwnd
++.IR NUMBER " ] [ "
++.B ssthresh
++.IR NUMBER " ] [ "
++.B realms
++.IR REALM " ]"
++
++.ti -8
++.IR TYPE " := [ "
++.BR unicast " | " local " | " broadcast " | " multicast " | "\
++throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
++
++.ti -8
++.IR TABLE_ID " := [ "
++.BR local " | " main " | " default " | " all " |"
++.IR NUMBER " ]"
++
++.ti -8
++.IR SCOPE " := [ "
++.BR host " | " link " | " global " |"
++.IR NUMBER " ]"
++
++.ti -8
++.IR FLAGS " := [ "
++.BR equalize " ]"
++
++.ti -8
++.IR NHFLAGS " := [ "
++.BR onlink " | " pervasive " ]"
++
++.ti -8
++.IR RTPROTO " := [ "
++.BR kernel " | " boot " | " static " |"
++.IR NUMBER " ]"
++
++.ti -8
++.B ip rule
++.RB " [ " list " | " add " | " del " ]"
++.I SELECTOR ACTION
++
++.ti -8
++.IR SELECTOR " := [ "
++.B from
++.IR PREFIX " ] [ "
++.B to
++.IR PREFIX " ] [ "
++.B tos
++.IR TOS " ] [ "
++.B fwmark
++.IR FWMARK " ] [ "
++.B dev
++.IR STRING " ] [ "
++.B pref
++.IR NUMBER " ]"
++
++.ti -8
++.IR ACTION " := [ "
++.B table
++.IR TABLE_ID " ] [ "
++.B nat
++.IR ADDRESS " ] [ "
++.BR prohibit " | " reject " | " unreachable " ] [ " realms
++.RI "[" SRCREALM "/]" DSTREALM " ]"
++
++.ti -8
++.IR TABLE_ID " := [ "
++.BR local " | " main " | " default " |"
++.IR NUMBER " ]"
++
++.ti -8
++.BR "ip neigh" " { " add " | " del " | " change " | " replace " } { "
++.IR ADDR " [ "
++.B lladdr
++.IR LLADDR " ] [ "
++.BR nud " { " permanent " | " noarp " | " stale " | " reachable " } ] | " proxy
++.IR ADDR " } [ "
++.B dev
++.IR DEV " ]"
++
++.ti -8
++.BR "ip neigh" " { " show " | " flush " } [ " to
++.IR PREFIX " ] [ "
++.B dev
++.IR DEV " ] [ "
++.B nud
++.IR STATE " ]"
++
++.ti -8
++.BR "ip tunnel" " { " add " | " change " | " del " | " show " }"
++.RI "[ " NAME " ]"
++.br
++.RB "[ " mode " { " ipip " | " gre " | " sit " } ]"
++.br
++.RB "[ " remote
++.IR ADDR " ] [ "
++.B local
++.IR ADDR " ]"
++.br
++.RB "[ [" i "|" o "]" seq " ] [ [" i "|" o "]" key
++.IR KEY " ] [ "
++.RB "[" i "|" o "]" csum " ] ]"
++.br
++.RB "[ " ttl
++.IR TTL " ] [ "
++.B tos
++.IR TOS " ] [ "
++.RB "[" no "]" pmtudisc " ]"
++.br
++.RB "[ " dev
++.IR PHYS_DEV " ]"
++
++.ti -8
++.IR ADDR " := { " IP_ADDRESS " |"
++.BR any " }"
++
++.ti -8
++.IR TOS " := { " NUMBER " |"
++.BR inherit " }"
++
++.ti -8
++.IR TTL " := { " 1 ".." 255 " | "
++.BR inherit " }"
++
++.ti -8
++.IR KEY " := { " DOTTED_QUAD " | " NUMBER " }"
++
++.ti -8
++.BR "ip maddr" " [ " add " | " del " ]"
++.IB MULTIADDR " dev " STRING
++
++.ti -8
++.BR "ip maddr show" " [ " dev
++.IR STRING " ]"
++
++.ti -8
++.BR "ip mroute show" " ["
++.IR PREFIX " ] [ "
++.B from
++.IR PREFIX " ] [ "
++.B iif
++.IR DEVICE " ]"
++
++.ti -8
++.BR "ip monitor" " [ " all " |"
++.IR LISTofOBJECTS " ]"
++.in -8
++.ad b
++
++.SH OPTIONS
++
++.TP
++.BR "\-V" , " \-Version"
++print the version of the
++.B ip
++utility and exit.
++
++.TP
++.BR "\-s" , " \-stats", " \-statistics"
++output more information. If the option
++appears twice or more, the amount of information increases.
++As a rule, the information is statistics or some time values.
++
++.TP
++.BR "\-f" , " \-family"
++followed by protocol family identifier:
++.BR "inet" , " inet6"
++or
++.BR link ,
++enforces the protocol family to use. If the option is not present,
++the protocol family is guessed from other arguments. If the rest
++of the command line does not give enough information to guess the
++family,
++.B ip
++falls back to the default one, usually
++.B inet
++or
++.BR "any" .
++.B link
++is a special family identifier meaning that no networking protocol
++is involved.
++
++.TP
++.B \-4
++shortcut for
++.BR "\-family inet" .
++
++.TP
++.B \-6
++shortcut for
++.BR "\-family inet6" .
++
++.TP
++.B \-0
++shortcut for
++.BR "\-family link" .
++
++.TP
++.BR "\-o" , " \-oneline"
++output each record on a single line, replacing line feeds
++with the
++.B '\'
++character. This is convenient when you want to count records
++with
++.BR wc (1)
++or to
++.BR grep (1)
++the output.
++
++.TP
++.BR "\-r" , " \-resolve"
++use the system's name resolver to print DNS names instead of
++host addresses.
++
++.SH IP - COMMAND SYNTAX
++
++.SS
++.I OBJECT
++
++.TP
++.B link
++- network device.
++
++.TP
++.B address
++- protocol (IP or IPv6) address on a device.
++.TP
++.B neighbour
++- ARP or NDISC cache entry.
++
++.TP
++.B route
++- routing table entry.
++
++.TP
++.B rule
++- rule in routing policy database.
++
++.TP
++.B maddress
++- multicast address.
++
++.TP
++.B mroute
++- multicast routing cache entry.
++
++.TP
++.B tunnel
++- tunnel over IP.
++
++.PP
++The names of all objects may be written in full or
++abbreviated form, f.e.
++.B address
++is abbreviated as
++.B addr
++or just
++.B a.
++
++.SS
++.I COMMAND
++
++Specifies the action to perform on the object.
++The set of possible actions depends on the object type.
++As a rule, it is possible to
++.BR "add" , " delete"
++and
++.B show
++(or
++.B list
++) objects, but some objects do not allow all of these operations
++or have some additional commands. The
++.B help
++command is available for all objects. It prints
++out a list of available commands and argument syntax conventions.
++.sp
++If no command is given, some default command is assumed.
++Usually it is
++.B list
++or, if the objects of this class cannot be listed,
++.BR "help" .
++
++.SH ip link - network device configuration
++
++.B link
++is a network device and the corresponding commands
++display and change the state of devices.
++
++.SS ip link set - change device attributes
++
++.TP
++.BI dev " NAME " (default)
++.I NAME
++specifies network device to operate on.
++
++.TP
++.BR up " and " down
++change the state of the device to
++.B UP
++or
++.BR "DOWN" .
++
++.TP
++.BR "arp on " or " arp off"
++change the
++.B NOARP
++flag on the device.
++
++.TP
++.BR "multicast on " or " multicast off"
++change the
++.B MULTICAST
++flag on the device.
++
++.TP
++.BR "dynamic on " or " dynamic off"
++change the
++.B DYNAMIC
++flag on the device.
++
++.TP
++.BI name " NAME"
++change the name of the device. This operation is not
++recommended if the device is running or has some addresses
++already configured.
++
++.TP
++.BI txqueuelen " NUMBER"
++.TP
++.BI txqlen " NUMBER"
++change the transmit queue length of the device.
++
++.TP
++.BI mtu " NUMBER"
++change the
++.I MTU
++of the device.
++
++.TP
++.BI address " LLADDRESS"
++change the station address of the interface.
++
++.TP
++.BI broadcast " LLADDRESS"
++.TP
++.BI brd " LLADDRESS"
++.TP
++.BI peer " LLADDRESS"
++change the link layer broadcast address or the peer address when
++the interface is
++.IR "POINTOPOINT" .
++
++.PP
++.B Warning:
++If multiple parameter changes are requested,
++.B ip
++aborts immediately after any of the changes have failed.
++This is the only case when
++.B ip
++can move the system to an unpredictable state. The solution
++is to avoid changing several parameters with one
++.B ip link set
++call.
++
++.SS ip link show - display device attributes
++
++.TP
++.BI dev " NAME " (default)
++.I NAME
++specifies the network device to show.
++If this argument is omitted all devices are listed.
++
++.TP
++.B up
++only display running interfaces.
++
++.SH ip address - protocol address management.
++
++The
++.B address
++is a protocol (IP or IPv6) address attached
++to a network device. Each device must have at least one address
++to use the corresponding protocol. It is possible to have several
++different addresses attached to one device. These addresses are not
++discriminated, so that the term
++.B alias
++is not quite appropriate for them and we do not use it in this document.
++.sp
++The
++.B ip addr
++command displays addresses and their properties, adds new addresses
++and deletes old ones.
++
++.SS ip address add - add new protocol address.
++
++.TP
++.BI dev " NAME"
++the name of the device to add the address to.
++
++.TP
++.BI local " ADDRESS " (default)
++the address of the interface. The format of the address depends
++on the protocol. It is a dotted quad for IP and a sequence of
++hexadecimal halfwords separated by colons for IPv6. The
++.I ADDRESS
++may be followed by a slash and a decimal number which encodes
++the network prefix length.
++
++.TP
++.BI peer " ADDRESS"
++the address of the remote endpoint for pointopoint interfaces.
++Again, the
++.I ADDRESS
++may be followed by a slash and a decimal number, encoding the network
++prefix length. If a peer address is specified, the local address
++cannot have a prefix length. The network prefix is associated
++with the peer rather than with the local address.
++
++.TP
++.BI broadcast " ADDRESS"
++the broadcast address on the interface.
++.sp
++It is possible to use the special symbols
++.B '+'
++and
++.B '-'
++instead of the broadcast address. In this case, the broadcast address
++is derived by setting/resetting the host bits of the interface prefix.
++
++.TP
++.BI label " NAME"
++Each address may be tagged with a label string.
++In order to preserve compatibility with Linux-2.0 net aliases,
++this string must coincide with the name of the device or must be prefixed
++with the device name followed by colon.
++
++.TP
++.BI scope " SCOPE_VALUE"
++the scope of the area where this address is valid.
++The available scopes are listed in file
++.BR "/etc/iproute2/rt_scopes" .
++Predefined scope values are:
++
++.in +8
++.B global
++- the address is globally valid.
++.sp
++.B site
++- (IPv6 only) the address is site local, i.e. it is
++valid inside this site.
++.sp
++.B link
++- the address is link local, i.e. it is valid only on this device.
++.sp
++.B host
++- the address is valid only inside this host.
++.in -8
++
++.SS ip address delete - delete protocol address
++.B Arguments:
++coincide with the arguments of
++.B ip addr add.
++The device name is a required argument. The rest are optional.
++If no arguments are given, the first address is deleted.
++
++.SS ip address show - look at protocol addresses
++
++.TP
++.BI dev " NAME " (default)
++name of device.
++
++.TP
++.BI scope " SCOPE_VAL"
++only list addresses with this scope.
++
++.TP
++.BI to " PREFIX"
++only list addresses matching this prefix.
++
++.TP
++.BI label " PATTERN"
++only list addresses with labels matching the
++.IR "PATTERN" .
++.I PATTERN
++is a usual shell style pattern.
++
++.TP
++.BR dynamic " and " permanent
++(IPv6 only) only list addresses installed due to stateless
++address configuration or only list permanent (not dynamic)
++addresses.
++
++.TP
++.B tentative
++(IPv6 only) only list addresses which did not pass duplicate
++address detection.
++
++.TP
++.B deprecated
++(IPv6 only) only list deprecated addresses.
++
++.TP
++.BR primary " and " secondary
++only list primary (or secondary) addresses.
++
++.SS ip address flush - flush protocol addresses
++This command flushes the protocol addresses selected by some criteria.
++
++.PP
++This command has the same arguments as
++.B show.
++The difference is that it does not run when no arguments are given.
++
++.PP
++.B Warning:
++This command (and other
++.B flush
++commands described below) is pretty dangerous. If you make a mistake,
++it will not forgive it, but will cruelly purge all the addresses.
++
++.PP
++With the
++.B -statistics
++option, the command becomes verbose. It prints out the number of deleted
++addresses and the number of rounds made to flush the address list. If
++this option is given twice,
++.B ip addr flush
++also dumps all the deleted addresses in the format described in the
++previous subsection.
++
++.SH ip neighbour - neighbour/arp tables management.
++
++.B neighbour
++objects establish bindings between protocol addresses and
++link layer addresses for hosts sharing the same link.
++Neighbour entries are organized into tables. The IPv4 neighbour table
++is known by another name - the ARP table.
++
++.P
++The corresponding commands display neighbour bindings
++and their properties, add new neighbour entries and delete old ones.
++
++.SS ip neighbour add - add a new neighbour entry
++.SS ip neighbour change - change an existing entry
++.SS ip neighbour replace - add a new entry or change an existing one
++
++These commands create new neighbour records or update existing ones.
++
++.TP
++.BI to " ADDRESS " (default)
++the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
++
++.TP
++.BI dev " NAME"
++the interface to which this neighbour is attached.
++
++.TP
++.BI lladdr " LLADDRESS"
++the link layer address of the neighbour.
++.I LLADDRESS
++can also be
++.BR "null" .
++
++.TP
++.BI nud " NUD_STATE"
++the state of the neighbour entry.
++.B nud
++is an abbreviation for 'Neighbour Unreachability Detection'.
++The state can take one of the following values:
++
++.in +8
++.B permanent
++- the neighbour entry is valid forever and can only
++be removed administratively.
++.sp
++
++.B noarp
++- the neighbour entry is valid. No attempts to validate
++this entry will be made but it can be removed when its lifetime expires.
++.sp
++
++.B reachable
++- the neighbour entry is valid until the reachability
++timeout expires.
++.sp
++
++.B stale
++- the neighbour entry is valid but suspicious.
++This option to
++.B ip neigh
++does not change the neighbour state if it was valid and the address
++is not changed by this command.
++.in -8
++
++.SS ip neighbour delete - delete a neighbour entry
++This command invalidates a neighbour entry.
++
++.PP
++The arguments are the same as with
++.BR "ip neigh add" ,
++except that
++.B lladdr
++and
++.B nud
++are ignored.
++
++.PP
++.B Warning:
++Attempts to delete or manually change a
++.B noarp
++entry created by the kernel may result in unpredictable behaviour.
++Particularly, the kernel may try to resolve this address even
++on a
++.B NOARP
++interface or if the address is multicast or broadcast.
++
++.SS ip neighbour show - list neighbour entries
++
++This command displays neighbour tables.
++
++.TP
++.BI to " ADDRESS " (default)
++the prefix selecting the neighbours to list.
++
++.TP
++.BI dev " NAME"
++only list the neighbours attached to this device.
++
++.TP
++.B unused
++only list neighbours which are not currently in use.
++
++.TP
++.BI nud " NUD_STATE"
++only list neighbour entries in this state.
++.I NUD_STATE
++takes values listed below or the special value
++.B all
++which means all states. This option may occur more than once.
++If this option is absent,
++.B ip
++lists all entries except for
++.B none
++and
++.BR "noarp" .
++
++.SS ip neighbour flush - flush neighbour entries
++This command flushes neighbour tables, selecting
++entries to flush by some criteria.
++
++.PP
++This command has the same arguments as
++.B show.
++The differences are that it does not run when no arguments are given,
++and that the default neighbour states to be flushed do not include
++.B permanent
++and
++.BR "noarp" .
++
++.PP
++With the
++.B -statistics
++option, the command becomes verbose. It prints out the number of
++deleted neighbours and the number of rounds made to flush the
++neighbour table. If the option is given
++twice,
++.B ip neigh flush
++also dumps all the deleted neighbours.
++
++.SH ip route - routing table management
++Manipulate route entries in the kernel routing tables, which keep
++information about paths to other networked nodes.
++.sp
++.B Route types:
++
++.in +8
++.B unicast
++- the route entry describes real paths to the destinations covered
++by the route prefix.
++
++.sp
++.B unreachable
++- these destinations are unreachable. Packets are discarded and the
++ICMP message
++.I host unreachable
++is generated.
++The local senders get an
++.I EHOSTUNREACH
++error.
++
++.sp
++.B blackhole
++- these destinations are unreachable. Packets are discarded silently.
++The local senders get an
++.I EINVAL
++error.
++
++.sp
++.B prohibit
++- these destinations are unreachable. Packets are discarded and the
++ICMP message
++.I communication administratively prohibited
++is generated. The local senders get an
++.I EACCES
++error.
++
++.sp
++.B local
++- the destinations are assigned to this host. The packets are looped
++back and delivered locally.
++
++.sp
++.B broadcast
++- the destinations are broadcast addresses. The packets are sent as
++link broadcasts.
++
++.sp
++.B throw
++- a special control route used together with policy rules. If such a
++route is selected, lookup in this table is terminated pretending that
++no route was found. Without policy routing it is equivalent to the
++absence of the route in the routing table. The packets are dropped
++and the ICMP message
++.I net unreachable
++is generated. The local senders get an
++.I ENETUNREACH
++error.
++
++.sp
++.B nat
++- a special NAT route. Destinations covered by the prefix
++are considered to be dummy (or external) addresses which require translation
++to real (or internal) ones before forwarding. The addresses to translate to
++are selected with the attribute
++.BR "via" .
++
++.sp
++.B anycast
++.RI "- " "not implemented"
++the destinations are
++.I anycast
++addresses assigned to this host. They are mainly equivalent
++to
++.B local
++with one difference: such addresses are invalid when used
++as the source address of any packet.
++
++.sp
++.B multicast
++- a special type used for multicast routing. It is not present in
++normal routing tables.
++.in -8
++
++.P
++.B Route tables:
++Linux-2.x can pack routes into several routing
++tables identified by a number in the range from 1 to 255 or by
++name from the file
++.BR /etc/iproute2/rt_tables .
++By default all normal routes are inserted into the
++.B main
++table (ID 254) and the kernel only uses this table when calculating routes.
++
++.sp
++Actually, one other table always exists, which is invisible but
++even more important. It is the
++.B local
++table (ID 255). This table
++consists of routes for local and broadcast addresses. The kernel maintains
++this table automatically and the administrator usually need not modify it
++or even look at it.
++
++The multiple routing tables enter the game when
++.I policy routing
++is used.
++
++.SS ip route add - add new route
++.SS ip route change - change route
++.SS ip route replace - change or add new one
++
++.TP
++.BI to " TYPE PREFIX " (default)
++the destination prefix of the route. If
++.I TYPE
++is omitted,
++.B ip
++assumes type
++.BR "unicast" .
++Other values of
++.I TYPE
++are listed above.
++.I PREFIX
++is an IP or IPv6 address optionally followed by a slash and the
++prefix length. If the length of the prefix is missing,
++.B ip
++assumes a full-length host route. There is also a special
++.I PREFIX
++.B default
++- which is equivalent to IP
++.B 0/0
++or to IPv6
++.BR "::/0" .
++
++.TP
++.BI tos " TOS"
++.TP
++.BI dsfield " TOS"
++the Type Of Service (TOS) key. This key has no associated mask and
++the longest match is understood as: First, compare the TOS
++of the route and of the packet. If they are not equal, then the packet
++may still match a route with a zero TOS.
++.I TOS
++is either an 8 bit hexadecimal number or an identifier
++from
++.BR "/etc/iproute2/rt_dsfield" .
++
++.TP
++.BI metric " NUMBER"
++.TP
++.BI preference " NUMBER"
++the preference value of the route.
++.I NUMBER
++is an arbitrary 32bit number.
++
++.TP
++.BI table " TABLEID"
++the table to add this route to.
++.I TABLEID
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_tables" .
++If this parameter is omitted,
++.B ip
++assumes the
++.B main
++table, with the exception of
++.BR local ", " broadcast " and " nat
++routes, which are put into the
++.B local
++table by default.
++
++.TP
++.BI dev " NAME"
++the output device name.
++
++.TP
++.BI via " ADDRESS"
++the address of the nexthop router. Actually, the sense of this field
++depends on the route type. For normal
++.B unicast
++routes it is either the true next hop router or, if it is a direct
++route installed in BSD compatibility mode, it can be a local address
++of the interface. For NAT routes it is the first address of the block
++of translated IP destinations.
++
++.TP
++.BI src " ADDRESS"
++the source address to prefer when sending to the destinations
++covered by the route prefix.
++
++.TP
++.BI realm " REALMID"
++the realm to which this route is assigned.
++.I REALMID
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_realms" .
++
++.TP
++.BI mtu " MTU"
++.TP
++.BI "mtu lock" " MTU"
++the MTU along the path to the destination. If the modifier
++.B lock
++is not used, the MTU may be updated by the kernel due to
++Path MTU Discovery. If the modifier
++.B lock
++is used, no path MTU discovery will be tried, all packets
++will be sent without the DF bit in IPv4 case or fragmented
++to MTU for IPv6.
++
++.TP
++.BI window " NUMBER"
++the maximal window for TCP to advertise to these destinations,
++measured in bytes. It limits maximal data bursts that our TCP
++peers are allowed to send to us.
++
++.TP
++.BI rtt " NUMBER"
++the initial RTT ('Round Trip Time') estimate.
++
++.TP
++.BI rttvar " NUMBER " "(2.3.15+ only)"
++the initial RTT variance estimate.
++
++.TP
++.BI ssthresh " NUMBER " "(2.3.15+ only)"
++an estimate for the initial slow start threshold.
++
++.TP
++.BI cwnd " NUMBER " "(2.3.15+ only)"
++the clamp for congestion window. It is ignored if the
++.B lock
++flag is not used.
++
++.TP
++.BI advmss " NUMBER " "(2.3.15+ only)"
++the MSS ('Maximal Segment Size') to advertise to these
++destinations when establishing TCP connections. If it is not given,
++Linux uses a default value calculated from the first hop device MTU.
++(If the path to these destinations is asymmetric, this guess may be wrong.)
++
++.TP
++.BI reordering " NUMBER " "(2.3.15+ only)"
++Maximal reordering on the path to this destination.
++If it is not given, Linux uses the value selected with
++.B sysctl
++variable
++.BR "net/ipv4/tcp_reordering" .
++
++.TP
++.BI nexthop " NEXTHOP"
++the nexthop of a multipath route.
++.I NEXTHOP
++is a complex value with its own syntax similar to the top level
++argument lists:
++
++.in +8
++.BI via " ADDRESS"
++- is the nexthop router.
++.sp
++
++.BI dev " NAME"
++- is the output device.
++.sp
++
++.BI weight " NUMBER"
++- is a weight for this element of a multipath
++route reflecting its relative bandwidth or quality.
++.in -8
++
++.TP
++.BI scope " SCOPE_VAL"
++the scope of the destinations covered by the route prefix.
++.I SCOPE_VAL
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_scopes" .
++If this parameter is omitted,
++.B ip
++assumes scope
++.B global
++for all gatewayed
++.B unicast
++routes, scope
++.B link
++for direct
++.BR unicast " and " broadcast
++routes and scope
++.BR host " for " local
++routes.
++
++.TP
++.BI protocol " RTPROTO"
++the routing protocol identifier of this route.
++.I RTPROTO
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_protos" .
++If the routing protocol ID is not given,
++.BR ip " assumes protocol"
++.B boot
++(i.e. it assumes the route was added by someone who doesn't
++understand what they are doing). Several protocol values have
++a fixed interpretation.
++Namely:
++
++.in +8
++.B redirect
++- the route was installed due to an ICMP redirect.
++.sp
++
++.B kernel
++- the route was installed by the kernel during autoconfiguration.
++.sp
++
++.B boot
++- the route was installed during the bootup sequence.
++If a routing daemon starts, it will purge all of them.
++.sp
++
++.B static
++- the route was installed by the administrator
++to override dynamic routing. Routing daemons will respect them
++and, probably, even advertise them to its peers.
++.sp
++
++.B ra
++- the route was installed by Router Discovery protocol.
++.in -8
++
++.sp
++The rest of the values are not reserved and the administrator is free
++to assign (or not to assign) protocol tags.
++
++.TP
++.B onlink
++pretend that the nexthop is directly attached to this link,
++even if it does not match any interface prefix.
++
++.TP
++.B equalize
++allow packet by packet randomization on multipath routes.
++Without this modifier, the route will be frozen to one selected
++nexthop, so that load splitting will only occur on per-flow base.
++.B equalize
++only works if the kernel is patched.
++
++.SS ip route delete - delete route
++
++.B ip route del
++has the same arguments as
++.BR "ip route add" ,
++but their semantics are a bit different.
++
++Key values
++.RB "(" to ", " tos ", " preference " and " table ")"
++select the route to delete. If optional attributes are present,
++.B ip
++verifies that they coincide with the attributes of the route to delete.
++If no route with the given key and attributes was found,
++.B ip route del
++fails.
++
++.SS ip route show - list routes
++the command displays the contents of the routing tables or the route(s)
++selected by some criteria.
++
++.TP
++.BI to " SELECTOR " (default)
++only select routes from the given range of destinations.
++.I SELECTOR
++consists of an optional modifier
++.RB "(" root ", " match " or " exact ")"
++and a prefix.
++.BI root " PREFIX"
++selects routes with prefixes not shorter than
++.IR PREFIX "."
++F.e.
++.BI root " 0/0"
++selects the entire routing table.
++.BI match " PREFIX"
++selects routes with prefixes not longer than
++.IR PREFIX "."
++F.e.
++.BI match " 10.0/16"
++selects
++.IR 10.0/16 ","
++.IR 10/8 " and " 0/0 ,
++but it does not select
++.IR 10.1/16 " and " 10.0.0/24 .
++And
++.BI exact " PREFIX"
++(or just
++.IR PREFIX ")"
++selects routes with this exact prefix. If neither of these options
++are present,
++.B ip
++assumes
++.BI root " 0/0"
++i.e. it lists the entire table.
++
++.TP
++.BI tos " TOS"
++.BI dsfield " TOS"
++only select routes with the given TOS.
++
++.TP
++.BI table " TABLEID"
++show the routes from this table(s). The default setting is to show
++.BR "table main" "."
++.I TABLEID
++may either be the ID of a real table or one of the special values:
++.sp
++.in +8
++.B all
++- list all of the tables.
++.sp
++.B cache
++- dump the routing cache.
++.in -8
++
++.TP
++.B cloned
++.TP
++.B cached
++list cloned routes i.e. routes which were dynamically forked from
++other routes because some route attribute (f.e. MTU) was updated.
++Actually, it is equivalent to
++.BR "table cache" "."
++
++.TP
++.BI from " SELECTOR"
++the same syntax as for
++.BR to ","
++but it binds the source address range rather than destinations.
++Note that the
++.B from
++option only works with cloned routes.
++
++.TP
++.BI protocol " RTPROTO"
++only list routes of this protocol.
++
++.TP
++.BI scope " SCOPE_VAL"
++only list routes with this scope.
++
++.TP
++.BI type " TYPE"
++only list routes of this type.
++
++.TP
++.BI dev " NAME"
++only list routes going via this device.
++
++.TP
++.BI via " PREFIX"
++only list routes going via the nexthop routers selected by
++.IR PREFIX "."
++
++.TP
++.BI src " PREFIX"
++only list routes with preferred source addresses selected
++by
++.IR PREFIX "."
++
++.TP
++.BI realm " REALMID"
++.TP
++.BI realms " FROMREALM/TOREALM"
++only list routes with these realms.
++
++.SS ip route flush - flush routing tables
++this command flushes routes selected by some criteria.
++
++.sp
++The arguments have the same syntax and semantics as the arguments of
++.BR "ip route show" ,
++but routing tables are not listed but purged. The only difference is
++the default action:
++.B show
++dumps all the IP main routing table but
++.B flush
++prints the helper page.
++
++.sp
++With the
++.B -statistics
++option, the command becomes verbose. It prints out the number of
++deleted routes and the number of rounds made to flush the routing
++table. If the option is given
++twice,
++.B ip route flush
++also dumps all the deleted routes in the format described in the
++previous subsection.
++
++.SS ip route get - get a single route
++this command gets a single route to a destination and prints its
++contents exactly as the kernel sees it.
++
++.TP
++.BI to " ADDRESS " (default)
++the destination address.
++
++.TP
++.BI from " ADDRESS"
++the source address.
++
++.TP
++.BI tos " TOS"
++.TP
++.BI dsfield " TOS"
++the Type Of Service.
++
++.TP
++.BI iif " NAME"
++the device from which this packet is expected to arrive.
++
++.TP
++.BI oif " NAME"
++force the output device on which this packet will be routed.
++
++.TP
++.B connected
++if no source address
++.RB "(option " from ")"
++was given, relookup the route with the source set to the preferred
++address received from the first lookup.
++If policy routing is used, it may be a different route.
++
++.P
++Note that this operation is not equivalent to
++.BR "ip route show" .
++.B show
++shows existing routes.
++.B get
++resolves them and creates new clones if necessary. Essentially,
++.B get
++is equivalent to sending a packet along this path.
++If the
++.B iif
++argument is not given, the kernel creates a route
++to output packets towards the requested destination.
++This is equivalent to pinging the destination
++with a subsequent
++.BR "ip route ls cache" ,
++however, no packets are actually sent. With the
++.B iif
++argument, the kernel pretends that a packet arrived from this interface
++and searches for a path to forward the packet.
++
++.SH ip rule - routing policy database management
++
++.BR "Rule" s
++in the routing policy database control the route selection algorithm.
++
++.P
++Classic routing algorithms used in the Internet make routing decisions
++based only on the destination address of packets (and in theory,
++but not in practice, on the TOS field).
++
++.P
++In some circumstances we want to route packets differently depending not only
++on destination addresses, but also on other packet fields: source address,
++IP protocol, transport protocol ports or even packet payload.
++This task is called 'policy routing'.
++
++.P
++To solve this task, the conventional destination based routing table, ordered
++according to the longest match rule, is replaced with a 'routing policy
++database' (or RPDB), which selects routes by executing some set of rules.
++
++.P
++Each policy routing rule consists of a
++.B selector
++and an
++.B action predicate.
++The RPDB is scanned in the order of increasing priority. The selector
++of each rule is applied to {source address, destination address, incoming
++interface, tos, fwmark} and, if the selector matches the packet,
++the action is performed. The action predicate may return with success.
++In this case, it will either give a route or failure indication
++and the RPDB lookup is terminated. Otherwise, the RPDB program
++continues on the next rule.
++
++.P
++Semantically, natural action is to select the nexthop and the output device.
++
++.P
++At startup time the kernel configures the default RPDB consisting of three
++rules:
++
++.TP
++1.
++Priority: 0, Selector: match anything, Action: lookup routing
++table
++.B local
++(ID 255).
++The
++.B local
++table is a special routing table containing
++high priority control routes for local and broadcast addresses.
++.sp
++Rule 0 is special. It cannot be deleted or overridden.
++
++.TP
++2.
++Priority: 32766, Selector: match anything, Action: lookup routing
++table
++.B main
++(ID 254).
++The
++.B main
++table is the normal routing table containing all non-policy
++routes. This rule may be deleted and/or overridden with other
++ones by the administrator.
++
++.TP
++3.
++Priority: 32767, Selector: match anything, Action: lookup routing
++table
++.B default
++(ID 253).
++The
++.B default
++table is empty. It is reserved for some post-processing if no previous
++default rules selected the packet.
++This rule may also be deleted.
++
++.P
++Each RPDB entry has additional
++attributes. F.e. each rule has a pointer to some routing
++table. NAT and masquerading rules have an attribute to select new IP
++address to translate/masquerade. Besides that, rules have some
++optional attributes, which routes have, namely
++.BR "realms" .
++These values do not override those contained in the routing tables. They
++are only used if the route did not select any attributes.
++
++.sp
++The RPDB may contain rules of the following types:
++
++.in +8
++.B unicast
++- the rule prescribes to return the route found
++in the routing table referenced by the rule.
++
++.B blackhole
++- the rule prescribes to silently drop the packet.
++
++.B unreachable
++- the rule prescribes to generate a 'Network is unreachable' error.
++
++.B prohibit
++- the rule prescribes to generate 'Communication is administratively
++prohibited' error.
++
++.B nat
++- the rule prescribes to translate the source address
++of the IP packet into some other value.
++.in -8
++
++.SS ip rule add - insert a new rule
++.SS ip rule delete - delete a rule
++
++.TP
++.BI type " TYPE " (default)
++the type of this rule. The list of valid types was given in the previous
++subsection.
++
++.TP
++.BI from " PREFIX"
++select the source prefix to match.
++
++.TP
++.BI to " PREFIX"
++select the destination prefix to match.
++
++.TP
++.BI iif " NAME"
++select the incoming device to match. If the interface is loopback,
++the rule only matches packets originating from this host. This means
++that you may create separate routing tables for forwarded and local
++packets and, hence, completely segregate them.
++
++.TP
++.BI tos " TOS"
++.TP
++.BI dsfield " TOS"
++select the TOS value to match.
++
++.TP
++.BI fwmark " MARK"
++select the
++.B fwmark
++value to match.
++
++.TP
++.BI priority " PREFERENCE"
++the priority of this rule. Each rule should have an explicitly
++set
++.I unique
++priority value.
++
++.TP
++.BI table " TABLEID"
++the routing table identifier to lookup if the rule selector matches.
++
++.TP
++.BI realms " FROM/TO"
++Realms to select if the rule matched and the routing table lookup
++succeeded. Realm
++.I TO
++is only used if the route did not select any realm.
++
++.TP
++.BI nat " ADDRESS"
++The base of the IP address block to translate (for source addresses).
++The
++.I ADDRESS
++may be either the start of the block of NAT addresses (selected by NAT
++routes) or a local host address (or even zero).
++In the last case the router does not translate the packets, but
++masquerades them to this address.
++
++.B Warning:
++Changes to the RPDB made with these commands do not become active
++immediately. It is assumed that after a script finishes a batch of
++updates, it flushes the routing cache with
++.BR "ip route flush cache" .
++
++.SS ip rule show - list rules
++This command has no arguments.
++
++.SH ip maddress - multicast addresses management
++
++.B maddress
++objects are multicast addresses.
++
++.SS ip maddress show - list multicast addresses
++
++.TP
++.BI dev " NAME " (default)
++the device name.
++
++.SS ip maddress add - add a multicast address
++.SS ip maddress delete - delete a multicast address
++these commands attach/detach a static link layer multicast address
++to listen on the interface.
++Note that it is impossible to join protocol multicast groups
++statically. This command only manages link layer addresses.
++
++.TP
++.BI address " LLADDRESS " (default)
++the link layer multicast address.
++
++.TP
++.BI dev " NAME"
++the device to join/leave this multicast address.
++
++.SH ip mroute - multicast routing cache management
++.B mroute
++objects are multicast routing cache entries created by a user level
++mrouting daemon (f.e.
++.B pimd
++or
++.B mrouted
++).
++
++Due to the limitations of the current interface to the multicast routing
++engine, it is impossible to change
++.B mroute
++objects administratively, so we may only display them. This limitation
++will be removed in the future.
++
++.SS ip mroute show - list mroute cache entries
++
++.TP
++.BI to " PREFIX " (default)
++the prefix selecting the destination multicast addresses to list.
++
++.TP
++.BI iif " NAME"
++the interface on which multicast packets are received.
++
++.TP
++.BI from " PREFIX"
++the prefix selecting the IP source addresses of the multicast route.
++
++.SH ip tunnel - tunnel configuration
++.B tunnel
++objects are tunnels, encapsulating packets in IPv4 packets and then
++sending them over the IP infrastructure.
++
++.SS ip tunnel add - add a new tunnel
++.SS ip tunnel change - change an existing tunnel
++.SS ip tunnel delete - destroy a tunnel
++
++.TP
++.BI name " NAME " (default)
++select the tunnel device name.
++
++.TP
++.BI mode " MODE"
++set the tunnel mode. Three modes are currently available:
++.BR ipip ", " sit " and " gre "."
++
++.TP
++.BI remote " ADDRESS"
++set the remote endpoint of the tunnel.
++
++.TP
++.BI local " ADDRESS"
++set the fixed local address for tunneled packets.
++It must be an address on another interface of this host.
++
++.TP
++.BI ttl " N"
++set a fixed TTL
++.I N
++on tunneled packets.
++.I N
++is a number in the range 1--255. 0 is a special value
++meaning that packets inherit the TTL value.
++The default value is:
++.BR "inherit" .
++
++.TP
++.BI tos " T"
++.TP
++.BI dsfield " T"
++set a fixed TOS
++.I T
++on tunneled packets.
++The default value is:
++.BR "inherit" .
++
++.TP
++.BI dev " NAME"
++bind the tunnel to the device
++.I NAME
++so that tunneled packets will only be routed via this device and will
++not be able to escape to another device when the route to endpoint
++changes.
++
++.TP
++.B nopmtudisc
++disable Path MTU Discovery on this tunnel.
++It is enabled by default. Note that a fixed ttl is incompatible
++with this option: tunnelling with a fixed ttl always makes pmtu
++discovery.
++
++.TP
++.BI key " K"
++.TP
++.BI ikey " K"
++.TP
++.BI okey " K"
++.RB ( " only GRE tunnels " )
++use keyed GRE with key
++.IR K ". " K
++is either a number or an IP address-like dotted quad.
++The
++.B key
++parameter sets the key to use in both directions.
++The
++.BR ikey " and " okey
++parameters set different keys for input and output.
++
++.TP
++.BR csum ", " icsum ", " ocsum
++.RB ( " only GRE tunnels " )
++generate/require checksums for tunneled packets.
++The
++.B ocsum
++flag calculates checksums for outgoing packets.
++The
++.B icsum
++flag requires that all input packets have the correct
++checksum. The
++.B csum
++flag is equivalent to the combination
++.BR "icsum ocsum" .
++
++.TP
++.BR seq ", " iseq ", " oseq
++.RB ( " only GRE tunnels " )
++serialize packets.
++The
++.B oseq
++flag enables sequencing of outgoing packets.
++The
++.B iseq
++flag requires that all input packets are serialized.
++The
++.B seq
++flag is equivalent to the combination
++.BR "iseq oseq" .
++.B It doesn't work. Don't use it.
++
++.SS ip tunnel show - list tunnels
++This command has no arguments.
++
++.SH ip monitor and rtmon - state monitoring
++
++The
++.B ip
++utility can monitor the state of devices, addresses
++and routes continuously. This option has a slightly different format.
++Namely, the
++.B monitor
++command is the first in the command line and then the object list follows:
++
++.BR "ip monitor" " [ " all " |"
++.IR OBJECT-LIST " ]"
++
++.I OBJECT-LIST
++is the list of object types that we want to monitor.
++It may contain
++.BR link ", " address " and " route "."
++If no
++.B file
++argument is given,
++.B ip
++opens RTNETLINK, listens on it and dumps state changes in the format
++described in previous sections.
++
++.P
++If a file name is given, it does not listen on RTNETLINK,
++but opens the file containing RTNETLINK messages saved in binary format
++and dumps them. Such a history file can be generated with the
++.B rtmon
++utility. This utility has a command line syntax similar to
++.BR "ip monitor" .
++Ideally,
++.B rtmon
++should be started before the first network configuration command
++is issued. F.e. if you insert:
++.sp
++.in +8
++rtmon file /var/log/rtmon.log
++.in -8
++.sp
++in a startup script, you will be able to view the full history
++later.
++
++.P
++Certainly, it is possible to start
++.B rtmon
++at any time.
++It prepends the history with the state snapshot dumped at the moment
++of starting.
++
++.SH HISTORY
++
++.B ip
++was written by Alexey N. Kuznetsov and added in Linux 2.2.
++.SH SEE ALSO
++.BR tc (8)
++.br
++.RB "IP Command reference " ip-cref.ps
++.br
++.RB "IP tunnels " ip-tunnels.ps
++
++.SH AUTHOR
++
++Manpage maintained by Michail Litvak <mci@owl.openwall.com>
+diff -Naur iproute2-orig/debian/manpages/old/ip.8 iproute2/debian/manpages/old/ip.8
+--- iproute2-orig/debian/manpages/old/ip.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/ip.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,1809 @@
++.TH IP 8 "17 January 2002" "iproute2" "Linux"
++.SH NAME
++ip \- show / manipulate routing, devices, policy routing and tunnels
++.SH SYNOPSIS
++
++.ad l
++.in +8
++.ti -8
++.B ip
++.RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | "
++.BR help " }"
++.sp
++
++.ti -8
++.IR OBJECT " := { "
++.BR link " | " addr " | " route " | " rule " | " neigh " | " tunnel " | "\
++maddr " | " mroute " | " monitor " }"
++.sp
++
++.ti -8
++.IR OPTIONS " := { "
++\fB\-V\fR[\fIersion\fR] |
++\fB\-s\fR[\fItatistics\fR] |
++\fB\-r\fR[\fIesolve\fR] |
++\fB\-f\fR[\fIamily\fR] {
++.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | "
++\fB\-o\fR[\fIneline\fR] }
++
++.ti -8
++.BI "ip link set " DEVICE
++.RB "{ " up " | " down " | " arp " { " on " | " off " } |"
++.br
++.BR promisc " { " on " | " off " } |"
++.br
++.BR allmulti " { " on " | " off " } |"
++.br
++.BR dynamic " { " on " | " off " } |"
++.br
++.BR multicast " { " on " | " off " } |"
++.br
++.B txqueuelen
++.IR PACKETS " |"
++.br
++.B name
++.IR NEWNAME " |"
++.br
++.B address
++.IR LLADDR " |"
++.B broadcast
++.IR LLADDR " |"
++.br
++.B mtu
++.IR MTU " }"
++
++.ti -8
++.B ip link show
++.RI "[ " DEVICE " ]"
++
++.ti -8
++.BR "ip addr" " { " add " | " del " } "
++.IB IFADDR " dev " STRING
++
++.ti -8
++.BR "ip addr" " { " show " | " flush " } [ " dev
++.IR STRING " ] [ "
++.B scope
++.IR SCOPE-ID " ] [ "
++.B to
++.IR PREFIX " ] [ " FLAG-LIST " ] [ "
++.B label
++.IR PATTERN " ]"
++
++.ti -8
++.IR IFADDR " := " PREFIX " | " ADDR
++.B peer
++.IR PREFIX " [ "
++.B broadcast
++.IR ADDR " ] [ "
++.B anycast
++.IR ADDR " ] [ "
++.B label
++.IR STRING " ] [ "
++.B scope
++.IR SCOPE-ID " ]"
++
++.ti -8
++.IR SCOPE-ID " := "
++.RB "[ " host " | " link " | " global " | "
++.IR NUMBER " ]"
++
++.ti -8
++.IR FLAG-LIST " := [ " FLAG-LIST " ] " FLAG
++
++.ti -8
++.IR FLAG " := "
++.RB "[ " permanent " | " dynamic " | " secondary " | " primary " | "\
++tentative " | " deprecated " ]"
++
++.ti -8
++.BR "ip route" " { "
++.BR list " | " flush " } "
++.I SELECTOR
++
++.ti -8
++.B ip route get
++.IR ADDRESS " [ "
++.BI from " ADDRESS " iif " STRING"
++.RB " ] [ " oif
++.IR STRING " ] [ "
++.B tos
++.IR TOS " ]"
++
++.ti -8
++.BR "ip route" " { " add " | " del " | " change " | " append " | "\
++replace " | " monitor " } "
++.I ROUTE
++
++.ti -8
++.IR SELECTOR " := "
++.RB "[ " root
++.IR PREFIX " ] [ "
++.B match
++.IR PREFIX " ] [ "
++.B exact
++.IR PREFIX " ] [ "
++.B table
++.IR TABLE_ID " ] [ "
++.B proto
++.IR RTPROTO " ] [ "
++.B type
++.IR TYPE " ] [ "
++.B scope
++.IR SCOPE " ]"
++
++.ti -8
++.IR ROUTE " := " NODE_SPEC " [ " INFO_SPEC " ]"
++
++.ti -8
++.IR NODE_SPEC " := [ " TYPE " ] " PREFIX " ["
++.B tos
++.IR TOS " ] [ "
++.B table
++.IR TABLE_ID " ] [ "
++.B proto
++.IR RTPROTO " ] [ "
++.B scope
++.IR SCOPE " ] [ "
++.B metric
++.IR METRIC " ]"
++
++.ti -8
++.IR INFO_SPEC " := " "NH OPTIONS FLAGS" " ["
++.B nexthop
++.IR NH " ] ..."
++
++.ti -8
++.IR NH " := [ "
++.B via
++.IR ADDRESS " ] [ "
++.B dev
++.IR STRING " ] [ "
++.B weight
++.IR NUMBER " ] " NHFLAGS
++
++.ti -8
++.IR OPTIONS " := " FLAGS " [ "
++.B mtu
++.IR NUMBER " ] [ "
++.B advmss
++.IR NUMBER " ] [ "
++.B rtt
++.IR NUMBER " ] [ "
++.B rttvar
++.IR NUMBER " ] [ "
++.B window
++.IR NUMBER " ] [ "
++.B cwnd
++.IR NUMBER " ] [ "
++.B ssthresh
++.IR NUMBER " ] [ "
++.B realms
++.IR REALM " ]"
++
++.ti -8
++.IR TYPE " := [ "
++.BR unicast " | " local " | " broadcast " | " multicast " | "\
++throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]"
++
++.ti -8
++.IR TABLE_ID " := [ "
++.BR local " | " main " | " default " | " all " |"
++.IR NUMBER " ]"
++
++.ti -8
++.IR SCOPE " := [ "
++.BR host " | " link " | " global " |"
++.IR NUMBER " ]"
++
++.ti -8
++.IR FLAGS " := [ "
++.BR equalize " ]"
++
++.ti -8
++.IR NHFLAGS " := [ "
++.BR onlink " | " pervasive " ]"
++
++.ti -8
++.IR RTPROTO " := [ "
++.BR kernel " | " boot " | " static " |"
++.IR NUMBER " ]"
++
++.ti -8
++.B ip rule
++.RB " [ " list " | " add " | " del " ]"
++.I SELECTOR ACTION
++
++.ti -8
++.IR SELECTOR " := [ "
++.B from
++.IR PREFIX " ] [ "
++.B to
++.IR PREFIX " ] [ "
++.B tos
++.IR TOS " ] [ "
++.B fwmark
++.IR FWMARK " ] [ "
++.B dev
++.IR STRING " ] [ "
++.B pref
++.IR NUMBER " ]"
++
++.ti -8
++.IR ACTION " := [ "
++.B table
++.IR TABLE_ID " ] [ "
++.B nat
++.IR ADDRESS " ] [ "
++.BR prohibit " | " reject " | " unreachable " ] [ " realms
++.RI "[" SRCREALM "/]" DSTREALM " ]"
++
++.ti -8
++.IR TABLE_ID " := [ "
++.BR local " | " main " | " default " |"
++.IR NUMBER " ]"
++
++.ti -8
++.BR "ip neigh" " { " add " | " del " | " change " | " replace " } { "
++.IR ADDR " [ "
++.B lladdr
++.IR LLADDR " ] [ "
++.BR nud " { " permanent " | " noarp " | " stale " | " reachable " } ] | " proxy
++.IR ADDR " } [ "
++.B dev
++.IR DEV " ]"
++
++.ti -8
++.BR "ip neigh" " { " show " | " flush " } [ " to
++.IR PREFIX " ] [ "
++.B dev
++.IR DEV " ] [ "
++.B nud
++.IR STATE " ]"
++
++.ti -8
++.BR "ip tunnel" " { " add " | " change " | " del " | " show " }"
++.RI "[ " NAME " ]"
++.br
++.RB "[ " mode " { " ipip " | " gre " | " sit " } ]"
++.br
++.RB "[ " remote
++.IR ADDR " ] [ "
++.B local
++.IR ADDR " ]"
++.br
++.RB "[ [" i "|" o "]" seq " ] [ [" i "|" o "]" key
++.IR KEY " ] [ "
++.RB "[" i "|" o "]" csum " ] ]"
++.br
++.RB "[ " ttl
++.IR TTL " ] [ "
++.B tos
++.IR TOS " ] [ "
++.RB "[" no "]" pmtudisc " ]"
++.br
++.RB "[ " dev
++.IR PHYS_DEV " ]"
++
++.ti -8
++.IR ADDR " := { " IP_ADDRESS " |"
++.BR any " }"
++
++.ti -8
++.IR TOS " := { " NUMBER " |"
++.BR inherit " }"
++
++.ti -8
++.IR TTL " := { " 1 ".." 255 " | "
++.BR inherit " }"
++
++.ti -8
++.IR KEY " := { " DOTTED_QUAD " | " NUMBER " }"
++
++.ti -8
++.BR "ip maddr" " [ " add " | " del " ]"
++.IB MULTIADDR " dev " STRING
++
++.ti -8
++.BR "ip maddr show" " [ " dev
++.IR STRING " ]"
++
++.ti -8
++.BR "ip mroute show" " ["
++.IR PREFIX " ] [ "
++.B from
++.IR PREFIX " ] [ "
++.B iif
++.IR DEVICE " ]"
++
++.ti -8
++.BR "ip monitor" " [ " all " |"
++.IR LISTofOBJECTS " ]"
++.in -8
++.ad b
++
++.SH OPTIONS
++
++.TP
++.BR "\-V" , " \-Version"
++print the version of the
++.B ip
++utility and exit.
++
++.TP
++.BR "\-s" , " \-stats", " \-statistics"
++output more information. If the option
++appears twice or more, the amount of information increases.
++As a rule, the information is statistics or some time values.
++
++.TP
++.BR "\-f" , " \-family"
++followed by protocol family identifier:
++.BR "inet" , " inet6"
++or
++.BR link ,
++enforce the protocol family to use. If the option is not present,
++the protocol family is guessed from other arguments. If the rest
++of the command line does not give enough information to guess the
++family,
++.B ip
++falls back to the default one, usually
++.B inet
++or
++.BR "any" .
++.B link
++is a special family identifier meaning that no networking protocol
++is involved.
++
++.TP
++.B \-4
++shortcut for
++.BR "\-family inet" .
++
++.TP
++.B \-6
++shortcut for
++.BR "\-family inet6" .
++
++.TP
++.B \-0
++shortcut for
++.BR "\-family link" .
++
++.TP
++.BR "\-o" , " \-oneline"
++output each record on a single line, replacing line feeds
++with the
++.B '\e'
++character. This is convenient when you want to count records
++with
++.BR wc (1)
++or to
++.BR grep (1)
++the output.
++
++.TP
++.BR "\-r" , " \-resolve"
++use the system's name resolver to print DNS names instead of
++host addresses.
++
++.SH IP - COMMAND SYNTAX
++
++.SS
++.I OBJECT
++
++.TP
++.B link
++- network device.
++
++.TP
++.B address
++- protocol (IP or IPv6) address on a device.
++.TP
++.B neighbour
++- ARP or NDISC cache entry.
++
++.TP
++.B route
++- routing table entry.
++
++.TP
++.B rule
++- rule in routing policy database.
++
++.TP
++.B maddress
++- multicast address.
++
++.TP
++.B mroute
++- multicast routing cache entry.
++
++.TP
++.B tunnel
++- tunnel over IP.
++
++.PP
++The names of all objects may be written in full or
++abbreviated form, f.e.
++.B address
++is abbreviated as
++.B addr
++or just
++.BR a .
++
++.SS
++.I COMMAND
++
++Specifies the action to perform on the object.
++The set of possible actions depends on the object type.
++As a rule, it is possible to
++.BR "add" , " delete"
++and
++.B show
++(or
++.B list
++) objects, but some objects do not allow all of these operations
++or have some additional commands. The
++.B help
++command is available for all objects. It prints
++out a list of available commands and argument syntax conventions.
++.sp
++If no command is given, some default command is assumed.
++Usually it is
++.B list
++or, if the objects of this class cannot be listed,
++.BR "help" .
++
++.SH ip link - network device configuration
++
++.B link
++is a network device and the corresponding commands
++display and change the state of devices.
++
++.SS ip link set - change device attributes
++
++.TP
++.BI dev " NAME " (default)
++.I NAME
++specifies network device to operate on.
++
++.TP
++.BR up " and " down
++change the state of the device to
++.B UP
++or
++.BR "DOWN" .
++
++.TP
++.BR "arp on " or " arp off"
++change the
++.B NOARP
++flag on the device.
++
++.TP
++.BR "multicast on " or " multicast off"
++change the
++.B MULTICAST
++flag on the device.
++
++.TP
++.BR "dynamic on " or " dynamic off"
++change the
++.B DYNAMIC
++flag on the device.
++
++.TP
++.BI name " NAME"
++change the name of the device. This operation is not
++recommended if the device is running or has some addresses
++already configured.
++
++.TP
++.BI txqueuelen " NUMBER"
++.TP
++.BI txqlen " NUMBER"
++change the transmit queue length of the device.
++
++.TP
++.BI mtu " NUMBER"
++change the
++.I MTU
++of the device.
++
++.TP
++.BI address " LLADDRESS"
++change the station address of the interface.
++
++.TP
++.BI broadcast " LLADDRESS"
++.TP
++.BI brd " LLADDRESS"
++.TP
++.BI peer " LLADDRESS"
++change the link layer broadcast address or the peer address when
++the interface is
++.IR "POINTOPOINT" .
++
++.PP
++.B Warning:
++If multiple parameter changes are requested,
++.B ip
++aborts immediately after any of the changes have failed.
++This is the only case when
++.B ip
++can move the system to an unpredictable state. The solution
++is to avoid changing several parameters with one
++.B ip link set
++call.
++
++.SS ip link show - display device attributes
++
++.TP
++.BI dev " NAME " (default)
++.I NAME
++specifies the network device to show.
++If this argument is omitted all devices are listed.
++
++.TP
++.B up
++only display running interfaces.
++
++.SH ip address - protocol address management.
++
++The
++.B address
++is a protocol (IP or IPv6) address attached
++to a network device. Each device must have at least one address
++to use the corresponding protocol. It is possible to have several
++different addresses attached to one device. These addresses are not
++discriminated, so that the term
++.B alias
++is not quite appropriate for them and we do not use it in this document.
++.sp
++The
++.B ip addr
++command displays addresses and their properties, adds new addresses
++and deletes old ones.
++
++.SS ip address add - add new protocol address.
++
++.TP
++.BI dev " NAME"
++the name of the device to add the address to.
++
++.TP
++.BI local " ADDRESS " (default)
++the address of the interface. The format of the address depends
++on the protocol. It is a dotted quad for IP and a sequence of
++hexadecimal halfwords separated by colons for IPv6. The
++.I ADDRESS
++may be followed by a slash and a decimal number which encodes
++the network prefix length.
++
++.TP
++.BI peer " ADDRESS"
++the address of the remote endpoint for pointopoint interfaces.
++Again, the
++.I ADDRESS
++may be followed by a slash and a decimal number, encoding the network
++prefix length. If a peer address is specified, the local address
++cannot have a prefix length. The network prefix is associated
++with the peer rather than with the local address.
++
++.TP
++.BI broadcast " ADDRESS"
++the broadcast address on the interface.
++.sp
++It is possible to use the special symbols
++.B '+'
++and
++.B '-'
++instead of the broadcast address. In this case, the broadcast address
++is derived by setting/resetting the host bits of the interface prefix.
++
++.TP
++.BI label " NAME"
++Each address may be tagged with a label string.
++In order to preserve compatibility with Linux-2.0 net aliases,
++this string must coincide with the name of the device or must be prefixed
++with the device name followed by a colon.
++
++.TP
++.BI scope " SCOPE_VALUE"
++the scope of the area where this address is valid.
++The available scopes are listed in file
++.BR "/etc/iproute2/rt_scopes" .
++Predefined scope values are:
++
++.in +8
++.B global
++- the address is globally valid.
++.sp
++.B site
++- (IPv6 only) the address is site local, i.e. it is
++valid inside this site.
++.sp
++.B link
++- the address is link local, i.e. it is valid only on this device.
++.sp
++.B host
++- the address is valid only inside this host.
++.in -8
++
++.SS ip address delete - delete protocol address
++.B Arguments:
++coincide with the arguments of
++.BR "ip addr add" .
++The device name is a required argument. The rest are optional.
++If no arguments are given, the first address is deleted.
++
++.SS ip address show - look at protocol addresses
++
++.TP
++.BI dev " NAME " (default)
++name of device.
++
++.TP
++.BI scope " SCOPE_VAL"
++only list addresses with this scope.
++
++.TP
++.BI to " PREFIX"
++only list addresses matching this prefix.
++
++.TP
++.BI label " PATTERN"
++only list addresses with labels matching the
++.IR "PATTERN" .
++.I PATTERN
++is a usual shell style pattern.
++
++.TP
++.BR dynamic " and " permanent
++(IPv6 only) only list addresses installed due to stateless
++address configuration or only list permanent (not dynamic)
++addresses.
++
++.TP
++.B tentative
++(IPv6 only) only list addresses which did not pass duplicate
++address detection.
++
++.TP
++.B deprecated
++(IPv6 only) only list deprecated addresses.
++
++.TP
++.BR primary " and " secondary
++only list primary (or secondary) addresses.
++
++.SS ip address flush - flush protocol addresses
++This command flushes the protocol addresses selected by some criteria.
++
++.PP
++This command has the same arguments as
++.B show.
++The difference is that it does not run when no arguments are given.
++
++.PP
++.B Warning:
++This command (and other
++.B flush
++commands described below) is pretty dangerous. If you make a mistake,
++it will not forgive it, but will cruelly purge all the addresses.
++
++.PP
++With the
++.B \-statistics
++option, the command becomes verbose. It prints out the number of deleted
++addresses and the number of rounds made to flush the address list. If
++this option is given twice,
++.B ip addr flush
++also dumps all the deleted addresses in the format described in the
++previous subsection.
++
++.SH ip neighbour - neighbour/arp tables management.
++
++.B neighbour
++objects establish bindings between protocol addresses and
++link layer addresses for hosts sharing the same link.
++Neighbour entries are organized into tables. The IPv4 neighbour table
++is known by another name - the ARP table.
++
++.P
++The corresponding commands display neighbour bindings
++and their properties, add new neighbour entries and delete old ones.
++
++.SS ip neighbour add - add a new neighbour entry
++.SS ip neighbour change - change an existing entry
++.SS ip neighbour replace - add a new entry or change an existing one
++
++These commands create new neighbour records or update existing ones.
++
++.TP
++.BI to " ADDRESS " (default)
++the protocol address of the neighbour. It is either an IPv4 or IPv6 address.
++
++.TP
++.BI dev " NAME"
++the interface to which this neighbour is attached.
++
++.TP
++.BI lladdr " LLADDRESS"
++the link layer address of the neighbour.
++.I LLADDRESS
++can also be
++.BR "null" .
++
++.TP
++.BI nud " NUD_STATE"
++the state of the neighbour entry.
++.B nud
++is an abbreviation for 'Neighbour Unreachability Detection'.
++The state can take one of the following values:
++
++.in +8
++.B permanent
++- the neighbour entry is valid forever and can only
++be removed administratively.
++.sp
++
++.B noarp
++- the neighbour entry is valid. No attempts to validate
++this entry will be made but it can be removed when its lifetime expires.
++.sp
++
++.B reachable
++- the neighbour entry is valid until the reachability
++timeout expires.
++.sp
++
++.B stale
++- the neighbour entry is valid but suspicious.
++This option to
++.B ip neigh
++does not change the neighbour state if it was valid and the address
++is not changed by this command.
++.in -8
++
++.SS ip neighbour delete - delete a neighbour entry
++This command invalidates a neighbour entry.
++
++.PP
++The arguments are the same as with
++.BR "ip neigh add" ,
++except that
++.B lladdr
++and
++.B nud
++are ignored.
++
++.PP
++.B Warning:
++Attempts to delete or manually change a
++.B noarp
++entry created by the kernel may result in unpredictable behaviour.
++Particularly, the kernel may try to resolve this address even
++on a
++.B NOARP
++interface or if the address is multicast or broadcast.
++
++.SS ip neighbour show - list neighbour entries
++
++This command displays neighbour tables.
++
++.TP
++.BI to " ADDRESS " (default)
++the prefix selecting the neighbours to list.
++
++.TP
++.BI dev " NAME"
++only list the neighbours attached to this device.
++
++.TP
++.B unused
++only list neighbours which are not currently in use.
++
++.TP
++.BI nud " NUD_STATE"
++only list neighbour entries in this state.
++.I NUD_STATE
++takes values listed below or the special value
++.B all
++which means all states. This option may occur more than once.
++If this option is absent,
++.B ip
++lists all entries except for
++.B none
++and
++.BR "noarp" .
++
++.SS ip neighbour flush - flush neighbour entries
++This command flushes neighbour tables, selecting
++entries to flush by some criteria.
++
++.PP
++This command has the same arguments as
++.B show.
++The differences are that it does not run when no arguments are given,
++and that the default neighbour states to be flushed do not include
++.B permanent
++and
++.BR "noarp" .
++
++.PP
++With the
++.B \-statistics
++option, the command becomes verbose. It prints out the number of
++deleted neighbours and the number of rounds made to flush the
++neighbour table. If the option is given
++twice,
++.B ip neigh flush
++also dumps all the deleted neighbours.
++
++.SH ip route - routing table management
++This command manipulates route entries in the kernel routing tables, which keep
++information about paths to other networked nodes.
++.sp
++.B Route types:
++
++.in +8
++.B unicast
++- the route entry describes real paths to the destinations covered
++by the route prefix.
++
++.sp
++.B unreachable
++- these destinations are unreachable. Packets are discarded and the
++ICMP message
++.I host unreachable
++is generated.
++The local senders get an
++.I EHOSTUNREACH
++error.
++
++.sp
++.B blackhole
++- these destinations are unreachable. Packets are discarded silently.
++The local senders get an
++.I EINVAL
++error.
++
++.sp
++.B prohibit
++- these destinations are unreachable. Packets are discarded and the
++ICMP message
++.I communication administratively prohibited
++is generated. The local senders get an
++.I EACCES
++error.
++
++.sp
++.B local
++- the destinations are assigned to this host. The packets are looped
++back and delivered locally.
++
++.sp
++.B broadcast
++- the destinations are broadcast addresses. The packets are sent as
++link broadcasts.
++
++.sp
++.B throw
++- a special control route used together with policy rules. If such a
++route is selected, lookup in this table is terminated pretending that
++no route was found. Without policy routing it is equivalent to the
++absence of the route in the routing table. The packets are dropped
++and the ICMP message
++.I net unreachable
++is generated. The local senders get an
++.I ENETUNREACH
++error.
++
++.sp
++.B nat
++- a special NAT route. Destinations covered by the prefix
++are considered to be dummy (or external) addresses which require translation
++to real (or internal) ones before forwarding. The addresses to translate to
++are selected with the attribute
++.BR "via" .
++
++.sp
++.B anycast
++.RI "- " "not implemented"
++the destinations are
++.I anycast
++addresses assigned to this host. They are mainly equivalent
++to
++.B local
++with one difference: such addresses are invalid when used
++as the source address of any packet.
++
++.sp
++.B multicast
++- a special type used for multicast routing. It is not present in
++normal routing tables.
++.in -8
++
++.P
++.B Route tables:
++Linux-2.x can pack routes into several routing
++tables identified by a number in the range from 1 to 255 or by
++name from the file
++.BR /etc/iproute2/rt_tables .
++By default all normal routes are inserted into the
++.B main
++table (ID 254) and the kernel only uses this table when calculating routes.
++
++.sp
++Actually, one other table always exists, which is invisible but
++even more important. It is the
++.B local
++table (ID 255). This table
++consists of routes for local and broadcast addresses. The kernel maintains
++this table automatically and the administrator usually need not modify it
++or even look at it.
++
++The multiple routing tables enter the game when
++.I policy routing
++is used.
++
++.SS ip route add - add new route
++.SS ip route change - change route
++.SS ip route replace - change or add new one
++
++.TP
++.BI to " TYPE PREFIX " (default)
++the destination prefix of the route. If
++.I TYPE
++is omitted,
++.B ip
++assumes type
++.BR "unicast" .
++Other values of
++.I TYPE
++are listed above.
++.I PREFIX
++is an IP or IPv6 address optionally followed by a slash and the
++prefix length. If the length of the prefix is missing,
++.B ip
++assumes a full-length host route. There is also a special
++.I PREFIX
++.B default
++- which is equivalent to IP
++.B 0/0
++or to IPv6
++.BR "::/0" .
++
++.TP
++.BI tos " TOS"
++.TP
++.BI dsfield " TOS"
++the Type Of Service (TOS) key. This key has no associated mask and
++the longest match is understood as: First, compare the TOS
++of the route and of the packet. If they are not equal, then the packet
++may still match a route with a zero TOS.
++.I TOS
++is either an 8 bit hexadecimal number or an identifier
++from
++.BR "/etc/iproute2/rt_dsfield" .
++
++.TP
++.BI metric " NUMBER"
++.TP
++.BI preference " NUMBER"
++the preference value of the route.
++.I NUMBER
++is an arbitrary 32bit number.
++
++.TP
++.BI table " TABLEID"
++the table to add this route to.
++.I TABLEID
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_tables" .
++If this parameter is omitted,
++.B ip
++assumes the
++.B main
++table, with the exception of
++.BR local ", " broadcast " and " nat
++routes, which are put into the
++.B local
++table by default.
++
++.TP
++.BI dev " NAME"
++the output device name.
++
++.TP
++.BI via " ADDRESS"
++the address of the nexthop router. Actually, the sense of this field
++depends on the route type. For normal
++.B unicast
++routes it is either the true next hop router or, if it is a direct
++route installed in BSD compatibility mode, it can be a local address
++of the interface. For NAT routes it is the first address of the block
++of translated IP destinations.
++
++.TP
++.BI src " ADDRESS"
++the source address to prefer when sending to the destinations
++covered by the route prefix.
++
++.TP
++.BI realm " REALMID"
++the realm to which this route is assigned.
++.I REALMID
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_realms" .
++
++.TP
++.BI mtu " MTU"
++.TP
++.BI "mtu lock" " MTU"
++the MTU along the path to the destination. If the modifier
++.B lock
++is not used, the MTU may be updated by the kernel due to
++Path MTU Discovery. If the modifier
++.B lock
++is used, no path MTU discovery will be tried, all packets
++will be sent without the DF bit in IPv4 case or fragmented
++to MTU for IPv6.
++
++.TP
++.BI window " NUMBER"
++the maximal window for TCP to advertise to these destinations,
++measured in bytes. It limits maximal data bursts that our TCP
++peers are allowed to send to us.
++
++.TP
++.BI rtt " NUMBER"
++the initial RTT ('Round Trip Time') estimate.
++
++.TP
++.BI rttvar " NUMBER " "(2.3.15+ only)"
++the initial RTT variance estimate.
++
++.TP
++.BI ssthresh " NUMBER " "(2.3.15+ only)"
++an estimate for the initial slow start threshold.
++
++.TP
++.BI cwnd " NUMBER " "(2.3.15+ only)"
++the clamp for congestion window. It is ignored if the
++.B lock
++flag is not used.
++
++.TP
++.BI advmss " NUMBER " "(2.3.15+ only)"
++the MSS ('Maximal Segment Size') to advertise to these
++destinations when establishing TCP connections. If it is not given,
++Linux uses a default value calculated from the first hop device MTU.
++(If the path to these destinations is asymmetric, this guess may be wrong.)
++
++.TP
++.BI reordering " NUMBER " "(2.3.15+ only)"
++Maximal reordering on the path to this destination.
++If it is not given, Linux uses the value selected with
++.B sysctl
++variable
++.BR "net/ipv4/tcp_reordering" .
++
++.TP
++.BI nexthop " NEXTHOP"
++the nexthop of a multipath route.
++.I NEXTHOP
++is a complex value with its own syntax similar to the top level
++argument lists:
++
++.in +8
++.BI via " ADDRESS"
++- is the nexthop router.
++.sp
++
++.BI dev " NAME"
++- is the output device.
++.sp
++
++.BI weight " NUMBER"
++- is a weight for this element of a multipath
++route reflecting its relative bandwidth or quality.
++.in -8
++
++.TP
++.BI scope " SCOPE_VAL"
++the scope of the destinations covered by the route prefix.
++.I SCOPE_VAL
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_scopes" .
++If this parameter is omitted,
++.B ip
++assumes scope
++.B global
++for all gatewayed
++.B unicast
++routes, scope
++.B link
++for direct
++.BR unicast " and " broadcast
++routes and scope
++.BR host " for " local
++routes.
++
++.TP
++.BI protocol " RTPROTO"
++the routing protocol identifier of this route.
++.I RTPROTO
++may be a number or a string from the file
++.BR "/etc/iproute2/rt_protos" .
++If the routing protocol ID is not given,
++.BR ip " assumes protocol"
++.B boot
++(i.e. it assumes the route was added by someone who doesn't
++understand what they are doing). Several protocol values have
++a fixed interpretation.
++Namely:
++
++.in +8
++.B redirect
++- the route was installed due to an ICMP redirect.
++.sp
++
++.B kernel
++- the route was installed by the kernel during autoconfiguration.
++.sp
++
++.B boot
++- the route was installed during the bootup sequence.
++If a routing daemon starts, it will purge all of them.
++.sp
++
++.B static
++- the route was installed by the administrator
++to override dynamic routing. Routing daemon will respect them
++and, probably, even advertise them to its peers.
++.sp
++
++.B ra
++- the route was installed by Router Discovery protocol.
++.in -8
++
++.sp
++The rest of the values are not reserved and the administrator is free
++to assign (or not to assign) protocol tags.
++
++.TP
++.B onlink
++pretend that the nexthop is directly attached to this link,
++even if it does not match any interface prefix.
++
++.TP
++.B equalize
++allow packet by packet randomization on multipath routes.
++Without this modifier, the route will be frozen to one selected
++nexthop, so that load splitting will only occur on a per-flow basis.
++.B equalize
++only works if the kernel is patched.
++
++.SS ip route delete - delete route
++
++.B ip route del
++has the same arguments as
++.BR "ip route add" ,
++but their semantics are a bit different.
++
++Key values
++.RB "(" to ", " tos ", " preference " and " table ")"
++select the route to delete. If optional attributes are present,
++.B ip
++verifies that they coincide with the attributes of the route to delete.
++If no route with the given key and attributes was found,
++.B ip route del
++fails.
++
++.SS ip route show - list routes
++the command displays the contents of the routing tables or the route(s)
++selected by some criteria.
++
++.TP
++.BI to " SELECTOR " (default)
++only select routes from the given range of destinations.
++.I SELECTOR
++consists of an optional modifier
++.RB "(" root ", " match " or " exact ")"
++and a prefix.
++.BI root " PREFIX"
++selects routes with prefixes not shorter than
++.IR PREFIX "."
++F.e.
++.BI root " 0/0"
++selects the entire routing table.
++.BI match " PREFIX"
++selects routes with prefixes not longer than
++.IR PREFIX "."
++F.e.
++.BI match " 10.0/16"
++selects
++.IR 10.0/16 ","
++.IR 10/8 " and " 0/0 ,
++but it does not select
++.IR 10.1/16 " and " 10.0.0/24 .
++And
++.BI exact " PREFIX"
++(or just
++.IR PREFIX ")"
++selects routes with this exact prefix. If none of these options
++is present,
++.B ip
++assumes
++.BI root " 0/0"
++i.e. it lists the entire table.
++
++.TP
++.BI tos " TOS"
++.BI dsfield " TOS"
++only select routes with the given TOS.
++
++.TP
++.BI table " TABLEID"
++show the routes from this table(s). The default setting is to show
++.BR "table main" "."
++.I TABLEID
++may either be the ID of a real table or one of the special values:
++.sp
++.in +8
++.B all
++- list all of the tables.
++.sp
++.B cache
++- dump the routing cache.
++.in -8
++
++.TP
++.B cloned
++.TP
++.B cached
++list cloned routes i.e. routes which were dynamically forked from
++other routes because some route attribute (f.e. MTU) was updated.
++Actually, it is equivalent to
++.BR "table cache" "."
++
++.TP
++.BI from " SELECTOR"
++the same syntax as for
++.BR to ","
++but it binds the source address range rather than destinations.
++Note that the
++.B from
++option only works with cloned routes.
++
++.TP
++.BI protocol " RTPROTO"
++only list routes of this protocol.
++
++.TP
++.BI scope " SCOPE_VAL"
++only list routes with this scope.
++
++.TP
++.BI type " TYPE"
++only list routes of this type.
++
++.TP
++.BI dev " NAME"
++only list routes going via this device.
++
++.TP
++.BI via " PREFIX"
++only list routes going via the nexthop routers selected by
++.IR PREFIX "."
++
++.TP
++.BI src " PREFIX"
++only list routes with preferred source addresses selected
++by
++.IR PREFIX "."
++
++.TP
++.BI realm " REALMID"
++.TP
++.BI realms " FROMREALM/TOREALM"
++only list routes with these realms.
++
++.SS ip route flush - flush routing tables
++this command flushes routes selected by some criteria.
++
++.sp
++The arguments have the same syntax and semantics as the arguments of
++.BR "ip route show" ,
++except that routing tables are purged rather than listed. The only other difference is
++the default action:
++.B show
++dumps the entire IP main routing table but
++.B flush
++prints the helper page.
++
++.sp
++With the
++.B \-statistics
++option, the command becomes verbose. It prints out the number of
++deleted routes and the number of rounds made to flush the routing
++table. If the option is given
++twice,
++.B ip route flush
++also dumps all the deleted routes in the format described in the
++previous subsection.
++
++.SS ip route get - get a single route
++this command gets a single route to a destination and prints its
++contents exactly as the kernel sees it.
++
++.TP
++.BI to " ADDRESS " (default)
++the destination address.
++
++.TP
++.BI from " ADDRESS"
++the source address.
++
++.TP
++.BI tos " TOS"
++.TP
++.BI dsfield " TOS"
++the Type Of Service.
++
++.TP
++.BI iif " NAME"
++the device from which this packet is expected to arrive.
++
++.TP
++.BI oif " NAME"
++force the output device on which this packet will be routed.
++
++.TP
++.B connected
++if no source address
++.RB "(option " from ")"
++was given, relookup the route with the source set to the preferred
++address received from the first lookup.
++If policy routing is used, it may be a different route.
++
++.P
++Note that this operation is not equivalent to
++.BR "ip route show" .
++.B show
++shows existing routes.
++.B get
++resolves them and creates new clones if necessary. Essentially,
++.B get
++is equivalent to sending a packet along this path.
++If the
++.B iif
++argument is not given, the kernel creates a route
++to output packets towards the requested destination.
++This is equivalent to pinging the destination
++with a subsequent
++.BR "ip route ls cache" ,
++however, no packets are actually sent. With the
++.B iif
++argument, the kernel pretends that a packet arrived from this interface
++and searches for a path to forward the packet.
++
++.SH ip rule - routing policy database management
++
++.BR "Rule" s
++in the routing policy database control the route selection algorithm.
++
++.P
++Classic routing algorithms used in the Internet make routing decisions
++based only on the destination address of packets (and in theory,
++but not in practice, on the TOS field).
++
++.P
++In some circumstances we want to route packets differently depending not only
++on destination addresses, but also on other packet fields: source address,
++IP protocol, transport protocol ports or even packet payload.
++This task is called 'policy routing'.
++
++.P
++To solve this task, the conventional destination based routing table, ordered
++according to the longest match rule, is replaced with a 'routing policy
++database' (or RPDB), which selects routes by executing some set of rules.
++
++.P
++Each policy routing rule consists of a
++.B selector
++and an
++.B action predicate.
++The RPDB is scanned in the order of increasing priority. The selector
++of each rule is applied to {source address, destination address, incoming
++interface, tos, fwmark} and, if the selector matches the packet,
++the action is performed. The action predicate may return with success.
++In this case, it will either give a route or failure indication
++and the RPDB lookup is terminated. Otherwise, the RPDB program
++continues on the next rule.
++
++.P
++Semantically, the natural action is to select the nexthop and the output device.
++
++.P
++At startup time the kernel configures the default RPDB consisting of three
++rules:
++
++.TP
++1.
++Priority: 0, Selector: match anything, Action: lookup routing
++table
++.B local
++(ID 255).
++The
++.B local
++table is a special routing table containing
++high priority control routes for local and broadcast addresses.
++.sp
++Rule 0 is special. It cannot be deleted or overridden.
++
++.TP
++2.
++Priority: 32766, Selector: match anything, Action: lookup routing
++table
++.B main
++(ID 254).
++The
++.B main
++table is the normal routing table containing all non-policy
++routes. This rule may be deleted and/or overridden with other
++ones by the administrator.
++
++.TP
++3.
++Priority: 32767, Selector: match anything, Action: lookup routing
++table
++.B default
++(ID 253).
++The
++.B default
++table is empty. It is reserved for some post-processing if no previous
++default rules selected the packet.
++This rule may also be deleted.
++
++.P
++Each RPDB entry has additional
++attributes. F.e. each rule has a pointer to some routing
++table. NAT and masquerading rules have an attribute to select new IP
++address to translate/masquerade. Besides that, rules have some
++optional attributes, which routes have, namely
++.BR "realms" .
++These values do not override those contained in the routing tables. They
++are only used if the route did not select any attributes.
++
++.sp
++The RPDB may contain rules of the following types:
++
++.in +8
++.B unicast
++- the rule prescribes to return the route found
++in the routing table referenced by the rule.
++
++.B blackhole
++- the rule prescribes to silently drop the packet.
++
++.B unreachable
++- the rule prescribes to generate a 'Network is unreachable' error.
++
++.B prohibit
++- the rule prescribes to generate 'Communication is administratively
++prohibited' error.
++
++.B nat
++- the rule prescribes to translate the source address
++of the IP packet into some other value.
++.in -8
++
++.SS ip rule add - insert a new rule
++.SS ip rule delete - delete a rule
++
++.TP
++.BI type " TYPE " (default)
++the type of this rule. The list of valid types was given in the previous
++subsection.
++
++.TP
++.BI from " PREFIX"
++select the source prefix to match.
++
++.TP
++.BI to " PREFIX"
++select the destination prefix to match.
++
++.TP
++.BI iif " NAME"
++select the incoming device to match. If the interface is loopback,
++the rule only matches packets originating from this host. This means
++that you may create separate routing tables for forwarded and local
++packets and, hence, completely segregate them.
++
++.TP
++.BI tos " TOS"
++.TP
++.BI dsfield " TOS"
++select the TOS value to match.
++
++.TP
++.BI fwmark " MARK"
++select the
++.B fwmark
++value to match.
++
++.TP
++.BI priority " PREFERENCE"
++the priority of this rule. Each rule should have an explicitly
++set
++.I unique
++priority value.
++
++.TP
++.BI table " TABLEID"
++the routing table identifier to look up if the rule selector matches.
++
++.TP
++.BI realms " FROM/TO"
++Realms to select if the rule matched and the routing table lookup
++succeeded. Realm
++.I TO
++is only used if the route did not select any realm.
++
++.TP
++.BI nat " ADDRESS"
++The base of the IP address block to translate (for source addresses).
++The
++.I ADDRESS
++may be either the start of the block of NAT addresses (selected by NAT
++routes) or a local host address (or even zero).
++In the last case the router does not translate the packets, but
++masquerades them to this address.
++
++.B Warning:
++Changes to the RPDB made with these commands do not become active
++immediately. It is assumed that after a script finishes a batch of
++updates, it flushes the routing cache with
++.BR "ip route flush cache" .
++
++.SS ip rule show - list rules
++This command has no arguments.
++
++.SH ip maddress - multicast addresses management
++
++.B maddress
++objects are multicast addresses.
++
++.SS ip maddress show - list multicast addresses
++
++.TP
++.BI dev " NAME " (default)
++the device name.
++
++.SS ip maddress add - add a multicast address
++.SS ip maddress delete - delete a multicast address
++these commands attach/detach a static link layer multicast address
++to listen on the interface.
++Note that it is impossible to join protocol multicast groups
++statically. This command only manages link layer addresses.
++
++.TP
++.BI address " LLADDRESS " (default)
++the link layer multicast address.
++
++.TP
++.BI dev " NAME"
++the device to join/leave this multicast address.
++
++.SH ip mroute - multicast routing cache management
++.B mroute
++objects are multicast routing cache entries created by a user level
++mrouting daemon (f.e.
++.B pimd
++or
++.B mrouted
++).
++
++Due to the limitations of the current interface to the multicast routing
++engine, it is impossible to change
++.B mroute
++objects administratively, so we may only display them. This limitation
++will be removed in the future.
++
++.SS ip mroute show - list mroute cache entries
++
++.TP
++.BI to " PREFIX " (default)
++the prefix selecting the destination multicast addresses to list.
++
++.TP
++.BI iif " NAME"
++the interface on which multicast packets are received.
++
++.TP
++.BI from " PREFIX"
++the prefix selecting the IP source addresses of the multicast route.
++
++.SH ip tunnel - tunnel configuration
++.B tunnel
++objects are tunnels, encapsulating packets in IPv4 packets and then
++sending them over the IP infrastructure.
++
++.SS ip tunnel add - add a new tunnel
++.SS ip tunnel change - change an existing tunnel
++.SS ip tunnel delete - destroy a tunnel
++
++.TP
++.BI name " NAME " (default)
++select the tunnel device name.
++
++.TP
++.BI mode " MODE"
++set the tunnel mode. Three modes are currently available:
++.BR ipip ", " sit " and " gre "."
++
++.TP
++.BI remote " ADDRESS"
++set the remote endpoint of the tunnel.
++
++.TP
++.BI local " ADDRESS"
++set the fixed local address for tunneled packets.
++It must be an address on another interface of this host.
++
++.TP
++.BI ttl " N"
++set a fixed TTL
++.I N
++on tunneled packets.
++.I N
++is a number in the range 1--255. 0 is a special value
++meaning that packets inherit the TTL value.
++The default value is:
++.BR "inherit" .
++
++.TP
++.BI tos " T"
++.TP
++.BI dsfield " T"
++set a fixed TOS
++.I T
++on tunneled packets.
++The default value is:
++.BR "inherit" .
++
++.TP
++.BI dev " NAME"
++bind the tunnel to the device
++.I NAME
++so that tunneled packets will only be routed via this device and will
++not be able to escape to another device when the route to endpoint
++changes.
++
++.TP
++.B nopmtudisc
++disable Path MTU Discovery on this tunnel.
++It is enabled by default. Note that a fixed ttl is incompatible
++with this option: tunnelling with a fixed ttl always performs pmtu
++discovery.
++
++.TP
++.BI key " K"
++.TP
++.BI ikey " K"
++.TP
++.BI okey " K"
++.RB ( " only GRE tunnels " )
++use keyed GRE with key
++.IR K ". " K
++is either a number or an IP address-like dotted quad.
++The
++.B key
++parameter sets the key to use in both directions.
++The
++.BR ikey " and " okey
++parameters set different keys for input and output.
++
++.TP
++.BR csum ", " icsum ", " ocsum
++.RB ( " only GRE tunnels " )
++generate/require checksums for tunneled packets.
++The
++.B ocsum
++flag calculates checksums for outgoing packets.
++The
++.B icsum
++flag requires that all input packets have the correct
++checksum. The
++.B csum
++flag is equivalent to the combination
++.BR "icsum ocsum" .
++
++.TP
++.BR seq ", " iseq ", " oseq
++.RB ( " only GRE tunnels " )
++serialize packets.
++The
++.B oseq
++flag enables sequencing of outgoing packets.
++The
++.B iseq
++flag requires that all input packets are serialized.
++The
++.B seq
++flag is equivalent to the combination
++.BR "iseq oseq" .
++.B It doesn't work. Don't use it.
++
++.SS ip tunnel show - list tunnels
++This command has no arguments.
++
++.SH ip monitor and rtmon - state monitoring
++
++The
++.B ip
++utility can monitor the state of devices, addresses
++and routes continuously. This option has a slightly different format.
++Namely, the
++.B monitor
++command is the first in the command line and then the object list follows:
++
++.BR "ip monitor" " [ " all " |"
++.IR LISTofOBJECTS " ]"
++
++.I LISTofOBJECTS
++is the list of object types that we want to monitor.
++It may contain
++.BR link ", " address " and " route "."
++If no
++.B file
++argument is given,
++.B ip
++opens RTNETLINK, listens on it and dumps state changes in the format
++described in previous sections.
++
++.P
++If a file name is given, it does not listen on RTNETLINK,
++but opens the file containing RTNETLINK messages saved in binary format
++and dumps them. Such a history file can be generated with the
++.B rtmon
++utility. This utility has a command line syntax similar to
++.BR "ip monitor" .
++Ideally,
++.B rtmon
++should be started before the first network configuration command
++is issued. F.e. if you insert:
++.sp
++.in +8
++rtmon file /var/log/rtmon.log
++.in -8
++.sp
++in a startup script, you will be able to view the full history
++later.
++
++.P
++Certainly, it is possible to start
++.B rtmon
++at any time.
++It prepends the history with the state snapshot dumped at the moment
++of starting.
++
++.SH HISTORY
++
++.B ip
++was written by Alexey N. Kuznetsov and added in Linux 2.2.
++.SH SEE ALSO
++.BR tc (8)
++.br
++.RB "IP Command reference " ip-cref.ps
++.br
++.RB "IP tunnels " ip-tunnels.ps
++
++.SH AUTHOR
++
++Manpage maintained by Michail Litvak <mci@owl.openwall.com>
+diff -Naur iproute2-orig/debian/manpages/old/tc-cbq-details.8 iproute2/debian/manpages/old/tc-cbq-details.8
+--- iproute2-orig/debian/manpages/old/tc-cbq-details.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-cbq-details.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,425 @@
++.TH CBQ 8 "8 December 2001" "iproute2" "Linux"
++.SH NAME
++CBQ \- Class Based Queueing
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] cbq avpkt
++bytes
++.B bandwidth
++rate
++.B [ cell
++bytes
++.B ] [ ewma
++log
++.B ] [ mpu
++bytes
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] cbq allot
++bytes
++.B [ bandwidth
++rate
++.B ] [ rate
++rate
++.B ] prio
++priority
++.B [ weight
++weight
++.B ] [ minburst
++packets
++.B ] [ maxburst
++packets
++.B ] [ ewma
++log
++.B ] [ cell
++bytes
++.B ] avpkt
++bytes
++.B [ mpu
++bytes
++.B ] [ bounded isolated ] [ split
++handle
++.B & defmap
++defmap
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++Class Based Queueing is a classful qdisc that implements a rich
++linksharing hierarchy of classes. It contains shaping elements as
++well as prioritizing capabilities. Shaping is performed using link
++idle time calculations based on the timing of dequeue events and
++underlying link bandwidth.
++
++.SH SHAPING ALGORITHM
++Shaping is done using link idle time calculations, and actions taken if
++these calculations deviate from set limits.
++
++When shaping a 10mbit/s connection to 1mbit/s, the link will
++be idle 90% of the time. If it isn't, it needs to be throttled so that it
++IS idle 90% of the time.
++
++From the kernel's perspective, this is hard to measure, so CBQ instead
++derives the idle time from the number of microseconds (in fact, jiffies)
++that elapse between requests from the device driver for more data. Combined
++with the knowledge of packet sizes, this is used to approximate how full or
++empty the link is.
++
++This is rather circumspect and doesn't always arrive at proper
++results. For example, what is the actual link speed of an interface
++that is not really able to transmit the full 100mbit/s of data,
++perhaps because of a badly implemented driver? A PCMCIA network card
++will also never achieve 100mbit/s because of the way the bus is
++designed - again, how do we calculate the idle time?
++
++The physical link bandwidth may be ill defined in case of not-quite-real
++network devices like PPP over Ethernet or PPTP over TCP/IP. The effective
++bandwidth in that case is probably determined by the efficiency of pipes
++to userspace - which is not defined.
++
++During operations, the effective idletime is measured using an
++exponential weighted moving average (EWMA), which considers recent
++packets to be exponentially more important than past ones. The Unix
++loadaverage is calculated in the same way.
++
++The calculated idle time is subtracted from the EWMA measured one,
++the resulting number is called 'avgidle'. A perfectly loaded link has
++an avgidle of zero: packets arrive exactly at the calculated
++interval.
++
++An overloaded link has a negative avgidle and if it gets too negative,
++CBQ throttles and is then 'overlimit'.
++
++Conversely, an idle link might amass a huge avgidle, which would then
++allow infinite bandwidths after a few hours of silence. To prevent
++this, avgidle is capped at
++.B maxidle.
++
++If overlimit, in theory, the CBQ could throttle itself for exactly the
++amount of time that was calculated to pass between packets, and then
++pass one packet, and throttle again. Due to timer resolution constraints,
++this may not be feasible, see the
++.B minburst
++parameter below.
++
++.SH CLASSIFICATION
++Within the one CBQ instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, CBQ starts at the root and uses various methods to
++determine which class should receive the data. If a verdict is reached, this
++process is repeated for the recipient class which might have further
++means of classifying traffic to its children, if any.
++
++CBQ has the following methods available to classify a packet to any child
++classes.
++.TP
++(i)
++.B skb->priority class encoding.
++Can be set from userspace by an application with the
++.B SO_PRIORITY
++setsockopt.
++The
++.B skb->priority class encoding
++only applies if the skb->priority holds a major:minor handle of an existing
++class within this qdisc.
++.TP
++(ii)
++tc filters attached to the class.
++.TP
++(iii)
++The defmap of a class, as set with the
++.B split & defmap
++parameters. The defmap may contain instructions for each possible Linux packet
++priority.
++
++.P
++Each class also has a
++.B level.
++Leaf nodes, attached to the bottom of the class hierarchy, have a level of 0.
++.SH CLASSIFICATION ALGORITHM
++
++Classification is a loop, which terminates when a leaf class is found. At any
++point the loop may jump to the fallback algorithm.
++
++The loop consists of the following steps:
++.TP
++(i)
++If the packet is generated locally and has a valid classid encoded within its
++.B skb->priority,
++choose it and terminate.
++
++.TP
++(ii)
++Consult the tc filters, if any, attached to this child. If these return
++a class which is not a leaf class, restart loop from the class returned.
++If it is a leaf, choose it and terminate.
++.TP
++(iii)
++If the tc filters did not return a class, but did return a classid,
++try to find a class with that id within this qdisc.
++Check if the found class is of a lower
++.B level
++than the current class. If so, and the returned class is not a leaf node,
++restart the loop at the found class. If it is a leaf node, terminate.
++If we found an upward reference to a higher level, enter the fallback
++algorithm.
++.TP
++(iv)
++If the tc filters did not return a class, nor a valid reference to one,
++consider the minor number of the reference to be the priority. Retrieve
++a class from the defmap of this class for the priority. If this did not
++contain a class, consult the defmap of this class for the
++.B BEST_EFFORT
++class. If this is an upward reference, or no
++.B BEST_EFFORT
++class was defined,
++enter the fallback algorithm. If a valid class was found, and it is not a
++leaf node, restart the loop at this class. If it is a leaf, choose it and
++terminate. If
++neither the priority distilled from the classid, nor the
++.B BEST_EFFORT
++priority yielded a class, enter the fallback algorithm.
++.P
++The fallback algorithm resides outside of the loop and is as follows.
++.TP
++(i)
++Consult the defmap of the class at which the jump to fallback occurred. If
++the defmap contains a class for the
++.B
++priority
++of the class (which is related to the TOS field), choose this class and
++terminate.
++.TP
++(ii)
++Consult the map for a class for the
++.B BEST_EFFORT
++priority. If found, choose it, and terminate.
++.TP
++(iii)
++Choose the class at which break out to the fallback algorithm occurred. Terminate.
++.P
++The packet is enqueued to the class which was chosen when either algorithm
++terminated. It is therefore possible for a packet to be enqueued *not* at a
++leaf node, but in the middle of the hierarchy.
++
++.SH LINK SHARING ALGORITHM
++When dequeuing for sending to the network device, CBQ decides which of its
++classes will be allowed to send. It does so with a Weighted Round Robin process
++in which each class with packets gets a chance to send in turn. The WRR process
++starts by asking the highest priority classes (lowest numerically -
++highest semantically) for packets, and will continue to do so until they
++have no more data to offer, in which case the process repeats for lower
++priorities.
++
++.B CERTAINTY ENDS HERE, ANK PLEASE HELP
++
++Each class is not allowed to send at length though - they can only dequeue a
++configurable amount of data during each round.
++
++If a class is about to go overlimit, and it is not
++.B bounded
++it will try to borrow avgidle from siblings that are not
++.B isolated.
++This process is repeated from the bottom upwards. If a class is unable
++to borrow enough avgidle to send a packet, it is throttled and not asked
++for a packet for enough time for the avgidle to increase above zero.
++
++.B I REALLY NEED HELP FIGURING THIS OUT. REST OF DOCUMENT IS PRETTY CERTAIN
++.B AGAIN.
++
++.SH QDISC
++The root qdisc of a CBQ class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the CBQ instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional.
++.TP
++avpkt bytes
++For calculations, the average packet size must be known. It is silently capped
++at a minimum of 2/3 of the interface MTU. Mandatory.
++.TP
++bandwidth rate
++To determine the idle time, CBQ must know the bandwidth of your underlying
++physical interface, or parent qdisc. This is a vital parameter, more about it
++later. Mandatory.
++.TP
++cell
++The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
++.TP
++mpu
++A zero sized packet may still take time to transmit. This value is the lower
++cap for packet transmission time calculations - packets smaller than this value
++are still deemed to have this size. Defaults to zero.
++.TP
++ewma log
++When CBQ needs to measure the average idle time, it does so using an
++Exponentially Weighted Moving Average which smoothes out measurements into
++a moving average. The EWMA LOG determines how much smoothing occurs. Defaults
++to 5. Lower values imply greater sensitivity. Must be between 0 and 31.
++.P
++A CBQ qdisc does not shape out of its own accord. It only needs to know certain
++parameters about the underlying link. Actual shaping is done in classes.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++weight weight
++When dequeuing to the interface, classes are tried for traffic in a
++round-robin fashion. Classes with a higher configured rate will generally
++have more traffic to offer during each round, so it makes sense to allow
++it to dequeue more traffic. All weights under a class are normalized, so
++only the ratios matter. Defaults to the configured rate, unless the priority
++of this class is maximal, in which case it is set to 1.
++.TP
++allot bytes
++Allot specifies how many bytes a qdisc can dequeue
++during each round of the process. This parameter is weighted using the
++renormalized class weight described above.
++
++.TP
++prio priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++rate rate
++Maximum rate this class and all its children combined can send at. Mandatory.
++
++.TP
++bandwidth rate
++This is different from the bandwidth specified when creating a CBQ qdisc. Only
++used to determine maxidle and offtime, which are only calculated when
++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
++
++.TP
++maxburst
++This number of packets is used to calculate maxidle so that when
++avgidle is at maxidle, this number of average packets can be burst
++before avgidle drops to 0. Set it higher to be more tolerant of
++bursts. You can't set maxidle directly, only via this parameter.
++
++.TP
++minburst
++As mentioned before, CBQ needs to throttle in case of
++overlimit. The ideal solution is to do so for exactly the calculated
++idle time, and pass 1 packet. However, Unix kernels generally have a
++hard time scheduling events shorter than 10ms, so it is better to
++throttle for a longer period, and then pass minburst packets in one
++go, and then sleep minburst times longer.
++
++The time to wait is called the offtime. Higher values of minburst lead
++to more accurate shaping in the long term, but to bigger bursts at
++millisecond timescales.
++
++.TP
++minidle
++If avgidle is below 0, we are overlimit and need to wait until
++avgidle will be big enough to send one packet. To prevent a sudden
++burst from shutting down the link for a prolonged period of time,
++avgidle is reset to minidle if it gets too low.
++
++Minidle is specified in negative microseconds, so 10 means that
++avgidle is capped at -10us.
++
++.TP
++bounded
++Signifies that this class will not borrow bandwidth from its siblings.
++.TP
++isolated
++Means that this class will not lend bandwidth to its siblings.
++
++.TP
++split major:minor & defmap bitmap[/bitmap]
++If consulting filters attached to a class did not give a verdict,
++CBQ can also classify based on the packet's priority. There are 16
++priorities available, numbered from 0 to 15.
++
++The defmap specifies which priorities this class wants to receive,
++specified as a bitmap. The Least Significant Bit corresponds to priority
++zero. The
++.B split
++parameter tells CBQ at which class the decision must be made, which should
++be a (grand)parent of the class you are adding.
++
++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
++configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
++
++The complementary configuration would then
++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
++which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2.
++.TP
++estimator interval timeconstant
++CBQ can measure how much bandwidth each class is using, which tc filters
++can use to classify packets with. In order to determine the bandwidth
++it uses a very simple estimator that measures once every
++.B interval
++microseconds how much traffic has passed. This again is an EWMA, for which
++the time constant can be specified, also in microseconds. The
++.B time constant
++corresponds to the sluggishness of the measurement or, conversely, to the
++sensitivity of the average to short bursts. Higher values mean less
++sensitivity.
++
++
++
++.SH SOURCES
++.TP
++o
++Sally Floyd and Van Jacobson, "Link-sharing and Resource
++Management Models for Packet Networks",
++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
++
++.TP
++o
++Sally Floyd, "Notes on CBQ and Guarantee Service", 1995
++
++.TP
++o
++Sally Floyd, "Notes on Class-Based Queueing: Setting
++Parameters", 1996
++
++.TP
++o
++Sally Floyd and Michael Speer, "Experimental Results
++for Class-Based Queueing", 1998, not published.
++
++
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-cbq.8 iproute2/debian/manpages/old/tc-cbq.8
+--- iproute2-orig/debian/manpages/old/tc-cbq.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-cbq.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,353 @@
++.TH CBQ 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++CBQ \- Class Based Queueing
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] cbq [ allot
++bytes
++.B ] avpkt
++bytes
++.B bandwidth
++rate
++.B [ cell
++bytes
++.B ] [ ewma
++log
++.B ] [ mpu
++bytes
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] cbq allot
++bytes
++.B [ bandwidth
++rate
++.B ] [ rate
++rate
++.B ] prio
++priority
++.B [ weight
++weight
++.B ] [ minburst
++packets
++.B ] [ maxburst
++packets
++.B ] [ ewma
++log
++.B ] [ cell
++bytes
++.B ] avpkt
++bytes
++.B [ mpu
++bytes
++.B ] [ bounded isolated ] [ split
++handle
++.B & defmap
++defmap
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++Class Based Queueing is a classful qdisc that implements a rich
++linksharing hierarchy of classes. It contains shaping elements as
++well as prioritizing capabilities. Shaping is performed using link
++idle time calculations based on the timing of dequeue events and
++underlying link bandwidth.
++
++.SH SHAPING ALGORITHM
++When shaping a 10mbit/s connection to 1mbit/s, the link will
++be idle 90% of the time. If it isn't, it needs to be throttled so that it
++IS idle 90% of the time.
++
++During operations, the effective idletime is measured using an
++exponential weighted moving average (EWMA), which considers recent
++packets to be exponentially more important than past ones. The Unix
++loadaverage is calculated in the same way.
++
++The calculated idle time is subtracted from the EWMA measured one,
++the resulting number is called 'avgidle'. A perfectly loaded link has
++an avgidle of zero: packets arrive exactly at the calculated
++interval.
++
++An overloaded link has a negative avgidle and if it gets too negative,
++CBQ throttles and is then 'overlimit'.
++
++Conversely, an idle link might amass a huge avgidle, which would then
++allow infinite bandwidths after a few hours of silence. To prevent
++this, avgidle is capped at
++.B maxidle.
++
++If overlimit, in theory, the CBQ could throttle itself for exactly the
++amount of time that was calculated to pass between packets, and then
++pass one packet, and throttle again. Due to timer resolution constraints,
++this may not be feasible, see the
++.B minburst
++parameter below.
++
++.SH CLASSIFICATION
++Within the one CBQ instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, CBQ starts at the root and uses various methods to
++determine which class should receive the data.
++
++In the absence of uncommon configuration options, the process is rather easy.
++At each node we look for an instruction, and then go to the class the
++instruction refers us to. If the class found is a barren leaf-node (without
++children), we enqueue the packet there. If it is not yet a leaf node, we do
++the whole thing over again starting from that node.
++
++The following actions are performed, in order at each node we visit, until one
++sends us to another node, or terminates the process.
++.TP
++(i)
++Consult filters attached to the class. If sent to a leafnode, we are done.
++Otherwise, restart.
++.TP
++(ii)
++Consult the defmap for the priority assigned to this packet, which depends
++on the TOS bits. Check if the referral is leafless, otherwise restart.
++.TP
++(iii)
++Ask the defmap for instructions for the 'best effort' priority. Check the
++answer for leafness, otherwise restart.
++.TP
++(iv)
++If none of the above returned with an instruction, enqueue at this node.
++.P
++This algorithm makes sure that a packet always ends up somewhere, even while
++you are busy building your configuration.
++
++For more details, see
++.BR tc-cbq-details (8).
++
++.SH LINK SHARING ALGORITHM
++When dequeuing for sending to the network device, CBQ decides which of its
++classes will be allowed to send. It does so with a Weighted Round Robin process
++in which each class with packets gets a chance to send in turn. The WRR process
++starts by asking the highest priority classes (lowest numerically -
++highest semantically) for packets, and will continue to do so until they
++have no more data to offer, in which case the process repeats for lower
++priorities.
++
++Classes by default borrow bandwidth from their siblings. A class can be
++prevented from doing so by declaring it 'bounded'. A class can also indicate
++its unwillingness to lend out bandwidth by being 'isolated'.
++
++.SH QDISC
++The root of a CBQ qdisc class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the CBQ instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional, but very useful if classes
++will be generated within this qdisc.
++.TP
++allot bytes
++This allotment is the 'chunkiness' of link sharing and is used for determining packet
++transmission time tables. The qdisc allot differs slightly from the class allot discussed
++below. Optional. Defaults to a reasonable value, related to avpkt.
++.TP
++avpkt bytes
++The average size of a packet is needed for calculating maxidle, and is also used
++for making sure 'allot' has a safe value. Mandatory.
++.TP
++bandwidth rate
++To determine the idle time, CBQ must know the bandwidth of your underlying
++physical interface, or parent qdisc. This is a vital parameter, more about it
++later. Mandatory.
++.TP
++cell
++The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
++.TP
++mpu
++A zero sized packet may still take time to transmit. This value is the lower
++cap for packet transmission time calculations - packets smaller than this value
++are still deemed to have this size. Defaults to zero.
++.TP
++ewma log
++When CBQ needs to measure the average idle time, it does so using an
++Exponentially Weighted Moving Average which smoothes out measurements into
++a moving average. The EWMA LOG determines how much smoothing occurs. Lower
++values imply greater sensitivity. Must be between 0 and 31. Defaults
++to 5.
++.P
++A CBQ qdisc does not shape out of its own accord. It only needs to know certain
++parameters about the underlying link. Actual shaping is done in classes.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++weight weight
++When dequeuing to the interface, classes are tried for traffic in a
++round-robin fashion. Classes with a higher configured rate will generally
++have more traffic to offer during each round, so it makes sense to allow
++it to dequeue more traffic. All weights under a class are normalized, so
++only the ratios matter. Defaults to the configured rate, unless the priority
++of this class is maximal, in which case it is set to 1.
++.TP
++allot bytes
++Allot specifies how many bytes a qdisc can dequeue
++during each round of the process. This parameter is weighted using the
++renormalized class weight described above. Silently capped at a minimum of
++3/2 avpkt. Mandatory.
++
++.TP
++prio priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++avpkt
++See the QDISC section.
++
++.TP
++rate rate
++Maximum rate this class and all its children combined can send at. Mandatory.
++
++.TP
++bandwidth rate
++This is different from the bandwidth specified when creating a CBQ qdisc! Only
++used to determine maxidle and offtime, which are only calculated when
++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
++
++.TP
++maxburst
++This number of packets is used to calculate maxidle so that when
++avgidle is at maxidle, this number of average packets can be burst
++before avgidle drops to 0. Set it higher to be more tolerant of
++bursts. You can't set maxidle directly, only via this parameter.
++
++.TP
++minburst
++As mentioned before, CBQ needs to throttle in case of
++overlimit. The ideal solution is to do so for exactly the calculated
++idle time, and pass 1 packet. However, Unix kernels generally have a
++hard time scheduling events shorter than 10ms, so it is better to
++throttle for a longer period, and then pass minburst packets in one
++go, and then sleep minburst times longer.
++
++The time to wait is called the offtime. Higher values of minburst lead
++to more accurate shaping in the long term, but to bigger bursts at
++millisecond timescales. Optional.
++
++.TP
++minidle
++If avgidle is below 0, we are overlimit and need to wait until
++avgidle will be big enough to send one packet. To prevent a sudden
++burst from shutting down the link for a prolonged period of time,
++avgidle is reset to minidle if it gets too low.
++
++Minidle is specified in negative microseconds, so 10 means that
++avgidle is capped at -10us. Optional.
++
++.TP
++bounded
++Signifies that this class will not borrow bandwidth from its siblings.
++.TP
++isolated
++Means that this class will not lend bandwidth to its siblings.
++
++.TP
++split major:minor & defmap bitmap[/bitmap]
++If consulting filters attached to a class did not give a verdict,
++CBQ can also classify based on the packet's priority. There are 16
++priorities available, numbered from 0 to 15.
++
++The defmap specifies which priorities this class wants to receive,
++specified as a bitmap. The Least Significant Bit corresponds to priority
++zero. The
++.B split
++parameter tells CBQ at which class the decision must be made, which should
++be a (grand)parent of the class you are adding.
++
++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
++configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
++
++The complementary configuration would then
++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
++Which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2.
++.TP
++estimator interval timeconstant
++CBQ can measure how much bandwidth each class is using, which tc filters
++can use to classify packets with. In order to determine the bandwidth
++it uses a very simple estimator that measures once every
++.B interval
++microseconds how much traffic has passed. This again is an EWMA, for which
++the time constant can be specified, also in microseconds. The
++.B time constant
++corresponds to the sluggishness of the measurement or, conversely, to the
++sensitivity of the average to short bursts. Higher values mean less
++sensitivity.
++
++.SH BUGS
++The actual bandwidth of the underlying link may not be known, for example
++in the case of PPPoE or PPTP connections which in fact may send over a
++pipe, instead of over a physical device. CBQ is quite resilient to major
++errors in the configured bandwidth, probably at the cost of coarser shaping.
++
++Default kernels rely on coarse timing information for making decisions. These
++may make shaping precise in the long term, but inaccurate on second long scales.
++
++See
++.BR tc-cbq-details (8)
++for hints on how to improve this.
++
++.SH SOURCES
++.TP
++o
++Sally Floyd and Van Jacobson, "Link-sharing and Resource
++Management Models for Packet Networks",
++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
++
++.TP
++o
++Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
++
++.TP
++o
++Sally Floyd, "Notes on Class-Based Queueing: Setting
++Parameters", 1996
++
++.TP
++o
++Sally Floyd and Michael Speer, "Experimental Results
++for Class-Based Queueing", 1998, not published.
++
++
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-htb.8 iproute2/debian/manpages/old/tc-htb.8
+--- iproute2-orig/debian/manpages/old/tc-htb.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-htb.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,150 @@
++.TH HTB 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++HTB \- Hierarchy Token Bucket
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] htb [ default
++minor-id
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] htb rate
++rate
++.B [ ceil
++rate
++.B ] burst
++bytes
++.B [ cburst
++bytes
++.B ] [ prio
++priority
++.B ]
++
++.SH DESCRIPTION
++HTB is meant as a more understandable and intuitive replacement for
++the CBQ qdisc in Linux. Both CBQ and HTB help you to control the use
++of the outbound bandwidth on a given link. Both allow you to use one
++physical link to simulate several slower links and to send different
++kinds of traffic on different simulated links. In both cases, you have
++to specify how to divide the physical link into simulated links and
++how to decide which simulated link to use for a given packet to be sent.
++
++Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm
++which does not depend on interface characteristics and so does not need to
++know the underlying bandwidth of the outgoing interface.
++
++.SH SHAPING ALGORITHM
++Shaping works as documented in
++.BR tc-tbf (8).
++
++.SH CLASSIFICATION
++Within one HTB instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, HTB starts at the root and uses various methods to
++determine which class should receive the data.
++
++In the absence of uncommon configuration options, the process is rather easy.
++At each node we look for an instruction, and then go to the class the
++instruction refers us to. If the class found is a barren leaf-node (without
++children), we enqueue the packet there. If it is not yet a leaf node, we do
++the whole thing over again starting from that node.
++
++The following actions are performed, in order at each node we visit, until one
++sends us to another node, or terminates the process.
++.TP
++(i)
++Consult filters attached to the class. If sent to a leafnode, we are done.
++Otherwise, restart.
++.TP
++(ii)
++If none of the above returned with an instruction, enqueue at this node.
++.P
++This algorithm makes sure that a packet always ends up somewhere, even while
++you are busy building your configuration.
++
++.SH LINK SHARING ALGORITHM
++FIXME
++
++.SH QDISC
++The root of a HTB qdisc class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the HTB instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the HTB can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional, but very useful if classes
++will be generated within this qdisc.
++.TP
++default minor-id
++Unclassified traffic gets sent to the class with this minor-id.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++prio priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++rate rate
++Maximum rate this class and all its children are guaranteed. Mandatory.
++
++.TP
++ceil rate
++Maximum rate at which a class can send, if its parent has bandwidth to spare.
++Defaults to the configured rate, which implies no borrowing.
++
++.TP
++burst bytes
++Amount of bytes that can be burst at
++.B ceil
++speed, in excess of the configured
++.B rate.
++Should be at least as high as the highest burst of all children.
++
++.TP
++cburst bytes
++Amount of bytes that can be burst at 'infinite' speed, in other words, as fast
++as the interface can transmit them. For perfect evening out, should be equal to at most one average
++packet. Should be at least as high as the highest cburst of all children.
++
++.SH NOTES
++Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel,
++there are 100 timer events per second, so the maximum rate is that rate at which 'burst' bytes are sent each timer tick.
++From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte
++burst as 100*12kb*8 equals 10mbit.
++
++.SH SEE ALSO
++.BR tc (8)
++.P
++HTB website: http://luxik.cdi.cz/~devik/qos/htb/
++.SH AUTHOR
++Martin Devera <devik@cdi.cz>. This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-pbfifo.8 iproute2/debian/manpages/old/tc-pbfifo.8
+--- iproute2-orig/debian/manpages/old/tc-pbfifo.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-pbfifo.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,72 @@
++.TH PBFIFO 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++pfifo \- Packet limited First In, First Out queue
++.P
++bfifo \- Byte limited First In, First Out queue
++
++.SH SYNOPSIS
++.B tc qdisc ... add pfifo
++.B [ limit
++packets
++.B ]
++.P
++.B tc qdisc ... add bfifo
++.B [ limit
++bytes
++.B ]
++
++.SH DESCRIPTION
++The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the
++simplest queues possible and therefore have no overhead.
++.B pfifo
++constrains the queue size as measured in packets.
++.B bfifo
++does so as measured in bytes.
++
++Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer
++pfifo or bfifo over the default.
++
++.SH ALGORITHM
++A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of
++a list. When a packet needs to be sent out to the network, it is taken from the head of the list.
++
++If the list is too long, no further packets are allowed on. This is called 'tail drop'.
++
++.SH PARAMETERS
++.TP
++limit
++Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults
++to the interface txqueuelen, as specified with
++.BR ifconfig (8)
++or
++.BR ip (8).
++
++For bfifo, it defaults to the txqueuelen multiplied by the interface MTU.
++
++.SH OUTPUT
++The output of
++.B tc -s qdisc ls
++contains the limit, either in packets or in bytes, and the number of bytes
++and packets actually sent. An unsent and dropped packet only appears between braces
++and is not counted as 'Sent'.
++
++In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets.
++No packets were dropped, and as the pfifo queue does not slow down packets, there were also no
++overlimits:
++.P
++.nf
++# tc -s qdisc ls dev eth0
++qdisc pfifo 8001: dev eth0 limit 100p
++ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0)
++.fi
++
++If a backlog occurs, this is displayed as well.
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
++
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-pfifo_fast.8 iproute2/debian/manpages/old/tc-pfifo_fast.8
+--- iproute2-orig/debian/manpages/old/tc-pfifo_fast.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-pfifo_fast.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,59 @@
++.TH PFIFO_FAST 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++pfifo_fast \- three-band first in, first out queue
++
++.SH DESCRIPTION
++pfifo_fast is the default qdisc of each interface.
++
++Whenever an interface is created, the pfifo_fast qdisc is automatically used
++as a queue. If another qdisc is attached, it preempts the default
++pfifo_fast, which automatically returns to function when an existing qdisc
++is detached.
++
++In this sense this qdisc is magic, and unlike other qdiscs.
++
++.SH ALGORITHM
++The algorithm is very similar to that of the classful
++.BR tc-prio (8)
++qdisc.
++.B pfifo_fast
++is like three
++.BR tc-pfifo (8)
++queues side by side, where packets can be enqueued in any of the three bands
++based on their Type of Service bits or assigned priority.
++
++Not all three bands are dequeued simultaneously - as long as lower bands
++have traffic, higher bands are never dequeued. This can be used to
++prioritize interactive traffic or penalize 'lowest cost' traffic.
++
++Each band can be txqueuelen packets long, as configured with
++.BR ifconfig (8)
++or
++.BR ip (8).
++Additional packets coming in are not enqueued but are instead dropped.
++
++See
++.BR tc-prio (8)
++for complete details on how TOS bits are translated into bands.
++.SH PARAMETERS
++.TP
++txqueuelen
++The length of the three bands depends on the interface txqueuelen, as
++specified with
++.BR ifconfig (8)
++or
++.BR ip (8).
++
++.SH BUGS
++Does not maintain statistics and does not show up in tc qdisc ls. This is because
++it is the automatic default in the absence of a configured qdisc.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
++
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-prio.8 iproute2/debian/manpages/old/tc-prio.8
+--- iproute2-orig/debian/manpages/old/tc-prio.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-prio.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,187 @@
++.TH PRIO 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++PRIO \- Priority qdisc
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] prio [ bands
++bands
++.B ] [ priomap
++band,band,band...
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++The PRIO qdisc is a simple classful queueing discipline that contains
++an arbitrary number of classes of differing priority. The classes are
++dequeued in numerical descending order of priority. PRIO is a scheduler
++and never delays packets - it is a work-conserving qdisc, though the qdiscs
++contained in the classes may not be.
++
++Very useful for lowering latency when there is no need for slowing down
++traffic.
++
++.SH ALGORITHM
++On creation with 'tc qdisc add', a fixed number of bands is created. Each
++band is a class, although it is not possible to add classes with 'tc qdisc
++add', the number of bands to be created must instead be specified on the
++commandline attaching PRIO to its root.
++
++When dequeueing, band 0 is tried first and only if it did not deliver a
++packet does PRIO try band 1, and so onwards. Maximum reliability packets
++should therefore go to band 0, minimum delay to band 1 and the rest to band
++2.
++
++As the PRIO qdisc itself will have minor number 0, band 0 is actually
++major:1, band 1 is major:2, etc. For major, substitute the major number
++assigned to the qdisc on 'tc qdisc add' with the
++.B handle
++parameter.
++
++.SH CLASSIFICATION
++Three methods are available to PRIO to determine in which band a packet will
++be enqueued.
++.TP
++From userspace
++A process with sufficient privileges can encode the destination class
++directly with SO_PRIORITY, see
++.BR socket (7).
++.TP
++with a tc filter
++A tc filter attached to the root qdisc can point traffic directly to a class
++.TP
++with the priomap
++Based on the packet priority, which in turn is derived from the Type of
++Service assigned to the packet.
++.P
++Only the priomap is specific to this qdisc.
++.SH QDISC PARAMETERS
++.TP
++bands
++Number of bands. If changed from the default of 3,
++.B priomap
++must be updated as well.
++.TP
++priomap
++The priomap maps the priority of
++a packet to a class. The priority can either be set directly from userspace,
++or be derived from the Type of Service of the packet.
++
++Determines how packet priorities, as assigned by the kernel, map to
++bands. Mapping occurs based on the TOS octet of the packet, which looks like
++this:
++
++.nf
++0 1 2 3 4 5 6 7
+++---+---+---+---+---+---+---+---+
++| | | |
++|PRECEDENCE | TOS |MBZ|
++| | | |
+++---+---+---+---+---+---+---+---+
++.fi
++
++The four TOS bits (the 'TOS field') are defined as:
++
++.nf
++Binary Decimal Meaning
++-----------------------------------------
++1000 8 Minimize delay (md)
++0100 4 Maximize throughput (mt)
++0010 2 Maximize reliability (mr)
++0001 1 Minimize monetary cost (mmc)
++0000 0 Normal Service
++.fi
++
++As there is 1 bit to the right of these four bits, the actual value of the
++TOS field is double the value of the TOS bits. Tcpdump -v -v shows you the
++value of the entire TOS field, not just the four bits. It is the value you
++see in the first column of this table:
++
++.nf
++TOS Bits Means Linux Priority Band
++------------------------------------------------------------
++0x0 0 Normal Service 0 Best Effort 1
++0x2 1 Minimize Monetary Cost 1 Filler 2
++0x4 2 Maximize Reliability 0 Best Effort 1
++0x6 3 mmc+mr 0 Best Effort 1
++0x8 4 Maximize Throughput 2 Bulk 2
++0xa 5 mmc+mt 2 Bulk 2
++0xc 6 mr+mt 2 Bulk 2
++0xe 7 mmc+mr+mt 2 Bulk 2
++0x10 8 Minimize Delay 6 Interactive 0
++0x12 9 mmc+md 6 Interactive 0
++0x14 10 mr+md 6 Interactive 0
++0x16 11 mmc+mr+md 6 Interactive 0
++0x18 12 mt+md 4 Int. Bulk 1
++0x1a 13 mmc+mt+md 4 Int. Bulk 1
++0x1c 14 mr+mt+md 4 Int. Bulk 1
++0x1e 15 mmc+mr+mt+md 4 Int. Bulk 1
++.fi
++
++The second column contains the value of the relevant
++four TOS bits, followed by their translated meaning. For example, 15 stands
++for a packet wanting Minimal Monetary Cost, Maximum Reliability, Maximum
++Throughput AND Minimum Delay.
++
++The fourth column lists the way the Linux kernel interprets the TOS bits, by
++showing to which Priority they are mapped.
++
++The last column shows the result of the default priomap. On the commandline,
++the default priomap looks like this:
++
++ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
++
++This means that priority 4, for example, gets mapped to band number 1.
++The priomap also allows you to list higher priorities (> 7) which do not
++correspond to TOS mappings, but which are set by other means.
++
++This table from RFC 1349 (read it for more details) explains how
++applications might very well set their TOS bits:
++
++.nf
++TELNET 1000 (minimize delay)
++FTP
++ Control 1000 (minimize delay)
++ Data 0100 (maximize throughput)
++
++TFTP 1000 (minimize delay)
++
++SMTP
++ Command phase 1000 (minimize delay)
++ DATA phase 0100 (maximize throughput)
++
++Domain Name Service
++ UDP Query 1000 (minimize delay)
++ TCP Query 0000
++ Zone Transfer 0100 (maximize throughput)
++
++NNTP 0001 (minimize monetary cost)
++
++ICMP
++ Errors 0000
++ Requests 0000 (mostly)
++ Responses <same as request> (mostly)
++.fi
++
++
++.SH CLASSES
++PRIO classes cannot be configured further - they are automatically created
++when the PRIO qdisc is attached. Each class however can contain yet a
++further qdisc.
++
++.SH BUGS
++Large amounts of traffic in the lower bands can cause starvation of higher
++bands. Can be prevented by attaching a shaper (for example,
++.BR tc-tbf (8))
++to these bands to make sure they cannot dominate the link.
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, J Hadi Salim
++<hadi@cyberus.ca>. This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-red.8 iproute2/debian/manpages/old/tc-red.8
+--- iproute2-orig/debian/manpages/old/tc-red.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-red.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,131 @@
++.TH RED 8 "13 December 2001" "iproute2" "Linux"
++.SH NAME
++red \- Random Early Detection
++.SH SYNOPSIS
++.B tc qdisc ... red
++.B limit
++bytes
++.B min
++bytes
++.B max
++bytes
++.B avpkt
++bytes
++.B burst
++packets
++.B [ ecn ] [ bandwidth
++rate
++.B ] probability
++chance
++
++.SH DESCRIPTION
++Random Early Detection is a classless qdisc which manages its queue size
++smartly. Regular queues simply drop packets from the tail when they are
++full, which may not be the optimal behaviour. RED also performs tail drop,
++but does so in a more gradual way.
++
++Once the queue hits a certain average length, packets enqueued have a
++configurable chance of being marked (which may mean dropped). This chance
++increases linearly up to a point called the
++.B max
++average queue length, although the queue might get bigger.
++
++This has a host of benefits over simple taildrop, while not being processor
++intensive. It prevents synchronous retransmits after a burst in traffic,
++which cause further retransmits, etc.
++
++The goal is to have a small queue size, which is good for interactivity
++while not disturbing TCP/IP traffic with too many sudden drops after a burst
++of traffic.
++
++Depending on if ECN is configured, marking either means dropping or
++purely marking a packet as overlimit.
++.SH ALGORITHM
++The average queue size is used for determining the marking
++probability. This is calculated using an Exponential Weighted Moving
++Average, which can be more or less sensitive to bursts.
++
++When the average queue size is below
++.B min
++bytes, no packet will ever be marked. When it exceeds
++.B min,
++the probability of doing so climbs linearly up
++to
++.B probability,
++until the average queue size hits
++.B max
++bytes. Because
++.B probability
++is normally not set to 100%, the queue size might
++conceivably rise above
++.B max
++bytes, so the
++.B limit
++parameter is provided to set a hard maximum for the size of the queue.
++
++.SH PARAMETERS
++.TP
++min
++Average queue size at which marking becomes a possibility.
++.TP
++max
++At this average queue size, the marking probability is maximal. Should be at
++least twice
++.B min
++to prevent synchronous retransmits, higher for low
++.B min.
++.TP
++probability
++Maximum probability for marking, specified as a floating point
++number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%,
++respectively).
++.TP
++limit
++Hard limit on the real (not average) queue size in bytes. Further packets
++are dropped. Should be set higher than max+burst. It is advised to set this
++a few times higher than
++.B max.
++.TP
++burst
++Used for determining how fast the average queue size is influenced by the
++real queue size. Larger values make the calculation more sluggish, allowing
++longer bursts of traffic before marking starts. Real life experiments
++support the following guideline: (min+min+max)/(3*avpkt).
++.TP
++avpkt
++Specified in bytes. Used with burst to determine the time constant for
++average queue size calculations. 1000 is a good value.
++.TP
++bandwidth
++This rate is used for calculating the average queue size after some
++idle time. Should be set to the bandwidth of your interface. Does not mean
++that RED will shape for you! Optional.
++.TP
++ecn
++As mentioned before, RED can either 'mark' or 'drop'. Explicit Congestion
++Notification allows RED to notify remote hosts that their rate exceeds the
++amount of bandwidth available. Non-ECN capable hosts can only be notified by
++dropping a packet. If this parameter is specified, packets which indicate
++that their hosts honor ECN will only be marked and not dropped, unless the
++queue size hits
++.B limit
++bytes. Needs a tc binary with RED support compiled in. Recommended.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH SOURCES
++.TP
++o
++Floyd, S., and Jacobson, V., Random Early Detection gateways for
++Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html
++.TP
++o
++Some changes to the algorithm by Alexey N. Kuznetsov.
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, Alexey Makarenko
++<makar@phoenix.kharkov.ua>, J Hadi Salim <hadi@nortelnetworks.com>.
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-sfq.8 iproute2/debian/manpages/old/tc-sfq.8
+--- iproute2-orig/debian/manpages/old/tc-sfq.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-sfq.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,107 @@
++.TH TC 8 "8 December 2001" "iproute2" "Linux"
++.SH NAME
++sfq \- Stochastic Fairness Queueing
++.SH SYNOPSIS
++.B tc qdisc ... perturb
++seconds
++.B quantum
++bytes
++
++.SH DESCRIPTION
++
++Stochastic Fairness Queueing is a classless queueing discipline available for
++traffic control with the
++.BR tc (8)
++command.
++
++SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'.
++The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing
++any single flow from drowning out the rest.
++
++This may in fact have some effect in mitigating a Denial of Service attempt.
++
++SFQ is work-conserving and therefore always delivers a packet if it has one available.
++.SH ALGORITHM
++On enqueueing, each packet is assigned to a hash bucket, based on
++.TP
++(i)
++Source address
++.TP
++(ii)
++Destination address
++.TP
++(iii)
++Source port
++.P
++If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP.
++Packets with other protocols are hashed based on the 32-bit representation of their
++destination and the socket they belong to. A flow corresponds mostly to a TCP/IP
++connection.
++
++Each of these buckets should represent a unique flow. Because multiple flows may
++get hashed to the same bucket, the hashing algorithm is perturbed at configurable
++intervals so that the unfairness lasts only for a short while. Perturbation may
++however cause some inadvertent packet reordering to occur.
++
++When dequeuing, each hashbucket with data is queried in a round robin fashion.
++
++The compile time maximum length of the SFQ is 128 packets, which can be spread over
++at most 128 buckets of 1024 available. In case of overflow, tail-drop is performed
++on the fullest bucket, thus maintaining fairness.
++
++.SH PARAMETERS
++.TP
++perturb
++Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that
++no perturbation occurs. Do not set too low for each perturbation may cause some packet
++reordering. Advised value: 10
++.TP
++quantum
++Amount of bytes a flow is allowed to dequeue during a round of the round robin process.
++Defaults to the MTU of the interface which is also the advised value and the minimum value.
++
++.SH EXAMPLE & USAGE
++
++To attach to device ppp0:
++.P
++# tc qdisc add dev ppp0 root sfq perturb 10
++.P
++Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful
++if it owns the queue.
++This is the case when the link speed equals the actually available bandwidth. This holds
++for regular phone modems, ISDN connections and direct non-switched ethernet links.
++.P
++Most often, cable modems and DSL devices do not fall into this category. The same holds
++for when connected to a switch and trying to send data to a congested segment also
++connected to the switch.
++.P
++In this case, the effective queue does not reside within Linux and is therefore not
++available for scheduling.
++.P
++Embed SFQ in a classful qdisc to make sure it owns the queue.
++
++.SH SOURCE
++.TP
++o
++Paul E. McKenney "Stochastic Fairness Queuing",
++IEEE INFOCOM'90 Proceedings, San Francisco, 1990.
++
++.TP
++o
++Paul E. McKenney "Stochastic Fairness Queuing",
++"Interworking: Research and Experience", v.2, 1991, p.113-131.
++
++.TP
++o
++See also:
++M. Shreedhar and George Varghese "Efficient Fair
++Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc-tbf.8 iproute2/debian/manpages/old/tc-tbf.8
+--- iproute2-orig/debian/manpages/old/tc-tbf.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc-tbf.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,138 @@
++.TH TC 8 "13 December 2001" "iproute2" "Linux"
++.SH NAME
++tbf \- Token Bucket Filter
++.SH SYNOPSIS
++.B tc qdisc ... tbf rate
++rate
++.B burst
++bytes/cell
++.B ( latency
++ms
++.B | limit
++bytes
++.B ) [ mpu
++bytes
++.B [ peakrate
++rate
++.B mtu
++bytes/cell
++.B ] ]
++.P
++burst is also known as buffer and maxburst. mtu is also known as minburst.
++.SH DESCRIPTION
++
++The Token Bucket Filter is a classless queueing discipline available for
++traffic control with the
++.BR tc (8)
++command.
++
++TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle
++itself, although packets are available, to ensure that the configured rate is not exceeded.
++On all platforms except for Alpha,
++it is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness,
++sending out data exactly at the configured rates.
++
++Much higher rates are possible but at the cost of losing the minimal burstiness. In that
++case, data is on average dequeued at the configured rate but may be sent much faster at millisecond
++timescales. Because of further queues living in network adaptors, this is often not a problem.
++
++Kernels with a higher 'HZ' can achieve higher rates with perfect burstiness. On Alpha, HZ is ten
++times higher, leading to a 10mbit/s limit to perfection. These calculations hold for packets of on
++average 1000 bytes.
++
++.SH ALGORITHM
++As the name implies, traffic is filtered based on the expenditure of
++.B tokens.
++Tokens roughly correspond to bytes, with the additional constraint that each packet consumes
++some tokens, no matter how small it is. This reflects the fact that even a zero-sized packet occupies
++the link for some time.
++
++On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst
++in one go. Tokens arrive at a steady rate, until the bucket is full.
++
++If no tokens are available, packets are queued, up to a configured limit. The TBF now
++calculates the token deficit, and throttles until the first packet in the queue can be sent.
++
++If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured
++to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF
++with a very small bucket, so that it doesn't burst.
++
++To achieve perfection, the second bucket may contain only a single packet, which leads to
++the earlier mentioned 1mbit/s limit.
++
++This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends
++on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100
++packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s.
++
++.SH PARAMETERS
++See
++.BR tc (8)
++for how to specify the units of these values.
++.TP
++limit or latency
++Limit is the number of bytes that can be queued waiting for tokens to become
++available. You can also specify this the other way around by setting the
++latency parameter, which specifies the maximum amount of time a packet can
++sit in the TBF. The latter calculation takes into account the size of the
++bucket, the rate and possibly the peakrate (if set). These two parameters
++are mutually exclusive.
++.TP
++burst
++Also known as buffer or maxburst.
++Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously.
++In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer
++if you want to reach your configured rate!
++
++If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket.
++The minimum buffer size can be calculated by dividing the rate by HZ.
++
++Token usage calculations are performed using a table which by default has a resolution of 8 packets.
++This resolution can be changed by specifying the
++.B cell
++size with the burst. For example, to specify a 6000 byte buffer with a 16
++byte cell size, set a burst of 6000/16. You will probably never have to set
++this. Must be an integral power of 2.
++.TP
++mpu
++A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit
++determines the minimal token usage (specified in bytes) for a packet. Defaults to zero.
++.TP
++rate
++The speed knob. See remarks above about limits! See
++.BR tc (8)
++for units.
++.PP
++Furthermore, if a peakrate is desired, the following parameters are available:
++
++.TP
++peakrate
++Maximum depletion rate of the bucket. Limited to 1mbit/s on Intel, 10mbit/s on Alpha. The peakrate does
++not need to be set, it is only necessary if perfect millisecond timescale shaping is required.
++
++.TP
++mtu/minburst
++Specifies the size of the peakrate bucket. For perfect accuracy, should be set to the MTU of the interface.
++If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst
++allows around 3mbit/s of peakrate, given 1000 byte packets.
++
++Like the regular burstsize you can also specify a
++.B cell
++size.
++.SH EXAMPLE & USAGE
++
++To attach a TBF with a sustained maximum rate of 0.5mbit/s, a peakrate of 1.0mbit/s,
++a 5kilobyte buffer, with a pre-bucket queue size limit calculated so the TBF causes
++at most 70ms of latency, with perfect peakrate behaviour, issue:
++.P
++# tc qdisc add dev eth0 root tbf rate 0.5mbit \\
++ burst 5kb latency 70ms peakrate 1mbit \\
++ minburst 1540
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/old/tc.8 iproute2/debian/manpages/old/tc.8
+--- iproute2-orig/debian/manpages/old/tc.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/old/tc.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,348 @@
++.TH TC 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++tc \- show / manipulate traffic control settings
++.SH SYNOPSIS
++.B tc qdisc [ add | change | replace | link ] dev
++DEV
++.B
++[ parent
++qdisc-id
++.B | root ]
++.B [ handle
++qdisc-id ] qdisc
++[ qdisc specific parameters ]
++.P
++
++.B tc class [ add | change | replace ] dev
++DEV
++.B parent
++qdisc-id
++.B [ classid
++class-id ] qdisc
++[ qdisc specific parameters ]
++.P
++
++.B tc filter [ add | change | replace ] dev
++DEV
++.B [ parent
++qdisc-id
++.B | root ] protocol
++protocol
++.B prio
++priority filtertype
++[ filtertype specific parameters ]
++.B flowid
++flow-id
++
++.B tc [-s | -d ] qdisc show [ dev
++DEV
++.B ]
++.P
++.B tc [-s | -d ] class show dev
++DEV
++.P
++.B tc filter show dev
++DEV
++
++.SH DESCRIPTION
++.B Tc
++is used to configure Traffic Control in the Linux kernel. Traffic Control consists
++of the following:
++
++.TP
++SHAPING
++When traffic is shaped, its rate of transmission is under control. Shaping may
++be more than lowering the available bandwidth - it is also used to smooth out
++bursts in traffic for better network behaviour. Shaping occurs on egress.
++
++.TP
++SCHEDULING
++By scheduling the transmission of packets it is possible to improve interactivity
++for traffic that needs it while still guaranteeing bandwidth to bulk transfers. Reordering
++is also called prioritizing, and happens only on egress.
++
++.TP
++POLICING
++Where shaping deals with transmission of traffic, policing pertains to traffic
++arriving. Policing thus occurs on ingress.
++
++.TP
++DROPPING
++Traffic exceeding a set bandwidth may also be dropped forthwith, both on
++ingress and on egress.
++
++.P
++Processing of traffic is controlled by three kinds of objects: qdiscs,
++classes and filters.
++
++.SH QDISCS
++.B qdisc
++is short for 'queueing discipline' and it is elementary to
++understanding traffic control. Whenever the kernel needs to send a
++packet to an interface, it is
++.B enqueued
++to the qdisc configured for that interface. Immediately afterwards, the kernel
++tries to get as many packets as possible from the qdisc, for giving them
++to the network adaptor driver.
++
++A simple QDISC is the 'pfifo' one, which does no processing at all and is a pure
++First In, First Out queue. It does however store traffic when the network interface
++can't handle it momentarily.
++
++.SH CLASSES
++Some qdiscs can contain classes, which contain further qdiscs - traffic may
++then be enqueued in any of the inner qdiscs, which are within the
++.B classes.
++When the kernel tries to dequeue a packet from such a
++.B classful qdisc
++it can come from any of the classes. A qdisc may for example prioritize
++certain kinds of traffic by trying to dequeue from certain classes
++before others.
++
++.SH FILTERS
++A
++.B filter
++is used by a classful qdisc to determine in which class a packet will
++be enqueued. Whenever traffic arrives at a class with subclasses, it needs
++to be classified. Various methods may be employed to do so, one of these
++are the filters. All filters attached to the class are called, until one of
++them returns with a verdict. If no verdict was made, other criteria may be
++available. This differs per qdisc.
++
++It is important to notice that filters reside
++.B within
++qdiscs - they are not masters of what happens.
++
++.SH CLASSLESS QDISCS
++The classless qdiscs are:
++.TP
++[p|b]fifo
++Simplest usable qdisc, pure First In, First Out behaviour. Limited in
++packets or in bytes.
++.TP
++pfifo_fast
++Standard qdisc for 'Advanced Router' enabled kernels. Consists of a three-band
++queue which honors Type of Service flags, as well as the priority that may be
++assigned to a packet.
++.TP
++red
++Random Early Detection simulates physical congestion by randomly dropping
++packets when nearing configured bandwidth allocation. Well suited to very
++large bandwidth applications.
++.TP
++sfq
++Stochastic Fairness Queueing reorders queued traffic so each 'session'
++gets to send a packet in turn.
++.TP
++tbf
++The Token Bucket Filter is suited for slowing traffic down to a precisely
++configured rate. Scales well to large bandwidths.
++.SH CONFIGURING CLASSLESS QDISCS
++In the absence of classful qdiscs, classless qdiscs can only be attached at
++the root of a device. Full syntax:
++.P
++.B tc qdisc add dev
++DEV
++.B root
++QDISC QDISC-PARAMETERS
++
++To remove, issue
++.P
++.B tc qdisc del dev
++DEV
++.B root
++
++The
++.B pfifo_fast
++qdisc is the automatic default in the absence of a configured qdisc.
++
++.SH CLASSFUL QDISCS
++The classful qdiscs are:
++.TP
++CBQ
++Class Based Queueing implements a rich linksharing hierarchy of classes.
++It contains shaping elements as well as prioritizing capabilities. Shaping is
++performed using link idle time calculations based on average packet size and
++underlying link bandwidth. The latter may be ill-defined for some interfaces.
++.TP
++HTB
++The Hierarchy Token Bucket implements a rich linksharing hierarchy of
++classes with an emphasis on conforming to existing practices. HTB facilitates
++guaranteeing bandwidth to classes, while also allowing specification of upper
++limits to inter-class sharing. It contains shaping elements, based on TBF and
++can prioritize classes.
++.TP
++PRIO
++The PRIO qdisc is a non-shaping container for a configurable number of
++classes which are dequeued in order. This allows for easy prioritization
++of traffic, where lower classes are only able to send if higher ones have
++no packets available. To facilitate configuration, Type Of Service bits are
++honored by default.
++.SH THEORY OF OPERATION
++Classes form a tree, where each class has a single parent.
++A class may have multiple children. Some qdiscs allow for runtime addition
++of classes (CBQ, HTB) while others (PRIO) are created with a static number of
++children.
++
++Qdiscs which allow dynamic addition of classes can have zero or more
++subclasses to which traffic may be enqueued.
++
++Furthermore, each class contains a
++.B leaf qdisc
++which by default has
++.B pfifo
++behaviour though another qdisc can be attached in place. This qdisc may again
++contain classes, but each class can have only one leaf qdisc.
++
++When a packet enters a classful qdisc it can be
++.B classified
++to one of the classes within. Three criteria are available, although not all
++qdiscs will use all three:
++.TP
++tc filters
++If tc filters are attached to a class, they are consulted first
++for relevant instructions. Filters can match on all fields of a packet header,
++as well as on the firewall mark applied by ipchains or iptables. See
++.BR tc-filters (8).
++.TP
++Type of Service
++Some qdiscs have built in rules for classifying packets based on the TOS field.
++.TP
++skb->priority
++Userspace programs can encode a class-id in the 'skb->priority' field using
++the SO_PRIORITY option.
++.P
++Each node within the tree can have its own filters but higher level filters
++may also point directly to lower classes.
++
++If classification did not succeed, packets are enqueued to the leaf qdisc
++attached to that class. Check qdisc specific manpages for details, however.
++
++.SH NAMING
++All qdiscs, classes and filters have IDs, which can either be specified
++or be automatically assigned.
++
++IDs consist of a major number and a minor number, separated by a colon.
++
++.TP
++QDISCS
++A qdisc, which potentially can have children,
++gets assigned a major number, called a 'handle', leaving the minor
++number namespace available for classes. The handle is expressed as '10:'.
++It is customary to explicitly assign a handle to qdiscs expected to have
++children.
++
++.TP
++CLASSES
++Classes residing under a qdisc share their qdisc major number, but each have
++a separate minor number called a 'classid' that has no relation to their
++parent classes, only to their parent qdisc. The same naming custom as for
++qdiscs applies.
++
++.TP
++FILTERS
++Filters have a three part ID, which is only needed when using a hashed
++filter hierarchy, for which see
++.BR tc-filters (8).
++.SH UNITS
++All parameters accept a floating point number, possibly followed by a unit.
++.P
++Bandwidths or rates can be specified in:
++.TP
++kbps
++Kilobytes per second
++.TP
++mbps
++Megabytes per second
++.TP
++kbit
++Kilobits per second
++.TP
++mbit
++Megabits per second
++.TP
++bps or a bare number
++Bytes per second
++.P
++Amounts of data can be specified in:
++.TP
++kb or k
++Kilobytes
++.TP
++mb or m
++Megabytes
++.TP
++mbit
++Megabits
++.TP
++kbit
++Kilobits
++.TP
++b or a bare number
++Bytes.
++.P
++Lengths of time can be specified in:
++.TP
++s, sec or secs
++Whole seconds
++.TP
++ms, msec or msecs
++Milliseconds
++.TP
++us, usec, usecs or a bare number
++Microseconds.
++
++.SH TC COMMANDS
++The following commands are available for qdiscs, classes and filters:
++.TP
++add
++Add a qdisc, class or filter to a node. For all entities, a
++.B parent
++must be passed, either by passing its ID or by attaching directly to the root of a device.
++When creating a qdisc or a filter, it can be named with the
++.B handle
++parameter. A class is named with the
++.B classid
++parameter.
++
++.TP
++remove
++A qdisc can be removed by specifying its handle, which may also be 'root'. All subclasses and their leaf qdiscs
++are automatically deleted, as well as any filters attached to them.
++
++.TP
++change
++Some entities can be modified 'in place'. Shares the syntax of 'add', with the exception
++that the handle cannot be changed and neither can the parent. In other words,
++.B
++change
++cannot move a node.
++
++.TP
++replace
++Performs a nearly atomic remove/add on an existing node id. If the node does not exist yet
++it is created.
++
++.TP
++link
++Only available for qdiscs and performs a replace where the node
++must exist already.
++
++
++.SH HISTORY
++.B tc
++was written by Alexey N. Kuznetsov and added in Linux 2.2.
++.SH SEE ALSO
++.BR tc-cbq (8),
++.BR tc-htb (8),
++.BR tc-sfq (8),
++.BR tc-red (8),
++.BR tc-tbf (8),
++.BR tc-pfifo (8),
++.BR tc-bfifo (8),
++.BR tc-pfifo_fast (8),
++.BR tc-filters (8)
++
++.SH AUTHOR
++Manpage maintained by bert hubert (ahu@ds9a.nl)
++
+diff -Naur iproute2-orig/debian/manpages/tc-cbq-details.8 iproute2/debian/manpages/tc-cbq-details.8
+--- iproute2-orig/debian/manpages/tc-cbq-details.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-cbq-details.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,425 @@
++.TH CBQ 8 "8 December 2001" "iproute2" "Linux"
++.SH NAME
++CBQ \- Class Based Queueing
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] cbq avpkt
++bytes
++.B bandwidth
++rate
++.B [ cell
++bytes
++.B ] [ ewma
++log
++.B ] [ mpu
++bytes
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] cbq allot
++bytes
++.B [ bandwidth
++rate
++.B ] [ rate
++rate
++.B ] prio
++priority
++.B [ weight
++weight
++.B ] [ minburst
++packets
++.B ] [ maxburst
++packets
++.B ] [ ewma
++log
++.B ] [ cell
++bytes
++.B ] avpkt
++bytes
++.B [ mpu
++bytes
++.B ] [ bounded isolated ] [ split
++handle
++.B & defmap
++defmap
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++Class Based Queueing is a classful qdisc that implements a rich
++linksharing hierarchy of classes. It contains shaping elements as
++well as prioritizing capabilities. Shaping is performed using link
++idle time calculations based on the timing of dequeue events and
++underlying link bandwidth.
++
++.SH SHAPING ALGORITHM
++Shaping is done using link idle time calculations, and actions taken if
++these calculations deviate from set limits.
++
++When shaping a 10mbit/s connection to 1mbit/s, the link will
++be idle 90% of the time. If it isn't, it needs to be throttled so that it
++IS idle 90% of the time.
++
++From the kernel's perspective, this is hard to measure, so CBQ instead
++derives the idle time from the number of microseconds (in fact, jiffies)
++that elapse between requests from the device driver for more data. Combined
++with the knowledge of packet sizes, this is used to approximate how full or
++empty the link is.
++
++This is rather circumspect and doesn't always arrive at proper
++results. For example, what is the actual link speed of an interface
++that is not really able to transmit the full 100mbit/s of data,
++perhaps because of a badly implemented driver? A PCMCIA network card
++will also never achieve 100mbit/s because of the way the bus is
++designed - again, how do we calculate the idle time?
++
++The physical link bandwidth may be ill defined in case of not-quite-real
++network devices like PPP over Ethernet or PPTP over TCP/IP. The effective
++bandwidth in that case is probably determined by the efficiency of pipes
++to userspace - which is not defined.
++
++During operations, the effective idletime is measured using an
++exponential weighted moving average (EWMA), which considers recent
++packets to be exponentially more important than past ones. The Unix
++loadaverage is calculated in the same way.
++
++The calculated idle time is subtracted from the EWMA measured one,
++the resulting number is called 'avgidle'. A perfectly loaded link has
++an avgidle of zero: packets arrive exactly at the calculated
++interval.
++
++An overloaded link has a negative avgidle and if it gets too negative,
++CBQ throttles and is then 'overlimit'.
++
++Conversely, an idle link might amass a huge avgidle, which would then
++allow infinite bandwidths after a few hours of silence. To prevent
++this, avgidle is capped at
++.B maxidle.
++
++If overlimit, in theory, the CBQ could throttle itself for exactly the
++amount of time that was calculated to pass between packets, and then
++pass one packet, and throttle again. Due to timer resolution constraints,
++this may not be feasible, see the
++.B minburst
++parameter below.
++
++.SH CLASSIFICATION
++Within the one CBQ instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, CBQ starts at the root and uses various methods to
++determine which class should receive the data. If a verdict is reached, this
++process is repeated for the recipient class which might have further
++means of classifying traffic to its children, if any.
++
++CBQ has the following methods available to classify a packet to any child
++classes.
++.TP
++(i)
++.B skb->priority class encoding.
++Can be set from userspace by an application with the
++.B SO_PRIORITY
++setsockopt.
++The
++.B skb->priority class encoding
++only applies if the skb->priority holds a major:minor handle of an existing
++class within this qdisc.
++.TP
++(ii)
++tc filters attached to the class.
++.TP
++(iii)
++The defmap of a class, as set with the
++.B split & defmap
++parameters. The defmap may contain instructions for each possible Linux packet
++priority.
++
++.P
++Each class also has a
++.B level.
++Leaf nodes, attached to the bottom of the class hierarchy, have a level of 0.
++.SH CLASSIFICATION ALGORITHM
++
++Classification is a loop, which terminates when a leaf class is found. At any
++point the loop may jump to the fallback algorithm.
++
++The loop consists of the following steps:
++.TP
++(i)
++If the packet is generated locally and has a valid classid encoded within its
++.B skb->priority,
++choose it and terminate.
++
++.TP
++(ii)
++Consult the tc filters, if any, attached to this child. If these return
++a class which is not a leaf class, restart loop from the class returned.
++If it is a leaf, choose it and terminate.
++.TP
++(iii)
++If the tc filters did not return a class, but did return a classid,
++try to find a class with that id within this qdisc.
++Check if the found class is of a lower
++.B level
++than the current class. If so, and the returned class is not a leaf node,
++restart the loop at the found class. If it is a leaf node, terminate.
++If we found an upward reference to a higher level, enter the fallback
++algorithm.
++.TP
++(iv)
++If the tc filters did not return a class, nor a valid reference to one,
++consider the minor number of the reference to be the priority. Retrieve
++a class from the defmap of this class for the priority. If this did not
++contain a class, consult the defmap of this class for the
++.B BEST_EFFORT
++class. If this is an upward reference, or no
++.B BEST_EFFORT
++class was defined,
++enter the fallback algorithm. If a valid class was found, and it is not a
++leaf node, restart the loop at this class. If it is a leaf, choose it and
++terminate. If
++neither the priority distilled from the classid, nor the
++.B BEST_EFFORT
++priority yielded a class, enter the fallback algorithm.
++.P
++The fallback algorithm resides outside of the loop and is as follows.
++.TP
++(i)
++Consult the defmap of the class at which the jump to fallback occurred. If
++the defmap contains a class for the
++.B
++priority
++of the class (which is related to the TOS field), choose this class and
++terminate.
++.TP
++(ii)
++Consult the map for a class for the
++.B BEST_EFFORT
++priority. If found, choose it, and terminate.
++.TP
++(iii)
++Choose the class at which the break out to the fallback algorithm occurred. Terminate.
++.P
++The packet is enqueued to the class which was chosen when either algorithm
++terminated. It is therefore possible for a packet to be enqueued *not* at a
++leaf node, but in the middle of the hierarchy.
++
++.SH LINK SHARING ALGORITHM
++When dequeuing for sending to the network device, CBQ decides which of its
++classes will be allowed to send. It does so with a Weighted Round Robin process
++in which each class with packets gets a chance to send in turn. The WRR process
++starts by asking the highest priority classes (lowest numerically -
++highest semantically) for packets, and will continue to do so until they
++have no more data to offer, in which case the process repeats for lower
++priorities.
++
++.B CERTAINTY ENDS HERE, ANK PLEASE HELP
++
++Each class is not allowed to send at length though - they can only dequeue a
++configurable amount of data during each round.
++
++If a class is about to go overlimit, and it is not
++.B bounded
++it will try to borrow avgidle from siblings that are not
++.B isolated.
++This process is repeated from the bottom upwards. If a class is unable
++to borrow enough avgidle to send a packet, it is throttled and not asked
++for a packet for enough time for the avgidle to increase above zero.
++
++.B I REALLY NEED HELP FIGURING THIS OUT. REST OF DOCUMENT IS PRETTY CERTAIN
++.B AGAIN.
++
++.SH QDISC
++The root qdisc of a CBQ class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the CBQ instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional.
++.TP
++avpkt bytes
++For calculations, the average packet size must be known. It is silently capped
++at a minimum of 2/3 of the interface MTU. Mandatory.
++.TP
++bandwidth rate
++To determine the idle time, CBQ must know the bandwidth of your underlying
++physical interface, or parent qdisc. This is a vital parameter, more about it
++later. Mandatory.
++.TP
++cell
++The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
++.TP
++mpu
++A zero sized packet may still take time to transmit. This value is the lower
++cap for packet transmission time calculations - packets smaller than this value
++are still deemed to have this size. Defaults to zero.
++.TP
++ewma log
++When CBQ needs to measure the average idle time, it does so using an
++Exponentially Weighted Moving Average which smoothes out measurements into
++a moving average. The EWMA LOG determines how much smoothing occurs. Defaults
++to 5. Lower values imply greater sensitivity. Must be between 0 and 31.
++.P
++A CBQ qdisc does not shape out of its own accord. It only needs to know certain
++parameters about the underlying link. Actual shaping is done in classes.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++weight weight
++When dequeuing to the interface, classes are tried for traffic in a
++round-robin fashion. Classes with a higher configured qdisc will generally
++have more traffic to offer during each round, so it makes sense to allow
++it to dequeue more traffic. All weights under a class are normalized, so
++only the ratios matter. Defaults to the configured rate, unless the priority
++of this class is maximal, in which case it is set to 1.
++.TP
++allot bytes
++Allot specifies how many bytes a qdisc can dequeue
++during each round of the process. This parameter is weighted using the
++renormalized class weight described above.
++
++.TP
++priority priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++rate rate
++Maximum rate this class and all its children combined can send at. Mandatory.
++
++.TP
++bandwidth rate
++This is different from the bandwidth specified when creating a CBQ qdisc. Only
++used to determine maxidle and offtime, which are only calculated when
++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
++
++.TP
++maxburst
++This number of packets is used to calculate maxidle so that when
++avgidle is at maxidle, this number of average packets can be burst
++before avgidle drops to 0. Set it higher to be more tolerant of
++bursts. You can't set maxidle directly, only via this parameter.
++
++.TP
++minburst
++As mentioned before, CBQ needs to throttle in case of
++overlimit. The ideal solution is to do so for exactly the calculated
++idle time, and pass 1 packet. However, Unix kernels generally have a
++hard time scheduling events shorter than 10ms, so it is better to
++throttle for a longer period, and then pass minburst packets in one
++go, and then sleep minburst times longer.
++
++The time to wait is called the offtime. Higher values of minburst lead
++to more accurate shaping in the long term, but to bigger bursts at
++millisecond timescales.
++
++.TP
++minidle
++If avgidle is below 0, we are overlimit and need to wait until
++avgidle will be big enough to send one packet. To prevent a sudden
++burst from shutting down the link for a prolonged period of time,
++avgidle is reset to minidle if it gets too low.
++
++Minidle is specified in negative microseconds, so 10 means that
++avgidle is capped at -10us.
++
++.TP
++bounded
++Signifies that this class will not borrow bandwidth from its siblings.
++.TP
++isolated
++Means that this class will not lend bandwidth to its siblings.
++
++.TP
++split major:minor & defmap bitmap[/bitmap]
++If consulting filters attached to a class did not give a verdict,
++CBQ can also classify based on the packet's priority. There are 16
++priorities available, numbered from 0 to 15.
++
++The defmap specifies which priorities this class wants to receive,
++specified as a bitmap. The Least Significant Bit corresponds to priority
++zero. The
++.B split
++parameter tells CBQ at which class the decision must be made, which should
++be a (grand)parent of the class you are adding.
++
++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
++configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
++
++The complementary configuration would then
++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
++Which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2.
++.TP
++estimator interval timeconstant
++CBQ can measure how much bandwidth each class is using, which tc filters
++can use to classify packets with. In order to determine the bandwidth
++it uses a very simple estimator that measures once every
++.B interval
++microseconds how much traffic has passed. This again is an EWMA, for which
++the time constant can be specified, also in microseconds. The
++.B time constant
++corresponds to the sluggishness of the measurement or, conversely, to the
++sensitivity of the average to short bursts. Higher values mean less
++sensitivity.
++
++
++
++.SH SOURCES
++.TP
++o
++Sally Floyd and Van Jacobson, "Link-sharing and Resource
++Management Models for Packet Networks",
++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
++
++.TP
++o
++Sally Floyd, "Notes on CBQ and Guarantee Service", 1995
++
++.TP
++o
++Sally Floyd, "Notes on Class-Based Queueing: Setting
++Parameters", 1996
++
++.TP
++o
++Sally Floyd and Michael Speer, "Experimental Results
++for Class-Based Queueing", 1998, not published.
++
++
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-cbq.8 iproute2/debian/manpages/tc-cbq.8
+--- iproute2-orig/debian/manpages/tc-cbq.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-cbq.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,353 @@
++.TH CBQ 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++CBQ \- Class Based Queueing
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] cbq [ allot
++bytes
++.B ] avpkt
++bytes
++.B bandwidth
++rate
++.B [ cell
++bytes
++.B ] [ ewma
++log
++.B ] [ mpu
++bytes
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] cbq allot
++bytes
++.B [ bandwidth
++rate
++.B ] [ rate
++rate
++.B ] prio
++priority
++.B [ weight
++weight
++.B ] [ minburst
++packets
++.B ] [ maxburst
++packets
++.B ] [ ewma
++log
++.B ] [ cell
++bytes
++.B ] avpkt
++bytes
++.B [ mpu
++bytes
++.B ] [ bounded isolated ] [ split
++handle
++.B & defmap
++defmap
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++Class Based Queueing is a classful qdisc that implements a rich
++linksharing hierarchy of classes. It contains shaping elements as
++well as prioritizing capabilities. Shaping is performed using link
++idle time calculations based on the timing of dequeue events and
++underlying link bandwidth.
++
++.SH SHAPING ALGORITHM
++When shaping a 10mbit/s connection to 1mbit/s, the link will
++be idle 90% of the time. If it isn't, it needs to be throttled so that it
++IS idle 90% of the time.
++
++During operations, the effective idletime is measured using an
++exponential weighted moving average (EWMA), which considers recent
++packets to be exponentially more important than past ones. The Unix
++loadaverage is calculated in the same way.
++
++The calculated idle time is subtracted from the EWMA measured one,
++the resulting number is called 'avgidle'. A perfectly loaded link has
++an avgidle of zero: packets arrive exactly at the calculated
++interval.
++
++An overloaded link has a negative avgidle and if it gets too negative,
++CBQ throttles and is then 'overlimit'.
++
++Conversely, an idle link might amass a huge avgidle, which would then
++allow infinite bandwidths after a few hours of silence. To prevent
++this, avgidle is capped at
++.B maxidle.
++
++If overlimit, in theory, the CBQ could throttle itself for exactly the
++amount of time that was calculated to pass between packets, and then
++pass one packet, and throttle again. Due to timer resolution constraints,
++this may not be feasible, see the
++.B minburst
++parameter below.
++
++.SH CLASSIFICATION
++Within the one CBQ instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, CBQ starts at the root and uses various methods to
++determine which class should receive the data.
++
++In the absence of uncommon configuration options, the process is rather easy.
++At each node we look for an instruction, and then go to the class the
++instruction refers us to. If the class found is a barren leaf-node (without
++children), we enqueue the packet there. If it is not yet a leaf node, we do
++the whole thing over again starting from that node.
++
++The following actions are performed, in order at each node we visit, until one
++sends us to another node, or terminates the process.
++.TP
++(i)
++Consult filters attached to the class. If sent to a leafnode, we are done.
++Otherwise, restart.
++.TP
++(ii)
++Consult the defmap for the priority assigned to this packet, which depends
++on the TOS bits. Check if the referral is leafless, otherwise restart.
++.TP
++(iii)
++Ask the defmap for instructions for the 'best effort' priority. Check the
++answer for leafness, otherwise restart.
++.TP
++(iv)
++If none of the above returned with an instruction, enqueue at this node.
++.P
++This algorithm makes sure that a packet always ends up somewhere, even while
++you are busy building your configuration.
++
++For more details, see
++.BR tc-cbq-details (8).
++
++.SH LINK SHARING ALGORITHM
++When dequeuing for sending to the network device, CBQ decides which of its
++classes will be allowed to send. It does so with a Weighted Round Robin process
++in which each class with packets gets a chance to send in turn. The WRR process
++starts by asking the highest priority classes (lowest numerically -
++highest semantically) for packets, and will continue to do so until they
++have no more data to offer, in which case the process repeats for lower
++priorities.
++
++Classes by default borrow bandwidth from their siblings. A class can be
++prevented from doing so by declaring it 'bounded'. A class can also indicate
++its unwillingness to lend out bandwidth by being 'isolated'.
++
++.SH QDISC
++The root of a CBQ qdisc class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the CBQ instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional, but very useful if classes
++will be generated within this qdisc.
++.TP
++allot bytes
++This allotment is the 'chunkiness' of link sharing and is used for determining packet
++transmission time tables. The qdisc allot differs slightly from the class allot discussed
++below. Optional. Defaults to a reasonable value, related to avpkt.
++.TP
++avpkt bytes
++The average size of a packet is needed for calculating maxidle, and is also used
++for making sure 'allot' has a safe value. Mandatory.
++.TP
++bandwidth rate
++To determine the idle time, CBQ must know the bandwidth of your underlying
++physical interface, or parent qdisc. This is a vital parameter, more about it
++later. Mandatory.
++.TP
++cell
++The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
++.TP
++mpu
++A zero sized packet may still take time to transmit. This value is the lower
++cap for packet transmission time calculations - packets smaller than this value
++are still deemed to have this size. Defaults to zero.
++.TP
++ewma log
++When CBQ needs to measure the average idle time, it does so using an
++Exponentially Weighted Moving Average which smoothes out measurements into
++a moving average. The EWMA LOG determines how much smoothing occurs. Lower
++values imply greater sensitivity. Must be between 0 and 31. Defaults
++to 5.
++.P
++A CBQ qdisc does not shape out of its own accord. It only needs to know certain
++parameters about the underlying link. Actual shaping is done in classes.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++weight weight
++When dequeuing to the interface, classes are tried for traffic in a
++round-robin fashion. Classes with a higher configured rate will generally
++have more traffic to offer during each round, so it makes sense to allow
++it to dequeue more traffic. All weights under a class are normalized, so
++only the ratios matter. Defaults to the configured rate, unless the priority
++of this class is maximal, in which case it is set to 1.
++.TP
++allot bytes
++Allot specifies how many bytes a qdisc can dequeue
++during each round of the process. This parameter is weighted using the
++renormalized class weight described above. Silently capped at a minimum of
++3/2 avpkt. Mandatory.
++
++.TP
++prio priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++avpkt
++See the QDISC section.
++
++.TP
++rate rate
++Maximum rate this class and all its children combined can send at. Mandatory.
++
++.TP
++bandwidth rate
++This is different from the bandwidth specified when creating a CBQ qdisc! Only
++used to determine maxidle and offtime, which are only calculated when
++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
++
++.TP
++maxburst
++This number of packets is used to calculate maxidle so that when
++avgidle is at maxidle, this number of average packets can be burst
++before avgidle drops to 0. Set it higher to be more tolerant of
++bursts. You can't set maxidle directly, only via this parameter.
++
++.TP
++minburst
++As mentioned before, CBQ needs to throttle in case of
++overlimit. The ideal solution is to do so for exactly the calculated
++idle time, and pass 1 packet. However, Unix kernels generally have a
++hard time scheduling events shorter than 10ms, so it is better to
++throttle for a longer period, and then pass minburst packets in one
++go, and then sleep minburst times longer.
++
++The time to wait is called the offtime. Higher values of minburst lead
++to more accurate shaping in the long term, but to bigger bursts at
++millisecond timescales. Optional.
++
++.TP
++minidle
++If avgidle is below 0, we are overlimits and need to wait until
++avgidle will be big enough to send one packet. To prevent a sudden
++burst from shutting down the link for a prolonged period of time,
++avgidle is reset to minidle if it gets too low.
++
++Minidle is specified in negative microseconds, so 10 means that
++avgidle is capped at -10us. Optional.
++
++.TP
++bounded
++Signifies that this class will not borrow bandwidth from its siblings.
++.TP
++isolated
++Means that this class will not lend bandwidth to its siblings.
++
++.TP
++split major:minor & defmap bitmap[/bitmap]
++If consulting filters attached to a class did not give a verdict,
++CBQ can also classify based on the packet's priority. There are 16
++priorities available, numbered from 0 to 15.
++
++The defmap specifies which priorities this class wants to receive,
++specified as a bitmap. The Least Significant Bit corresponds to priority
++zero. The
++.B split
++parameter tells CBQ at which class the decision must be made, which should
++be a (grand)parent of the class you are adding.
++
++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
++configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
++
++The complementary configuration would then
++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
++Which would send all packets 0, 1, 2, 3, 4 and 5 to 10:2.
++.TP
++estimator interval timeconstant
++CBQ can measure how much bandwidth each class is using, which tc filters
++can use to classify packets with. In order to determine the bandwidth
++it uses a very simple estimator that measures once every
++.B interval
++microseconds how much traffic has passed. This again is an EWMA, for which
++the time constant can be specified, also in microseconds. The
++.B time constant
++corresponds to the sluggishness of the measurement or, conversely, to the
++sensitivity of the average to short bursts. Higher values mean less
++sensitivity.
++
++.SH BUGS
++The actual bandwidth of the underlying link may not be known, for example
++in the case of PPPoE or PPTP connections which in fact may send over a
++pipe, instead of over a physical device. CBQ is quite resilient to major
++errors in the configured bandwidth, probably at the cost of coarser shaping.
++
++Default kernels rely on coarse timing information for making decisions. These
++may make shaping precise in the long term, but inaccurate on second long scales.
++
++See
++.BR tc-cbq-details (8)
++for hints on how to improve this.
++
++.SH SOURCES
++.TP
++o
++Sally Floyd and Van Jacobson, "Link-sharing and Resource
++Management Models for Packet Networks",
++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
++
++.TP
++o
++Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
++
++.TP
++o
++Sally Floyd, "Notes on Class-Based Queueing: Setting
++Parameters", 1996
++
++.TP
++o
++Sally Floyd and Michael Speer, "Experimental Results
++for Class-Based Queueing", 1998, not published.
++
++
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-htb.8 iproute2/debian/manpages/tc-htb.8
+--- iproute2-orig/debian/manpages/tc-htb.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-htb.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,150 @@
++.TH HTB 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++HTB \- Hierarchy Token Bucket
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] htb [ default
++minor-id
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] htb rate
++rate
++.B [ ceil
++rate
++.B ] burst
++bytes
++.B [ cburst
++bytes
++.B ] [ prio
++priority
++.B ]
++
++.SH DESCRIPTION
++HTB is meant as a more understandable and intuitive replacement for
++the CBQ qdisc in Linux. Both CBQ and HTB help you to control the use
++of the outbound bandwidth on a given link. Both allow you to use one
++physical link to simulate several slower links and to send different
++kinds of traffic on different simulated links. In both cases, you have
++to specify how to divide the physical link into simulated links and
++how to decide which simulated link to use for a given packet to be sent.
++
++Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm
++which does not depend on interface characteristics and so does not need to
++know the underlying bandwidth of the outgoing interface.
++
++.SH SHAPING ALGORITHM
++Shaping works as documented in
++.BR tc-tbf (8).
++
++.SH CLASSIFICATION
++Within the one HTB instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, HTB starts at the root and uses various methods to
++determine which class should receive the data.
++
++In the absence of uncommon configuration options, the process is rather easy.
++At each node we look for an instruction, and then go to the class the
++instruction refers us to. If the class found is a barren leaf-node (without
++children), we enqueue the packet there. If it is not yet a leaf node, we do
++the whole thing over again starting from that node.
++
++The following actions are performed, in order at each node we visit, until one
++sends us to another node, or terminates the process.
++.TP
++(i)
++Consult filters attached to the class. If sent to a leafnode, we are done.
++Otherwise, restart.
++.TP
++(ii)
++If none of the above returned with an instruction, enqueue at this node.
++.P
++This algorithm makes sure that a packet always ends up somewhere, even while
++you are busy building your configuration.
++
++.SH LINK SHARING ALGORITHM
++FIXME
++
++.SH QDISC
++The root of a HTB qdisc class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the HTB instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the HTB can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional, but very useful if classes
++will be generated within this qdisc.
++.TP
++default minor-id
++Unclassified traffic gets sent to the class with this minor-id.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++prio priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++rate rate
++Maximum rate this class and all its children are guaranteed. Mandatory.
++
++.TP
++ceil rate
++Maximum rate at which a class can send, if its parent has bandwidth to spare.
++Defaults to the configured rate, which implies no borrowing
++
++.TP
++burst bytes
++Amount of bytes that can be burst at
++.B ceil
++speed, in excess of the configured
++.B rate.
++Should be at least as high as the highest burst of all children.
++
++.TP
++cburst bytes
++Amount of bytes that can be burst at 'infinite' speed, in other words, as fast
++as the interface can transmit them. For perfect evening out, should be equal to at most one average
++packet. Should be at least as high as the highest cburst of all children.
++
++.SH NOTES
++Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel,
++there are 100 timer events per second, the maximum rate is that rate at which 'burst' bytes are sent each timer tick.
++From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte
++burst as 100*12kb*8 equals 10mbit.
++
++.SH SEE ALSO
++.BR tc (8)
++.P
++HTB website: http://luxik.cdi.cz/~devik/qos/htb/
++.SH AUTHOR
++Martin Devera <devik@cdi.cz>. This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-pbfifo.8 iproute2/debian/manpages/tc-pbfifo.8
+--- iproute2-orig/debian/manpages/tc-pbfifo.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-pbfifo.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,72 @@
++.TH PBFIFO 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++pfifo \- Packet limited First In, First Out queue
++.P
++bfifo \- Byte limited First In, First Out queue
++
++.SH SYNOPSIS
++.B tc qdisc ... add pfifo
++.B [ limit
++packets
++.B ]
++.P
++.B tc qdisc ... add bfifo
++.B [ limit
++bytes
++.B ]
++
++.SH DESCRIPTION
++The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the
++simplest queues possible and therefore have no overhead.
++.B pfifo
++constrains the queue size as measured in packets.
++.B bfifo
++does so as measured in bytes.
++
++Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer
++pfifo or bfifo over the default.
++
++.SH ALGORITHM
++A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of
++a list. When a packet needs to be sent out to the network, it is taken from the head of the list.
++
++If the list is too long, no further packets are allowed on. This is called 'tail drop'.
++
++.SH PARAMETERS
++.TP
++limit
++Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults
++to the interface txqueuelen, as specified with
++.BR ifconfig (8)
++or
++.BR ip (8).
++
++For bfifo, it defaults to the txqueuelen multiplied by the interface MTU.
++
++.SH OUTPUT
++The output of
++.B tc -s qdisc ls
++contains the limit, either in packets or in bytes, and the number of bytes
++and packets actually sent. An unsent and dropped packet only appears between braces
++and is not counted as 'Sent'.
++
++In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets.
++No packets were dropped, and as the pfifo queue does not slow down packets, there were also no
++overlimits:
++.P
++.nf
++# tc -s qdisc ls dev eth0
++qdisc pfifo 8001: dev eth0 limit 100p
++ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0)
++.fi
++
++If a backlog occurs, this is displayed as well.
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
++
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-pfifo_fast.8 iproute2/debian/manpages/tc-pfifo_fast.8
+--- iproute2-orig/debian/manpages/tc-pfifo_fast.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-pfifo_fast.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,59 @@
++.TH PFIFO_FAST 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++pfifo_fast \- three-band first in, first out queue
++
++.SH DESCRIPTION
++pfifo_fast is the default qdisc of each interface.
++
++Whenever an interface is created, the pfifo_fast qdisc is automatically used
++as a queue. If another qdisc is attached, it preempts the default
++pfifo_fast, which automatically returns to function when an existing qdisc
++is detached.
++
++In this sense this qdisc is magic, and unlike other qdiscs.
++
++.SH ALGORITHM
++The algorithm is very similar to that of the classful
++.BR tc-prio (8)
++qdisc.
++.B pfifo_fast
++is like three
++.BR tc-pfifo (8)
++queues side by side, where packets can be enqueued in any of the three bands
++based on their Type of Service bits or assigned priority.
++
++Not all three bands are dequeued simultaneously - as long as lower bands
++have traffic, higher bands are never dequeued. This can be used to
++prioritize interactive traffic or penalize 'lowest cost' traffic.
++
++Each band can be txqueuelen packets long, as configured with
++.BR ifconfig (8)
++or
++.BR ip (8).
++Additional packets coming in are not enqueued but are instead dropped.
++
++See
++.BR tc-prio (8)
++for complete details on how TOS bits are translated into bands.
++.SH PARAMETERS
++.TP
++txqueuelen
++The length of the three bands depends on the interface txqueuelen, as
++specified with
++.BR ifconfig (8)
++or
++.BR ip (8).
++
++.SH BUGS
++Does not maintain statistics and does not show up in tc qdisc ls. This is because
++it is the automatic default in the absence of a configured qdisc.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
++
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-prio.8 iproute2/debian/manpages/tc-prio.8
+--- iproute2-orig/debian/manpages/tc-prio.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-prio.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,187 @@
++.TH PRIO 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++PRIO \- Priority qdisc
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] prio [ bands
++bands
++.B ] [ priomap
++band,band,band...
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++The PRIO qdisc is a simple classful queueing discipline that contains
++an arbitrary number of classes of differing priority. The classes are
++dequeued in numerical descending order of priority. PRIO is a scheduler
++and never delays packets - it is a work-conserving qdisc, though the qdiscs
++contained in the classes may not be.
++
++Very useful for lowering latency when there is no need for slowing down
++traffic.
++
++.SH ALGORITHM
++On creation with 'tc qdisc add', a fixed number of bands is created. Each
++band is a class, although it is not possible to add classes with 'tc qdisc
++add', the number of bands to be created must instead be specified on the
++commandline attaching PRIO to its root.
++
++When dequeueing, band 0 is tried first and only if it did not deliver a
++packet does PRIO try band 1, and so onwards. Maximum reliability packets
++should therefore go to band 0, minimum delay to band 1 and the rest to band
++2.
++
++As the PRIO qdisc itself will have minor number 0, band 0 is actually
++major:1, band 1 is major:2, etc. For major, substitute the major number
++assigned to the qdisc on 'tc qdisc add' with the
++.B handle
++parameter.
++
++.SH CLASSIFICATION
++Three methods are available to PRIO to determine in which band a packet will
++be enqueued.
++.TP
++From userspace
++A process with sufficient privileges can encode the destination class
++directly with SO_PRIORITY, see
++.BR socket (7).
++.TP
++with a tc filter
++A tc filter attached to the root qdisc can point traffic directly to a class
++.TP
++with the priomap
++Based on the packet priority, which in turn is derived from the Type of
++Service assigned to the packet.
++.P
++Only the priomap is specific to this qdisc.
++.SH QDISC PARAMETERS
++.TP
++bands
++Number of bands. If changed from the default of 3,
++.B priomap
++must be updated as well.
++.TP
++priomap
++The priomap maps the priority of
++a packet to a class. The priority can either be set directly from userspace,
++or be derived from the Type of Service of the packet.
++
++Determines how packet priorities, as assigned by the kernel, map to
++bands. Mapping occurs based on the TOS octet of the packet, which looks like
++this:
++
++.nf
++0 1 2 3 4 5 6 7
+++---+---+---+---+---+---+---+---+
++| | | |
++|PRECEDENCE | TOS |MBZ|
++| | | |
+++---+---+---+---+---+---+---+---+
++.fi
++
++The four TOS bits (the 'TOS field') are defined as:
++
++.nf
++Binary Decimal Meaning
++-----------------------------------------
++1000 8 Minimize delay (md)
++0100 4 Maximize throughput (mt)
++0010 2 Maximize reliability (mr)
++0001 1 Minimize monetary cost (mmc)
++0000 0 Normal Service
++.fi
++
++As there is 1 bit to the right of these four bits, the actual value of the
++TOS field is double the value of the TOS bits. Tcpdump -v -v shows you the
++value of the entire TOS field, not just the four bits. It is the value you
++see in the first column of this table:
++
++.nf
++TOS Bits Means Linux Priority Band
++------------------------------------------------------------
++0x0 0 Normal Service 0 Best Effort 1
++0x2 1 Minimize Monetary Cost 1 Filler 2
++0x4 2 Maximize Reliability 0 Best Effort 1
++0x6 3 mmc+mr 0 Best Effort 1
++0x8 4 Maximize Throughput 2 Bulk 2
++0xa 5 mmc+mt 2 Bulk 2
++0xc 6 mr+mt 2 Bulk 2
++0xe 7 mmc+mr+mt 2 Bulk 2
++0x10 8 Minimize Delay 6 Interactive 0
++0x12 9 mmc+md 6 Interactive 0
++0x14 10 mr+md 6 Interactive 0
++0x16 11 mmc+mr+md 6 Interactive 0
++0x18 12 mt+md 4 Int. Bulk 1
++0x1a 13 mmc+mt+md 4 Int. Bulk 1
++0x1c 14 mr+mt+md 4 Int. Bulk 1
++0x1e 15 mmc+mr+mt+md 4 Int. Bulk 1
++.fi
++
++The second column contains the value of the relevant
++four TOS bits, followed by their translated meaning. For example, 15 stands
++for a packet wanting Minimal Monetary Cost, Maximum Reliability, Maximum
++Throughput AND Minimum Delay.
++
++The fourth column lists the way the Linux kernel interprets the TOS bits, by
++showing to which Priority they are mapped.
++
++The last column shows the result of the default priomap. On the commandline,
++the default priomap looks like this:
++
++ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
++
++This means that priority 4, for example, gets mapped to band number 1.
++The priomap also allows you to list higher priorities (> 7) which do not
++correspond to TOS mappings, but which are set by other means.
++
++This table from RFC 1349 (read it for more details) explains how
++applications might very well set their TOS bits:
++
++.nf
++TELNET 1000 (minimize delay)
++FTP
++ Control 1000 (minimize delay)
++ Data 0100 (maximize throughput)
++
++TFTP 1000 (minimize delay)
++
++SMTP
++ Command phase 1000 (minimize delay)
++ DATA phase 0100 (maximize throughput)
++
++Domain Name Service
++ UDP Query 1000 (minimize delay)
++ TCP Query 0000
++ Zone Transfer 0100 (maximize throughput)
++
++NNTP 0001 (minimize monetary cost)
++
++ICMP
++ Errors 0000
++ Requests 0000 (mostly)
++ Responses <same as request> (mostly)
++.fi
++
++
++.SH CLASSES
++PRIO classes cannot be configured further - they are automatically created
++when the PRIO qdisc is attached. Each class however can contain yet a
++further qdisc.
++
++.SH BUGS
++Large amounts of traffic in the lower bands can cause starvation of higher
++bands. Can be prevented by attaching a shaper (for example,
++.BR tc-tbf (8))
++to these bands to make sure they cannot dominate the link.
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, J Hadi Salim
++<hadi@cyberus.ca>. This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-red.8 iproute2/debian/manpages/tc-red.8
+--- iproute2-orig/debian/manpages/tc-red.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-red.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,131 @@
++.TH RED 8 "13 December 2001" "iproute2" "Linux"
++.SH NAME
++red \- Random Early Detection
++.SH SYNOPSIS
++.B tc qdisc ... red
++.B limit
++bytes
++.B min
++bytes
++.B max
++bytes
++.B avpkt
++bytes
++.B burst
++packets
++.B [ ecn ] [ bandwidth
++rate
++.B ] probability
++chance
++
++.SH DESCRIPTION
++Random Early Detection is a classless qdisc which manages its queue size
++smartly. Regular queues simply drop packets from the tail when they are
++full, which may not be the optimal behaviour. RED also performs tail drop,
++but does so in a more gradual way.
++
++Once the queue hits a certain average length, packets enqueued have a
++configurable chance of being marked (which may mean dropped). This chance
++increases linearly up to a point called the
++.B max
++average queue length, although the queue might get bigger.
++
++This has a host of benefits over simple taildrop, while not being processor
++intensive. It prevents synchronous retransmits after a burst in traffic,
++which cause further retransmits, etc.
++
++The goal is to have a small queue size, which is good for interactivity
++while not disturbing TCP/IP traffic with too many sudden drops after a burst
++of traffic.
++
++Depending on if ECN is configured, marking either means dropping or
++purely marking a packet as overlimit.
++.SH ALGORITHM
++The average queue size is used for determining the marking
++probability. This is calculated using an Exponential Weighted Moving
++Average, which can be more or less sensitive to bursts.
++
++When the average queue size is below
++.B min
++bytes, no packet will ever be marked. When it exceeds
++.B min,
++the probability of doing so climbs linearly up
++to
++.B probability,
++until the average queue size hits
++.B max
++bytes. Because
++.B probability
++is normally not set to 100%, the queue size might
++conceivably rise above
++.B max
++bytes, so the
++.B limit
++parameter is provided to set a hard maximum for the size of the queue.
++
++.SH PARAMETERS
++.TP
++min
++Average queue size at which marking becomes a possibility.
++.TP
++max
++At this average queue size, the marking probability is maximal. Should be at
++least twice
++.B min
++to prevent synchronous retransmits, higher for low
++.B min.
++.TP
++probability
++Maximum probability for marking, specified as a floating point
++number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%,
++respectively).
++.TP
++limit
++Hard limit on the real (not average) queue size in bytes. Further packets
++are dropped. Should be set higher than max+burst. It is advised to set this
++a few times higher than
++.B max.
++.TP
++burst
++Used for determining how fast the average queue size is influenced by the
++real queue size. Larger values make the calculation more sluggish, allowing
++longer bursts of traffic before marking starts. Real life experiments
++support the following guideline: (min+min+max)/(3*avpkt).
++.TP
++avpkt
++Specified in bytes. Used with burst to determine the time constant for
++average queue size calculations. 1000 is a good value.
++.TP
++bandwidth
++This rate is used for calculating the average queue size after some
++idle time. Should be set to the bandwidth of your interface. Does not mean
++that RED will shape for you! Optional.
++.TP
++ecn
++As mentioned before, RED can either 'mark' or 'drop'. Explicit Congestion
++Notification allows RED to notify remote hosts that their rate exceeds the
++amount of bandwidth available. Non-ECN capable hosts can only be notified by
++dropping a packet. If this parameter is specified, packets which indicate
++that their hosts honor ECN will only be marked and not dropped, unless the
++queue size hits
++.B limit
++bytes. Needs a tc binary with RED support compiled in. Recommended.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH SOURCES
++.TP
++o
++Floyd, S., and Jacobson, V., Random Early Detection gateways for
++Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html
++.TP
++o
++Some changes to the algorithm by Alexey N. Kuznetsov.
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, Alexey Makarenko
++<makar@phoenix.kharkov.ua>, J Hadi Salim <hadi@nortelnetworks.com>.
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-sfq.8 iproute2/debian/manpages/tc-sfq.8
+--- iproute2-orig/debian/manpages/tc-sfq.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-sfq.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,107 @@
++.TH TC 8 "8 December 2001" "iproute2" "Linux"
++.SH NAME
++sfq \- Stochastic Fairness Queueing
++.SH SYNOPSIS
++.B tc qdisc ... perturb
++seconds
++.B quantum
++bytes
++
++.SH DESCRIPTION
++
++Stochastic Fairness Queueing is a classless queueing discipline available for
++traffic control with the
++.BR tc (8)
++command.
++
++SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'.
++The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing
++any single flow from drowning out the rest.
++
++This may in fact have some effect in mitigating a Denial of Service attempt.
++
++SFQ is work-conserving and therefore always delivers a packet if it has one available.
++.SH ALGORITHM
++On enqueueing, each packet is assigned to a hash bucket, based on
++.TP
++(i)
++Source address
++.TP
++(ii)
++Destination address
++.TP
++(iii)
++Source port
++.P
++If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP.
++Packets with other protocols are hashed based on the 32-bit representation of their
++destination and the socket they belong to. A flow corresponds mostly to a TCP/IP
++connection.
++
++Each of these buckets should represent a unique flow. Because multiple flows may
++get hashed to the same bucket, the hashing algorithm is perturbed at configurable
++intervals so that the unfairness lasts only for a short while. Perturbation may
++however cause some inadvertent packet reordering to occur.
++
++When dequeuing, each hashbucket with data is queried in a round robin fashion.
++
++The compile time maximum length of the SFQ is 128 packets, which can be spread over
++at most 128 buckets of 1024 available. In case of overflow, tail-drop is performed
++on the fullest bucket, thus maintaining fairness.
++
++.SH PARAMETERS
++.TP
++perturb
++Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that
++no perturbation occurs. Do not set too low for each perturbation may cause some packet
++reordering. Advised value: 10
++.TP
++quantum
++Amount of bytes a flow is allowed to dequeue during a round of the round robin process.
++Defaults to the MTU of the interface which is also the advised value and the minimum value.
++
++.SH EXAMPLE & USAGE
++
++To attach to device ppp0:
++.P
++# tc qdisc add dev ppp0 root sfq perturb 10
++.P
++Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful
++if it owns the queue.
++This is the case when the link speed equals the actually available bandwidth. This holds
++for regular phone modems, ISDN connections and direct non-switched ethernet links.
++.P
++Most often, cable modems and DSL devices do not fall into this category. The same holds
++for when connected to a switch and trying to send data to a congested segment also
++connected to the switch.
++.P
++In this case, the effective queue does not reside within Linux and is therefore not
++available for scheduling.
++.P
++Embed SFQ in a classful qdisc to make sure it owns the queue.
++
++.SH SOURCE
++.TP
++o
++Paul E. McKenney "Stochastic Fairness Queuing",
++IEEE INFOCOM'90 Proceedings, San Francisco, 1990.
++
++.TP
++o
++Paul E. McKenney "Stochastic Fairness Queuing",
++"Interworking: Research and Experience", v.2, 1991, p.113-131.
++
++.TP
++o
++See also:
++M. Shreedhar and George Varghese "Efficient Fair
++Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc-tbf.8 iproute2/debian/manpages/tc-tbf.8
+--- iproute2-orig/debian/manpages/tc-tbf.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc-tbf.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,138 @@
++.TH TC 8 "13 December 2001" "iproute2" "Linux"
++.SH NAME
++tbf \- Token Bucket Filter
++.SH SYNOPSIS
++.B tc qdisc ... tbf rate
++rate
++.B burst
++bytes/cell
++.B ( latency
++ms
++.B | limit
++bytes
++.B ) [ mpu
++bytes
++.B [ peakrate
++rate
++.B mtu
++bytes/cell
++.B ] ]
++.P
++burst is also known as buffer and maxburst. mtu is also known as minburst.
++.SH DESCRIPTION
++
++The Token Bucket Filter is a classless queueing discipline available for
++traffic control with the
++.BR tc (8)
++command.
++
++TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle
++itself, although packets are available, to ensure that the configured rate is not exceeded.
++On all platforms except for Alpha,
++it is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness,
++sending out data exactly at the configured rates.
++
++Much higher rates are possible but at the cost of losing the minimal burstiness. In that
++case, data is on average dequeued at the configured rate but may be sent much faster at millisecond
++timescales. Because of further queues living in network adaptors, this is often not a problem.
++
++Kernels with a higher 'HZ' can achieve higher rates with perfect burstiness. On Alpha, HZ is ten
++times higher, leading to a 10mbit/s limit to perfection. These calculations hold for packets of on
++average 1000 bytes.
++
++.SH ALGORITHM
++As the name implies, traffic is filtered based on the expenditure of
++.B tokens.
++Tokens roughly correspond to bytes, with the additional constraint that each packet consumes
++some tokens, no matter how small it is. This reflects the fact that even a zero-sized packet occupies
++the link for some time.
++
++On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst
++in one go. Tokens arrive at a steady rate, until the bucket is full.
++
++If no tokens are available, packets are queued, up to a configured limit. The TBF now
++calculates the token deficit, and throttles until the first packet in the queue can be sent.
++
++If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured
++to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF
++with a very small bucket, so that it doesn't burst.
++
++To achieve perfection, the second bucket may contain only a single packet, which leads to
++the earlier mentioned 1mbit/s limit.
++
++This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends
++on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100
++packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s.
++
++.SH PARAMETERS
++See
++.BR tc (8)
++for how to specify the units of these values.
++.TP
++limit or latency
++Limit is the number of bytes that can be queued waiting for tokens to become
++available. You can also specify this the other way around by setting the
++latency parameter, which specifies the maximum amount of time a packet can
++sit in the TBF. The latter calculation takes into account the size of the
++bucket, the rate and possibly the peakrate (if set). These two parameters
++are mutually exclusive.
++.TP
++burst
++Also known as buffer or maxburst.
++Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously.
++In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer
++if you want to reach your configured rate!
++
++If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket.
++The minimum buffer size can be calculated by dividing the rate by HZ.
++
++Token usage calculations are performed using a table which by default has a resolution of 8 packets.
++This resolution can be changed by specifying the
++.B cell
++size with the burst. For example, to specify a 6000 byte buffer with a 16
++byte cell size, set a burst of 6000/16. You will probably never have to set
++this. Must be an integral power of 2.
++.TP
++mpu
++A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit
++determines the minimal token usage (specified in bytes) for a packet. Defaults to zero.
++.TP
++rate
++The speed knob. See remarks above about limits! See
++.BR tc (8)
++for units.
++.PP
++Furthermore, if a peakrate is desired, the following parameters are available:
++
++.TP
++peakrate
++Maximum depletion rate of the bucket. Limited to 1mbit/s on Intel, 10mbit/s on Alpha. The peakrate does
++not need to be set, it is only necessary if perfect millisecond timescale shaping is required.
++
++.TP
++mtu/minburst
++Specifies the size of the peakrate bucket. For perfect accuracy, should be set to the MTU of the interface.
++If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst
++allows around 3mbit/s of peakrate, given 1000 byte packets.
++
++Like the regular burstsize you can also specify a
++.B cell
++size.
++.SH EXAMPLE & USAGE
++
++To attach a TBF with a sustained maximum rate of 0.5mbit/s, a peakrate of 1.0mbit/s,
++a 5kilobyte buffer, with a pre-bucket queue size limit calculated so the TBF causes
++at most 70ms of latency, with perfect peakrate behaviour, issue:
++.P
++# tc qdisc add dev eth0 root tbf rate 0.5mbit \\
++ burst 5kb latency 70ms peakrate 1mbit \\
++ minburst 1540
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/manpages/tc.8 iproute2/debian/manpages/tc.8
+--- iproute2-orig/debian/manpages/tc.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/manpages/tc.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,348 @@
++.TH TC 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++tc \- show / manipulate traffic control settings
++.SH SYNOPSIS
++.B tc qdisc [ add | change | replace | link ] dev
++DEV
++.B
++[ parent
++qdisc-id
++.B | root ]
++.B [ handle
++qdisc-id ] qdisc
++[ qdisc specific parameters ]
++.P
++
++.B tc class [ add | change | replace ] dev
++DEV
++.B parent
++qdisc-id
++.B [ classid
++class-id ] qdisc
++[ qdisc specific parameters ]
++.P
++
++.B tc filter [ add | change | replace ] dev
++DEV
++.B [ parent
++qdisc-id
++.B | root ] protocol
++protocol
++.B prio
++priority filtertype
++[ filtertype specific parameters ]
++.B flowid
++flow-id
++
++.B tc [-s | -d ] qdisc show [ dev
++DEV
++.B ]
++.P
++.B tc [-s | -d ] class show dev
++DEV
++.P
++.B tc filter show dev
++DEV
++
++.SH DESCRIPTION
++.B Tc
++is used to configure Traffic Control in the Linux kernel. Traffic Control consists
++of the following:
++
++.TP
++SHAPING
++When traffic is shaped, its rate of transmission is under control. Shaping may
++be more than lowering the available bandwidth - it is also used to smooth out
++bursts in traffic for better network behaviour. Shaping occurs on egress.
++
++.TP
++SCHEDULING
++By scheduling the transmission of packets it is possible to improve interactivity
++for traffic that needs it while still guaranteeing bandwidth to bulk transfers. Reordering
++is also called prioritizing, and happens only on egress.
++
++.TP
++POLICING
++Where shaping deals with transmission of traffic, policing pertains to traffic
++arriving. Policing thus occurs on ingress.
++
++.TP
++DROPPING
++Traffic exceeding a set bandwidth may also be dropped forthwith, both on
++ingress and on egress.
++
++.P
++Processing of traffic is controlled by three kinds of objects: qdiscs,
++classes and filters.
++
++.SH QDISCS
++.B qdisc
++is short for 'queueing discipline' and it is elementary to
++understanding traffic control. Whenever the kernel needs to send a
++packet to an interface, it is
++.B enqueued
++to the qdisc configured for that interface. Immediately afterwards, the kernel
++tries to get as many packets as possible from the qdisc, for giving them
++to the network adaptor driver.
++
++A simple QDISC is the 'pfifo' one, which does no processing at all and is a pure
++First In, First Out queue. It does however store traffic when the network interface
++can't handle it momentarily.
++
++.SH CLASSES
++Some qdiscs can contain classes, which contain further qdiscs - traffic may
++then be enqueued in any of the inner qdiscs, which are within the
++.B classes.
++When the kernel tries to dequeue a packet from such a
++.B classful qdisc
++it can come from any of the classes. A qdisc may for example prioritize
++certain kinds of traffic by trying to dequeue from certain classes
++before others.
++
++.SH FILTERS
++A
++.B filter
++is used by a classful qdisc to determine in which class a packet will
++be enqueued. Whenever traffic arrives at a class with subclasses, it needs
++to be classified. Various methods may be employed to do so; one of these
++is the use of filters. All filters attached to the class are called, until one of
++them returns with a verdict. If no verdict was made, other criteria may be
++available. This differs per qdisc.
++
++It is important to notice that filters reside
++.B within
++qdiscs - they are not masters of what happens.
++
++.SH CLASSLESS QDISCS
++The classless qdiscs are:
++.TP
++[p|b]fifo
++Simplest usable qdisc, pure First In, First Out behaviour. Limited in
++packets or in bytes.
++.TP
++pfifo_fast
++Standard qdisc for 'Advanced Router' enabled kernels. Consists of a three-band
++queue which honors Type of Service flags, as well as the priority that may be
++assigned to a packet.
++.TP
++red
++Random Early Detection simulates physical congestion by randomly dropping
++packets when nearing configured bandwidth allocation. Well suited to very
++large bandwidth applications.
++.TP
++sfq
++Stochastic Fairness Queueing reorders queued traffic so each 'session'
++gets to send a packet in turn.
++.TP
++tbf
++The Token Bucket Filter is suited for slowing traffic down to a precisely
++configured rate. Scales well to large bandwidths.
++.SH CONFIGURING CLASSLESS QDISCS
++In the absence of classful qdiscs, classless qdiscs can only be attached at
++the root of a device. Full syntax:
++.P
++.B tc qdisc add dev
++DEV
++.B root
++QDISC QDISC-PARAMETERS
++
++To remove, issue
++.P
++.B tc qdisc del dev
++DEV
++.B root
++
++The
++.B pfifo_fast
++qdisc is the automatic default in the absence of a configured qdisc.
++
++.SH CLASSFUL QDISCS
++The classful qdiscs are:
++.TP
++CBQ
++Class Based Queueing implements a rich linksharing hierarchy of classes.
++It contains shaping elements as well as prioritizing capabilities. Shaping is
++performed using link idle time calculations based on average packet size and
++underlying link bandwidth. The latter may be ill-defined for some interfaces.
++.TP
++HTB
++The Hierarchy Token Bucket implements a rich linksharing hierarchy of
++classes with an emphasis on conforming to existing practices. HTB facilitates
++guaranteeing bandwidth to classes, while also allowing specification of upper
++limits to inter-class sharing. It contains shaping elements, based on TBF and
++can prioritize classes.
++.TP
++PRIO
++The PRIO qdisc is a non-shaping container for a configurable number of
++classes which are dequeued in order. This allows for easy prioritization
++of traffic, where lower classes are only able to send if higher ones have
++no packets available. To facilitate configuration, Type Of Service bits are
++honored by default.
++.SH THEORY OF OPERATION
++Classes form a tree, where each class has a single parent.
++A class may have multiple children. Some qdiscs allow for runtime addition
++of classes (CBQ, HTB) while others (PRIO) are created with a static number of
++children.
++
++Qdiscs which allow dynamic addition of classes can have zero or more
++subclasses to which traffic may be enqueued.
++
++Furthermore, each class contains a
++.B leaf qdisc
++which by default has
++.B pfifo
++behaviour though another qdisc can be attached in place. This qdisc may again
++contain classes, but each class can have only one leaf qdisc.
++
++When a packet enters a classful qdisc it can be
++.B classified
++to one of the classes within. Three criteria are available, although not all
++qdiscs will use all three:
++.TP
++tc filters
++If tc filters are attached to a class, they are consulted first
++for relevant instructions. Filters can match on all fields of a packet header,
++as well as on the firewall mark applied by ipchains or iptables. See
++.BR tc-filters (8).
++.TP
++Type of Service
++Some qdiscs have built in rules for classifying packets based on the TOS field.
++.TP
++skb->priority
++Userspace programs can encode a class-id in the 'skb->priority' field using
++the SO_PRIORITY option.
++.P
++Each node within the tree can have its own filters but higher level filters
++may also point directly to lower classes.
++
++If classification did not succeed, packets are enqueued to the leaf qdisc
++attached to that class. Check qdisc specific manpages for details, however.
++
++.SH NAMING
++All qdiscs, classes and filters have IDs, which can either be specified
++or be automatically assigned.
++
++IDs consist of a major number and a minor number, separated by a colon.
++
++.TP
++QDISCS
++A qdisc, which potentially can have children,
++gets assigned a major number, called a 'handle', leaving the minor
++number namespace available for classes. The handle is expressed as '10:'.
++It is customary to explicitly assign a handle to qdiscs expected to have
++children.
++
++.TP
++CLASSES
++Classes residing under a qdisc share their qdisc major number, but each have
++a separate minor number called a 'classid' that has no relation to their
++parent classes, only to their parent qdisc. The same naming custom as for
++qdiscs applies.
++
++.TP
++FILTERS
++Filters have a three part ID, which is only needed when using a hashed
++filter hierarchy, for which see
++.BR tc-filters (8).
++.SH UNITS
++All parameters accept a floating point number, possibly followed by a unit.
++.P
++Bandwidths or rates can be specified in:
++.TP
++kbps
++Kilobytes per second
++.TP
++mbps
++Megabytes per second
++.TP
++kbit
++Kilobits per second
++.TP
++mbit
++Megabits per second
++.TP
++bps or a bare number
++Bytes per second
++.P
++Amounts of data can be specified in:
++.TP
++kb or k
++Kilobytes
++.TP
++mb or m
++Megabytes
++.TP
++mbit
++Megabits
++.TP
++kbit
++Kilobits
++.TP
++b or a bare number
++Bytes.
++.P
++Lengths of time can be specified in:
++.TP
++s, sec or secs
++Whole seconds
++.TP
++ms, msec or msecs
++Milliseconds
++.TP
++us, usec, usecs or a bare number
++Microseconds.
++
++.SH TC COMMANDS
++The following commands are available for qdiscs, classes and filters:
++.TP
++add
++Add a qdisc, class or filter to a node. For all entities, a
++.B parent
++must be passed, either by passing its ID or by attaching directly to the root of a device.
++When creating a qdisc or a filter, it can be named with the
++.B handle
++parameter. A class is named with the
++.B classid
++parameter.
++
++.TP
++delete
++A qdisc can be deleted by specifying its handle, which may also be 'root'. All subclasses and their leaf qdiscs
++are automatically deleted, as well as any filters attached to them.
++
++.TP
++change
++Some entities can be modified 'in place'. Shares the syntax of 'add', with the exception
++that the handle cannot be changed and neither can the parent. In other words,
++.B
++change
++cannot move a node.
++
++.TP
++replace
++Performs a nearly atomic remove/add on an existing node id. If the node does not exist yet
++it is created.
++
++.TP
++link
++Only available for qdiscs and performs a replace where the node
++must exist already.
++
++
++.SH HISTORY
++.B tc
++was written by Alexey N. Kuznetsov and added in Linux 2.2.
++.SH SEE ALSO
++.BR tc-cbq (8),
++.BR tc-htb (8),
++.BR tc-sfq (8),
++.BR tc-red (8),
++.BR tc-tbf (8),
++.BR tc-pfifo (8),
++.BR tc-bfifo (8),
++.BR tc-pfifo_fast (8),
++.BR tc-filters (8)
++
++.SH AUTHOR
++Manpage maintained by bert hubert (ahu@ds9a.nl)
++
+diff -Naur iproute2-orig/debian/postinst iproute2/debian/postinst
+--- iproute2-orig/debian/postinst 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/postinst 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,6 @@
++#!/bin/sh -e
++# Package post-installation script; acts only when its first argument is "configure".
++# FHS: if /usr/doc exists and has no iproute entry, symlink it to ../share/doc/iproute.
++if [ "$1" = "configure" -a -d /usr/doc -a ! -e /usr/doc/iproute ]; then
++ ln -sf ../share/doc/iproute /usr/doc/iproute
++fi
+diff -Naur iproute2-orig/debian/postrm iproute2/debian/postrm
+--- iproute2-orig/debian/postrm 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/postrm 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,6 @@
++#!/bin/sh
++# Post-removal script: when called with "purge", delete /etc/iproute2 recursively.
++if [ "$1" = "purge" ]
++then
++ rm -rf /etc/iproute2
++fi
+diff -Naur iproute2-orig/debian/prerm iproute2/debian/prerm
+--- iproute2-orig/debian/prerm 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/prerm 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,5 @@
++#!/bin/sh -e
++# Pre-removal script: on "upgrade" or "remove", delete /usr/doc/iproute if it is a symlink.
++if [ \( "$1" = "upgrade" -o "$1" = "remove" \) -a -L /usr/doc/iproute ]; then
++ rm -f /usr/doc/iproute
++fi
+diff -Naur iproute2-orig/debian/rules iproute2/debian/rules
+--- iproute2-orig/debian/rules 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/rules 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,85 @@
++#!/usr/bin/make -f
++# debian/rules: builds iproute and assembles the binary package under debian/tmp.
++# Copyright (C) 1999 Roberto Lumbreras <rover@debian.org>
++# Copyright (C) 1999-2002 Juan Cespedes <cespedes@debian.org>
++# Copying: GPL
++# bash is needed for the brace expansion used by "install -d" in binary-arch.
++SHELL = bash
++# Package name and version strings are extracted from debian/changelog with perl.
++PACKAGE = $(shell perl -e 'print <> =~ /^(.*) \(.*\)/' debian/changelog)
++PKG_VER = $(shell perl -e 'print <> =~ /\((.*)\)/' debian/changelog)
++PKG_UPVER= $(shell perl -e 'print <> =~ /\((.*)-[^-]*\)/' debian/changelog)
++# File lists consumed by the binary-arch recipe.
++BINS = ip/ip
++SBINS = ip/rtmon ip/rtacct tc/tc
++SHBINS = ip/routef ip/routel # ip/ifcfg ip/rtpr
++DOCS = README* doc/Plan debian/README.Debian
++MAN8 = debian/manpages/*.8
++MANLINKS= rtmon rtacct routef routel
++TEXDOCS = ip-cref ip-tunnels api-ip6-flowlabels
++
++build: stamp-build
++# Move include-glibc/netinet/in.h aside (unless already done), build the tree and doc/, then stamp.
++stamp-build:
++ test -f include-glibc/netinet/in.h.orig || \
++ mv include-glibc/netinet/in.h \
++ include-glibc/netinet/in.h.orig
++ $(MAKE) KERNEL_INCLUDE=/usr/include
++ $(MAKE) -C doc
++ touch stamp-build
++
++binary: binary-indep binary-arch
++
++binary-indep:
++# Stage binaries, docs, man pages and etc/ into debian/tmp, then run dpkg --build on it.
++binary-arch: checkroot stamp-build
++ $(RM) -r debian/tmp
++ install -d -m0755 debian/tmp/{DEBIAN,bin,sbin,usr/{bin,share/doc/$(PACKAGE),share/man/man{7,8}}}
++ install -s -m0755 $(BINS) debian/tmp/bin/
++ install -s -m0755 $(SBINS) debian/tmp/sbin/
++ ln -s /bin/ip debian/tmp/sbin/ip
++ install -m0755 $(SHBINS) debian/tmp/usr/bin/
++ cp -p $(DOCS) debian/tmp/usr/share/doc/$(PACKAGE)/
++ cp -rp examples debian/tmp/usr/share/doc/$(PACKAGE)/
++ find debian/tmp/usr/share/doc/$(PACKAGE)/examples -type f -exec chmod -x {} \;
++ install -m0644 debian/changelog debian/tmp/usr/share/doc/$(PACKAGE)/changelog.Debian
++ cp -p RELNOTES debian/tmp/usr/share/doc/$(PACKAGE)/changelog
++ for i in $(TEXDOCS); do \
++ install -m0644 doc/$$i.tex debian/tmp/usr/share/doc/$(PACKAGE)/; \
++ install -m0644 doc/$$i.dvi debian/tmp/usr/share/doc/$(PACKAGE)/; \
++ install -m0644 doc/$$i.ps debian/tmp/usr/share/doc/$(PACKAGE)/; \
++ done
++ install -m0644 $(MAN8) debian/tmp/usr/share/man/man8/
++ gzip -9fr debian/tmp/usr/share || true
++ ln -s tc-pbfifo.8.gz debian/tmp/usr/share/man/man8/tc-pfifo.8.gz
++ ln -s tc-pbfifo.8.gz debian/tmp/usr/share/man/man8/tc-bfifo.8.gz
++ for i in $(MANLINKS); do \
++ ln -s ../man7/undocumented.7.gz debian/tmp/usr/share/man/man8/$$i.8.gz; \
++ done
++ cp -p debian/copyright debian/tmp/usr/share/doc/$(PACKAGE)/
++ cp -rp etc debian/tmp/
++ install -m0644 debian/conffiles debian/tmp/DEBIAN/
++
++ dpkg-shlibdeps $(BINS) $(SBINS)
++ dpkg-gencontrol -isp
++ chown -R root.root debian/tmp
++ chmod -R u=rwX,go=rX debian/tmp
++ dpkg --build debian/tmp ..
++# Sanity check: succeeds only if debian/rules exists relative to the current directory.
++checkdir:
++ @test -f debian/rules
++# Fails with "Error: not super-user" unless "id -u" prints 0.
++checkroot: checkdir
++ @test 0 = `id -u` || { echo "Error: not super-user"; exit 1; }
++# Remove build products and restore include-glibc/netinet/in.h from its .orig copy if present.
++clean: checkdir debian/control
++ $(RM) stamp-build debian/files debian/substvars
++ $(MAKE) clean
++ $(MAKE) -C doc clean
++ $(RM) `find . -name "*~" -o -name core`
++ $(RM) -r debian/tmp
++ test -f include-glibc/netinet/in.h.orig && \
++ mv include-glibc/netinet/in.h.orig \
++ include-glibc/netinet/in.h || true
++
++.PHONY: build binary binary-arch binary-indep checkdir checkroot clean
+diff -Naur iproute2-orig/debian/tc-cbq.8 iproute2/debian/tc-cbq.8
+--- iproute2-orig/debian/tc-cbq.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-cbq.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,353 @@
++.TH CBQ 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++CBQ \- Class Based Queueing
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] cbq [ allot
++bytes
++.B ] avpkt
++bytes
++.B bandwidth
++rate
++.B [ cell
++bytes
++.B ] [ ewma
++log
++.B ] [ mpu
++bytes
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] cbq allot
++bytes
++.B [ bandwidth
++rate
++.B ] [ rate
++rate
++.B ] prio
++priority
++.B [ weight
++weight
++.B ] [ minburst
++packets
++.B ] [ maxburst
++packets
++.B ] [ ewma
++log
++.B ] [ cell
++bytes
++.B ] avpkt
++bytes
++.B [ mpu
++bytes
++.B ] [ bounded isolated ] [ split
++handle
++.B & defmap
++defmap
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++Class Based Queueing is a classful qdisc that implements a rich
++linksharing hierarchy of classes. It contains shaping elements as
++well as prioritizing capabilities. Shaping is performed using link
++idle time calculations based on the timing of dequeue events and
++underlying link bandwidth.
++
++.SH SHAPING ALGORITHM
++When shaping a 10mbit/s connection to 1mbit/s, the link will
++be idle 90% of the time. If it isn't, it needs to be throttled so that it
++IS idle 90% of the time.
++
++During operations, the effective idletime is measured using an
++exponential weighted moving average (EWMA), which considers recent
++packets to be exponentially more important than past ones. The Unix
++loadaverage is calculated in the same way.
++
++The calculated idle time is subtracted from the EWMA measured one,
++the resulting number is called 'avgidle'. A perfectly loaded link has
++an avgidle of zero: packets arrive exactly at the calculated
++interval.
++
++An overloaded link has a negative avgidle and if it gets too negative,
++CBQ throttles and is then 'overlimit'.
++
++Conversely, an idle link might amass a huge avgidle, which would then
++allow infinite bandwidths after a few hours of silence. To prevent
++this, avgidle is capped at
++.B maxidle.
++
++If overlimit, in theory, the CBQ could throttle itself for exactly the
++amount of time that was calculated to pass between packets, and then
++pass one packet, and throttle again. Due to timer resolution constraints,
++this may not be feasible, see the
++.B minburst
++parameter below.
++
++.SH CLASSIFICATION
++Within the one CBQ instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, CBQ starts at the root and uses various methods to
++determine which class should receive the data.
++
++In the absence of uncommon configuration options, the process is rather easy.
++At each node we look for an instruction, and then go to the class the
++instruction refers us to. If the class found is a barren leaf-node (without
++children), we enqueue the packet there. If it is not yet a leaf node, we do
++the whole thing over again starting from that node.
++
++The following actions are performed, in order at each node we visit, until one
++sends us to another node, or terminates the process.
++.TP
++(i)
++Consult filters attached to the class. If sent to a leafnode, we are done.
++Otherwise, restart.
++.TP
++(ii)
++Consult the defmap for the priority assigned to this packet, which depends
++on the TOS bits. Check if the referral is leafless, otherwise restart.
++.TP
++(iii)
++Ask the defmap for instructions for the 'best effort' priority. Check the
++answer for leafness, otherwise restart.
++.TP
++(iv)
++If none of the above returned with an instruction, enqueue at this node.
++.P
++This algorithm makes sure that a packet always ends up somewhere, even while
++you are busy building your configuration.
++
++For more details, see
++.BR tc-cbq-details (8).
++
++.SH LINK SHARING ALGORITHM
++When dequeuing for sending to the network device, CBQ decides which of its
++classes will be allowed to send. It does so with a Weighted Round Robin process
++in which each class with packets gets a chance to send in turn. The WRR process
++starts by asking the highest priority classes (lowest numerically -
++highest semantically) for packets, and will continue to do so until they
++have no more data to offer, in which case the process repeats for lower
++priorities.
++
++Classes by default borrow bandwidth from their siblings. A class can be
++prevented from doing so by declaring it 'bounded'. A class can also indicate
++its unwillingness to lend out bandwidth by being 'isolated'.
++
++.SH QDISC
++The root of a CBQ qdisc class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the CBQ instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional, but very useful if classes
++will be generated within this qdisc.
++.TP
++allot bytes
++This allotment is the 'chunkiness' of link sharing and is used for determining packet
++transmission time tables. The qdisc allot differs slightly from the class allot discussed
++below. Optional. Defaults to a reasonable value, related to avpkt.
++.TP
++avpkt bytes
++The average size of a packet is needed for calculating maxidle, and is also used
++for making sure 'allot' has a safe value. Mandatory.
++.TP
++bandwidth rate
++To determine the idle time, CBQ must know the bandwidth of your underlying
++physical interface, or parent qdisc. This is a vital parameter, more about it
++later. Mandatory.
++.TP
++cell
++The cell size determines the granularity of packet transmission time calculations. Has a sensible default.
++.TP
++mpu
++A zero sized packet may still take time to transmit. This value is the lower
++cap for packet transmission time calculations - packets smaller than this value
++are still deemed to have this size. Defaults to zero.
++.TP
++ewma log
++When CBQ needs to measure the average idle time, it does so using an
++Exponentially Weighted Moving Average which smoothes out measurements into
++a moving average. The EWMA LOG determines how much smoothing occurs. Lower
++values imply greater sensitivity. Must be between 0 and 31. Defaults
++to 5.
++.P
++A CBQ qdisc does not shape out of its own accord. It only needs to know certain
++parameters about the underlying link. Actual shaping is done in classes.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++weight weight
++When dequeuing to the interface, classes are tried for traffic in a
++round-robin fashion. Classes with a higher configured qdisc will generally
++have more traffic to offer during each round, so it makes sense to allow
++it to dequeue more traffic. All weights under a class are normalized, so
++only the ratios matter. Defaults to the configured rate, unless the priority
++of this class is maximal, in which case it is set to 1.
++.TP
++allot bytes
++Allot specifies how many bytes a qdisc can dequeue
++during each round of the process. This parameter is weighted using the
++renormalized class weight described above. Silently capped at a minimum of
++3/2 avpkt. Mandatory.
++
++.TP
++prio priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++avpkt
++See the QDISC section.
++
++.TP
++rate rate
++Maximum rate this class and all its children combined can send at. Mandatory.
++
++.TP
++bandwidth rate
++This is different from the bandwidth specified when creating a CBQ qdisc! Only
++used to determine maxidle and offtime, which are only calculated when
++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst.
++
++.TP
++maxburst
++This number of packets is used to calculate maxidle so that when
++avgidle is at maxidle, this number of average packets can be burst
++before avgidle drops to 0. Set it higher to be more tolerant of
++bursts. You can't set maxidle directly, only via this parameter.
++
++.TP
++minburst
++As mentioned before, CBQ needs to throttle in case of
++overlimit. The ideal solution is to do so for exactly the calculated
++idle time, and pass 1 packet. However, Unix kernels generally have a
++hard time scheduling events shorter than 10ms, so it is better to
++throttle for a longer period, and then pass minburst packets in one
++go, and then sleep minburst times longer.
++
++The time to wait is called the offtime. Higher values of minburst lead
++to more accurate shaping in the long term, but to bigger bursts at
++millisecond timescales. Optional.
++
++.TP
++minidle
++If avgidle is below 0, we are overlimit and need to wait until
++avgidle will be big enough to send one packet. To prevent a sudden
++burst from shutting down the link for a prolonged period of time,
++avgidle is reset to minidle if it gets too low.
++
++Minidle is specified in negative microseconds, so 10 means that
++avgidle is capped at -10us. Optional.
++
++.TP
++bounded
++Signifies that this class will not borrow bandwidth from its siblings.
++.TP
++isolated
++Means that this class will not lend bandwidth to its siblings.
++
++.TP
++split major:minor & defmap bitmap[/bitmap]
++If consulting filters attached to a class did not give a verdict,
++CBQ can also classify based on the packet's priority. There are 16
++priorities available, numbered from 0 to 15.
++
++The defmap specifies which priorities this class wants to receive,
++specified as a bitmap. The Least Significant Bit corresponds to priority
++zero. The
++.B split
++parameter tells CBQ at which class the decision must be made, which should
++be a (grand)parent of the class you are adding.
++
++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0'
++configures class 10:0 to send packets with priorities 6 and 7 to 10:1.
++
++The complementary configuration would then
++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f'
++which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2.
++.TP
++estimator interval timeconstant
++CBQ can measure how much bandwidth each class is using, which tc filters
++can use to classify packets with. In order to determine the bandwidth
++it uses a very simple estimator that measures once every
++.B interval
++microseconds how much traffic has passed. This again is an EWMA, for which
++the time constant can be specified, also in microseconds. The
++.B time constant
++corresponds to the sluggishness of the measurement or, conversely, to the
++sensitivity of the average to short bursts. Higher values mean less
++sensitivity.
++
++.SH BUGS
++The actual bandwidth of the underlying link may not be known, for example
++in the case of PPPoE or PPTP connections which in fact may send over a
++pipe, instead of over a physical device. CBQ is quite resilient to major
++errors in the configured bandwidth, probably at the cost of coarser shaping.
++
++Default kernels rely on coarse timing information for making decisions. These
++may make shaping precise in the long term, but inaccurate on second long scales.
++
++See
++.BR tc-cbq-details (8)
++for hints on how to improve this.
++
++.SH SOURCES
++.TP
++o
++Sally Floyd and Van Jacobson, "Link-sharing and Resource
++Management Models for Packet Networks",
++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
++
++.TP
++o
++Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
++
++.TP
++o
++Sally Floyd, "Notes on Class-Based Queueing: Setting
++Parameters", 1996
++
++.TP
++o
++Sally Floyd and Michael Speer, "Experimental Results
++for Class-Based Queueing", 1998, not published.
++
++
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc-htb.8 iproute2/debian/tc-htb.8
+--- iproute2-orig/debian/tc-htb.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-htb.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,153 @@
++.TH HTB 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++HTB \- Hierarchy Token Bucket
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] htb [ default
++minor-id
++.B ]
++
++.B tc class ... dev
++dev
++.B parent
++major:[minor]
++.B [ classid
++major:minor
++.B ] htb rate
++rate
++.B [ ceil
++rate
++.B ] burst
++bytes
++.B [ cburst
++bytes
++.B ] [ prio
++priority
++.B ]
++
++.SH DESCRIPTION
++HTB is meant as a more understandable and intuitive replacement for
++the CBQ qdisc in Linux. Both CBQ and HTB help you to control the use
++of the outbound bandwidth on a given link. Both allow you to use one
++physical link to simulate several slower links and to send different
++kinds of traffic on different simulated links. In both cases, you have
++to specify how to divide the physical link into simulated links and
++how to decide which simulated link to use for a given packet to be sent.
++
++Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm
++which does not depend on interface characteristics and so does not need to
++know the underlying bandwidth of the outgoing interface.
++
++.SH SHAPING ALGORITHM
++Shaping works as documented in
++.BR tc-tbf (8).
++
++.SH CLASSIFICATION
++Within one HTB instance many classes may exist. Each of these classes
++contains another qdisc, by default
++.BR tc-pfifo (8).
++
++When enqueueing a packet, HTB starts at the root and uses various methods to
++determine which class should receive the data.
++
++In the absence of uncommon configuration options, the process is rather easy.
++At each node we look for an instruction, and then go to the class the
++instruction refers us to. If the class found is a barren leaf-node (without
++children), we enqueue the packet there. If it is not yet a leaf node, we do
++the whole thing over again starting from that node.
++
++The following actions are performed, in order at each node we visit, until one
++sends us to another node, or terminates the process.
++.TP
++(i)
++Consult filters attached to the class. If sent to a leafnode, we are done.
++Otherwise, restart.
++.TP
++(ii)
++If none of the above returned with an instruction, enqueue at this node.
++.P
++This algorithm makes sure that a packet always ends up somewhere, even while
++you are busy building your configuration.
++
++.SH LINK SHARING ALGORITHM
++FIXME
++
++.SH QDISC
++The root of an HTB qdisc class tree has the following parameters:
++
++.TP
++parent major:minor | root
++This mandatory parameter determines the place of the HTB instance, either at the
++.B root
++of an interface or within an existing class.
++.TP
++handle major:
++Like all other qdiscs, the HTB can be assigned a handle. Should consist only
++of a major number, followed by a colon. Optional, but very useful if classes
++will be generated within this qdisc.
++.TP
++default minor-id
++Unclassified traffic gets sent to the class with this minor-id.
++
++.SH CLASSES
++Classes have a host of parameters to configure their operation.
++
++.TP
++parent major:minor
++Place of this class within the hierarchy. If attached directly to a qdisc
++and not to another class, minor can be omitted. Mandatory.
++.TP
++classid major:minor
++Like qdiscs, classes can be named. The major number must be equal to the
++major number of the qdisc to which it belongs. Optional, but needed if this
++class is going to have children.
++.TP
++prio priority
++In the round-robin process, classes with the lowest priority field are tried
++for packets first. Mandatory.
++
++.TP
++rate rate
++Maximum rate this class and all its children are guaranteed. Mandatory.
++
++.TP
++ceil rate
++Maximum rate at which a class can send, if its parent has bandwidth to spare.
++Defaults to the configured rate, which implies no borrowing
++
++.TP
++burst bytes
++Amount of bytes that can be burst at
++.B ceil
++speed, in excess of the configured
++.B rate.
++Should be at least as high as the highest burst of all children.
++
++.TP
++cburst bytes
++Amount of bytes that can be burst at 'infinite' speed, in other words, as fast
++as the interface can transmit them. For perfect evening out, should be equal to at most one average
++packet. Should be at least as high as the highest cburst of all children.
++
++.SH NOTES
++Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel,
++there are 100 timer events per second, the maximum rate is that rate at which 'burst' bytes are sent each timer tick.
++From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte
++burst as 100*12kb*8 equals 10mbit.
++
++.SH BUGS
++Not in the stock kernel yet.
++
++.SH SEE ALSO
++.BR tc (8)
++.P
++HTB website: http://luxik.cdi.cz/~devik/qos/htb/
++.SH AUTHOR
++Martin Devera <devik@cdi.cz>. This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc-pbfifo.8 iproute2/debian/tc-pbfifo.8
+--- iproute2-orig/debian/tc-pbfifo.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-pbfifo.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,72 @@
++.TH PBFIFO 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++pfifo \- Packet limited First In, First Out queue
++.P
++bfifo \- Byte limited First In, First Out queue
++
++.SH SYNOPSIS
++.B tc qdisc ... add pfifo
++.B [ limit
++packets
++.B ]
++.P
++.B tc qdisc ... add bfifo
++.B [ limit
++bytes
++.B ]
++
++.SH DESCRIPTION
++The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the
++simplest queues possible and therefore have no overhead.
++.B pfifo
++constrains the queue size as measured in packets.
++.B bfifo
++does so as measured in bytes.
++
++Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer
++pfifo or bfifo over the default.
++
++.SH ALGORITHM
++A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of
++a list. When a packet needs to be sent out to the network, it is taken from the head of the list.
++
++If the list is too long, no further packets are allowed on. This is called 'tail drop'.
++
++.SH PARAMETERS
++.TP
++limit
++Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults
++to the interface txqueuelen, as specified with
++.BR ifconfig (8)
++or
++.BR ip (8).
++
++For bfifo, it defaults to the txqueuelen multiplied by the interface MTU.
++
++.SH OUTPUT
++The output of
++.B tc -s qdisc ls
++contains the limit, either in packets or in bytes, and the number of bytes
++and packets actually sent. An unsent and dropped packet only appears between braces
++and is not counted as 'Sent'.
++
++In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets.
++No packets were dropped, and as the pfifo queue does not slow down packets, there were also no
++overlimits:
++.P
++.nf
++# tc -s qdisc ls dev eth0
++qdisc pfifo 8001: dev eth0 limit 100p
++ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0)
++.fi
++
++If a backlog occurs, this is displayed as well.
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
++
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc-pfifo_fast.8 iproute2/debian/tc-pfifo_fast.8
+--- iproute2-orig/debian/tc-pfifo_fast.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-pfifo_fast.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,59 @@
++.TH PFIFO_FAST 8 "10 January 2002" "iproute2" "Linux"
++.SH NAME
++pfifo_fast \- three-band first in, first out queue
++
++.SH DESCRIPTION
++pfifo_fast is the default qdisc of each interface.
++
++Whenever an interface is created, the pfifo_fast qdisc is automatically used
++as a queue. If another qdisc is attached, it preempts the default
++pfifo_fast, which automatically returns to function when an existing qdisc
++is detached.
++
++In this sense this qdisc is magic, and unlike other qdiscs.
++
++.SH ALGORITHM
++The algorithm is very similar to that of the classful
++.BR tc-prio (8)
++qdisc.
++.B pfifo_fast
++is like three
++.BR tc-pfifo (8)
++queues side by side, where packets can be enqueued in any of the three bands
++based on their Type of Service bits or assigned priority.
++
++Not all three bands are dequeued simultaneously - as long as lower bands
++have traffic, higher bands are never dequeued. This can be used to
++prioritize interactive traffic or penalize 'lowest cost' traffic.
++
++Each band can be txqueuelen packets long, as configured with
++.BR ifconfig (8)
++or
++.BR ip (8).
++Additional packets coming in are not enqueued but are instead dropped.
++
++See
++.BR tc-prio (8)
++for complete details on how TOS bits are translated into bands.
++.SH PARAMETERS
++.TP
++txqueuelen
++The length of the three bands depends on the interface txqueuelen, as
++specified with
++.BR ifconfig (8)
++or
++.BR ip (8).
++
++.SH BUGS
++Does not maintain statistics and does not show up in tc qdisc ls. This is because
++it is the automatic default in the absence of a configured qdisc.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>
++
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc-prio.8 iproute2/debian/tc-prio.8
+--- iproute2-orig/debian/tc-prio.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-prio.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,187 @@
++.TH PRIO 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++PRIO \- Priority qdisc
++.SH SYNOPSIS
++.B tc qdisc ... dev
++dev
++.B ( parent
++classid
++.B | root) [ handle
++major:
++.B ] prio [ bands
++bands
++.B ] [ priomap
++band,band,band...
++.B ] [ estimator
++interval timeconstant
++.B ]
++
++.SH DESCRIPTION
++The PRIO qdisc is a simple classful queueing discipline that contains
++an arbitrary number of classes of differing priority. The classes are
++dequeued in numerical descending order of priority. PRIO is a scheduler
++and never delays packets - it is a work-conserving qdisc, though the qdiscs
++contained in the classes may not be.
++
++Very useful for lowering latency when there is no need for slowing down
++traffic.
++
++.SH ALGORITHM
++On creation with 'tc qdisc add', a fixed number of bands is created. Each
++band is a class, although it is not possible to add classes with 'tc qdisc
++add', the number of bands to be created must instead be specified on the
++commandline attaching PRIO to its root.
++
++When dequeueing, band 0 is tried first and only if it did not deliver a
++packet does PRIO try band 1, and so onwards. Maximum reliability packets
++should therefore go to band 0, minimum delay to band 1 and the rest to band
++2.
++
++As the PRIO qdisc itself will have minor number 0, band 0 is actually
++major:1, band 1 is major:2, etc. For major, substitute the major number
++assigned to the qdisc on 'tc qdisc add' with the
++.B handle
++parameter.
++
++.SH CLASSIFICATION
++Three methods are available to PRIO to determine in which band a packet will
++be enqueued.
++.TP
++From userspace
++A process with sufficient privileges can encode the destination class
++directly with SO_PRIORITY, see
++.BR socket (7).
++.TP
++with a tc filter
++A tc filter attached to the root qdisc can point traffic directly to a class
++.TP
++with the priomap
++Based on the packet priority, which in turn is derived from the Type of
++Service assigned to the packet.
++.P
++Only the priomap is specific to this qdisc.
++.SH QDISC PARAMETERS
++.TP
++bands
++Number of bands. If changed from the default of 3,
++.B priomap
++must be updated as well.
++.TP
++priomap
++The priomap maps the priority of
++a packet to a class. The priority can either be set directly from userspace,
++or be derived from the Type of Service of the packet.
++
++Determines how packet priorities, as assigned by the kernel, map to
++bands. Mapping occurs based on the TOS octet of the packet, which looks like
++this:
++
++.nf
++0 1 2 3 4 5 6 7
+++---+---+---+---+---+---+---+---+
++| | | |
++|PRECEDENCE | TOS |MBZ|
++| | | |
+++---+---+---+---+---+---+---+---+
++.fi
++
++The four TOS bits (the 'TOS field') are defined as:
++
++.nf
++Binary Decimal  Meaning
++-----------------------------------------
++1000 8 Minimize delay (md)
++0100 4 Maximize throughput (mt)
++0010 2 Maximize reliability (mr)
++0001 1 Minimize monetary cost (mmc)
++0000 0 Normal Service
++.fi
++
++As there is 1 bit to the right of these four bits, the actual value of the
++TOS field is double the value of the TOS bits. Tcpdump -v -v shows you the
++value of the entire TOS field, not just the four bits. It is the value you
++see in the first column of this table:
++
++.nf
++TOS Bits Means Linux Priority Band
++------------------------------------------------------------
++0x0 0 Normal Service 0 Best Effort 1
++0x2 1 Minimize Monetary Cost 1 Filler 2
++0x4 2 Maximize Reliability 0 Best Effort 1
++0x6 3 mmc+mr 0 Best Effort 1
++0x8 4 Maximize Throughput 2 Bulk 2
++0xa 5 mmc+mt 2 Bulk 2
++0xc 6 mr+mt 2 Bulk 2
++0xe 7 mmc+mr+mt 2 Bulk 2
++0x10 8 Minimize Delay 6 Interactive 0
++0x12 9 mmc+md 6 Interactive 0
++0x14 10 mr+md 6 Interactive 0
++0x16 11 mmc+mr+md 6 Interactive 0
++0x18 12 mt+md 4 Int. Bulk 1
++0x1a 13 mmc+mt+md 4 Int. Bulk 1
++0x1c 14 mr+mt+md 4 Int. Bulk 1
++0x1e 15 mmc+mr+mt+md 4 Int. Bulk 1
++.fi
++
++The second column contains the value of the relevant
++four TOS bits, followed by their translated meaning. For example, 15 stands
++for a packet wanting Minimal Monetary Cost, Maximum Reliability, Maximum
++Throughput AND Minimum Delay.
++
++The fourth column lists the way the Linux kernel interprets the TOS bits, by
++showing to which Priority they are mapped.
++
++The last column shows the result of the default priomap. On the commandline,
++the default priomap looks like this:
++
++ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
++
++This means that priority 4, for example, gets mapped to band number 1.
++The priomap also allows you to list higher priorities (> 7) which do not
++correspond to TOS mappings, but which are set by other means.
++
++This table from RFC 1349 (read it for more details) explains how
++applications might very well set their TOS bits:
++
++.nf
++TELNET 1000 (minimize delay)
++FTP
++ Control 1000 (minimize delay)
++ Data 0100 (maximize throughput)
++
++TFTP 1000 (minimize delay)
++
++SMTP
++ Command phase 1000 (minimize delay)
++ DATA phase 0100 (maximize throughput)
++
++Domain Name Service
++ UDP Query 1000 (minimize delay)
++ TCP Query 0000
++ Zone Transfer 0100 (maximize throughput)
++
++NNTP 0001 (minimize monetary cost)
++
++ICMP
++ Errors 0000
++ Requests 0000 (mostly)
++ Responses <same as request> (mostly)
++.fi
++
++
++.SH CLASSES
++PRIO classes cannot be configured further - they are automatically created
++when the PRIO qdisc is attached. Each class however can contain yet a
++further qdisc.
++
++.SH BUGS
++Large amounts of traffic in the lower bands can cause starvation of higher
++bands. Can be prevented by attaching a shaper (for example,
++.BR tc-tbf (8))
++to these bands to make sure they cannot dominate the link.
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, J Hadi Salim
++<hadi@cyberus.ca>. This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc-red.8 iproute2/debian/tc-red.8
+--- iproute2-orig/debian/tc-red.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-red.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,131 @@
++.TH RED 8 "13 December 2001" "iproute2" "Linux"
++.SH NAME
++red \- Random Early Detection
++.SH SYNOPSIS
++.B tc qdisc ... red
++.B limit
++bytes
++.B min
++bytes
++.B max
++bytes
++.B avpkt
++bytes
++.B burst
++packets
++.B [ ecn ] [ bandwidth
++rate
++.B ] probability
++chance
++
++.SH DESCRIPTION
++Random Early Detection is a classless qdisc which manages its queue size
++smartly. Regular queues simply drop packets from the tail when they are
++full, which may not be the optimal behaviour. RED also performs tail drop,
++but does so in a more gradual way.
++
++Once the queue hits a certain average length, packets enqueued have a
++configurable chance of being marked (which may mean dropped). This chance
++increases linearly up to a point called the
++.B max
++average queue length, although the queue might get bigger.
++
++This has a host of benefits over simple taildrop, while not being processor
++intensive. It prevents synchronous retransmits after a burst in traffic,
++which cause further retransmits, etc.
++
++The goal is to have a small queue size, which is good for interactivity
++while not disturbing TCP/IP traffic with too many sudden drops after a burst
++of traffic.
++
++Depending on whether ECN is configured, marking either means dropping or
++purely marking a packet as overlimit.
++.SH ALGORITHM
++The average queue size is used for determining the marking
++probability. This is calculated using an Exponential Weighted Moving
++Average, which can be more or less sensitive to bursts.
++
++When the average queue size is below
++.B min
++bytes, no packet will ever be marked. When it exceeds
++.B min,
++the probability of doing so climbs linearly up
++to
++.B probability,
++until the average queue size hits
++.B max
++bytes. Because
++.B probability
++is normally not set to 100%, the queue size might
++conceivably rise above
++.B max
++bytes, so the
++.B limit
++parameter is provided to set a hard maximum for the size of the queue.
++
++.SH PARAMETERS
++.TP
++min
++Average queue size at which marking becomes a possibility.
++.TP
++max
++At this average queue size, the marking probability is maximal. Should be at
++least twice
++.B min
++to prevent synchronous retransmits, higher for low
++.B min.
++.TP
++probability
++Maximum probability for marking, specified as a floating point
++number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%,
++respectively).
++.TP
++limit
++Hard limit on the real (not average) queue size in bytes. Further packets
++are dropped. Should be set higher than max+burst. It is advised to set this
++a few times higher than
++.B max.
++.TP
++burst
++Used for determining how fast the average queue size is influenced by the
++real queue size. Larger values make the calculation more sluggish, allowing
++longer bursts of traffic before marking starts. Real life experiments
++support the following guideline: (min+min+max)/(3*avpkt).
++.TP
++avpkt
++Specified in bytes. Used with burst to determine the time constant for
++average queue size calculations. 1000 is a good value.
++.TP
++bandwidth
++This rate is used for calculating the average queue size after some
++idle time. Should be set to the bandwidth of your interface. Does not mean
++that RED will shape for you! Optional.
++.TP
++ecn
++As mentioned before, RED can either 'mark' or 'drop'. Explicit Congestion
++Notification allows RED to notify remote hosts that their rate exceeds the
++amount of bandwidth available. Non-ECN capable hosts can only be notified by
++dropping a packet. If this parameter is specified, packets which indicate
++that their hosts honor ECN will only be marked and not dropped, unless the
++queue size hits
++.B limit
++bytes. Needs a tc binary with RED support compiled in. Recommended.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH SOURCES
++.TP
++o
++Floyd, S., and Jacobson, V., Random Early Detection gateways for
++Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html
++.TP
++o
++Some changes to the algorithm by Alexey N. Kuznetsov.
++
++.SH AUTHORS
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>, Alexey Makarenko
++<makar@phoenix.kharkov.ua>, J Hadi Salim <hadi@nortelnetworks.com>.
++This manpage maintained by bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc-sfq.8 iproute2/debian/tc-sfq.8
+--- iproute2-orig/debian/tc-sfq.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-sfq.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,107 @@
++.TH TC 8 "8 December 2001" "iproute2" "Linux"
++.SH NAME
++sfq \- Stochastic Fairness Queueing
++.SH SYNOPSIS
++.B tc qdisc ... perturb
++seconds
++.B quantum
++bytes
++
++.SH DESCRIPTION
++
++Stochastic Fairness Queueing is a classless queueing discipline available for
++traffic control with the
++.BR tc (8)
++command.
++
++SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'.
++The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing
++any single flow from drowning out the rest.
++
++This may in fact have some effect in mitigating a Denial of Service attempt.
++
++SFQ is work-conserving and therefore always delivers a packet if it has one available.
++.SH ALGORITHM
++On enqueueing, each packet is assigned to a hash bucket, based on
++.TP
++(i)
++Source address
++.TP
++(ii)
++Destination address
++.TP
++(iii)
++Source port
++.P
++If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP.
++Packets with other protocols are hashed based on the 32-bit representation of their
++destination and the socket they belong to. A flow corresponds mostly to a TCP/IP
++connection.
++
++Each of these buckets should represent a unique flow. Because multiple flows may
++get hashed to the same bucket, the hashing algorithm is perturbed at configurable
++intervals so that the unfairness lasts only for a short while. Perturbation may
++however cause some inadvertent packet reordering to occur.
++
++When dequeuing, each hashbucket with data is queried in a round robin fashion.
++
++The compile time maximum length of the SFQ is 128 packets, which can be spread over
++at most 128 buckets of 1024 available. In case of overflow, tail-drop is performed
++on the fullest bucket, thus maintaining fairness.
++
++.SH PARAMETERS
++.TP
++perturb
++Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that
++no perturbation occurs. Do not set too low for each perturbation may cause some packet
++reordering. Advised value: 10
++.TP
++quantum
++Amount of bytes a flow is allowed to dequeue during a round of the round robin process.
++Defaults to the MTU of the interface which is also the advised value and the minimum value.
++
++.SH EXAMPLE & USAGE
++
++To attach to device ppp0:
++.P
++# tc qdisc add dev ppp0 root sfq perturb 10
++.P
++Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful
++if it owns the queue.
++This is the case when the link speed equals the actually available bandwidth. This holds
++for regular phone modems, ISDN connections and direct non-switched ethernet links.
++.P
++Most often, cable modems and DSL devices do not fall into this category. The same holds
++for when connected to a switch and trying to send data to a congested segment also
++connected to the switch.
++.P
++In this case, the effective queue does not reside within Linux and is therefore not
++available for scheduling.
++.P
++Embed SFQ in a classful qdisc to make sure it owns the queue.
++
++.SH SOURCE
++.TP
++o
++Paul E. McKenney "Stochastic Fairness Queuing",
++IEEE INFOCOM'90 Proceedings, San Francisco, 1990.
++
++.TP
++o
++Paul E. McKenney "Stochastic Fairness Queuing",
++"Interworking: Research and Experience", v.2, 1991, p.113-131.
++
++.TP
++o
++See also:
++M. Shreedhar and George Varghese "Efficient Fair
++Queuing using Deficit Round Robin", Proc. SIGCOMM 95.
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc-tbf.8 iproute2/debian/tc-tbf.8
+--- iproute2-orig/debian/tc-tbf.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc-tbf.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,138 @@
++.TH TC 8 "13 December 2001" "iproute2" "Linux"
++.SH NAME
++tbf \- Token Bucket Filter
++.SH SYNOPSIS
++.B tc qdisc ... tbf rate
++rate
++.B burst
++bytes/cell
++.B ( latency
++ms
++.B | limit
++bytes
++.B ) [ mpu
++bytes
++.B [ peakrate
++rate
++.B mtu
++bytes/cell
++.B ] ]
++.P
++burst is also known as buffer and maxburst. mtu is also known as minburst.
++.SH DESCRIPTION
++
++The Token Bucket Filter is a classless queueing discipline available for
++traffic control with the
++.BR tc (8)
++command.
++
++TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle
++itself, although packets are available, to ensure that the configured rate is not exceeded.
++On all platforms except for Alpha,
++it is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness,
++sending out data exactly at the configured rates.
++
++Much higher rates are possible but at the cost of losing the minimal burstiness. In that
++case, data is on average dequeued at the configured rate but may be sent much faster at millisecond
++timescales. Because of further queues living in network adaptors, this is often not a problem.
++
++Kernels with a higher 'HZ' can achieve higher rates with perfect burstiness. On Alpha, HZ is ten
++times higher, leading to a 10mbit/s limit to perfection. These calculations hold for packets of on
++average 1000 bytes.
++
++.SH ALGORITHM
++As the name implies, traffic is filtered based on the expenditure of
++.B tokens.
++Tokens roughly correspond to bytes, with the additional constraint that each packet consumes
++some tokens, no matter how small it is. This reflects the fact that even a zero-sized packet occupies
++the link for some time.
++
++On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst
++in one go. Tokens arrive at a steady rate, until the bucket is full.
++
++If no tokens are available, packets are queued, up to a configured limit. The TBF now
++calculates the token deficit, and throttles until the first packet in the queue can be sent.
++
++If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured
++to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF
++with a very small bucket, so that it doesn't burst.
++
++To achieve perfection, the second bucket may contain only a single packet, which leads to
++the earlier mentioned 1mbit/s limit.
++
++This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends
++on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100
++packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s.
++
++.SH PARAMETERS
++See
++.BR tc (8)
++for how to specify the units of these values.
++.TP
++limit or latency
++Limit is the number of bytes that can be queued waiting for tokens to become
++available. You can also specify this the other way around by setting the
++latency parameter, which specifies the maximum amount of time a packet can
++sit in the TBF. The latter calculation takes into account the size of the
++bucket, the rate and possibly the peakrate (if set). These two parameters
++are mutually exclusive.
++.TP
++burst
++Also known as buffer or maxburst.
++Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously.
++In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer
++if you want to reach your configured rate!
++
++If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket.
++The minimum buffer size can be calculated by dividing the rate by HZ.
++
++Token usage calculations are performed using a table which by default has a resolution of 8 packets.
++This resolution can be changed by specifying the
++.B cell
++size with the burst. For example, to specify a 6000 byte buffer with a 16
++byte cell size, set a burst of 6000/16. You will probably never have to set
++this. Must be an integral power of 2.
++.TP
++mpu
++A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit
++determines the minimal token usage (specified in bytes) for a packet. Defaults to zero.
++.TP
++rate
++The speed knob. See remarks above about limits! See
++.BR tc (8)
++for units.
++.PP
++Furthermore, if a peakrate is desired, the following parameters are available:
++
++.TP
++peakrate
++Maximum depletion rate of the bucket. Limited to 1mbit/s on Intel, 10mbit/s on Alpha. The peakrate does
++not need to be set, it is only necessary if perfect millisecond timescale shaping is required.
++
++.TP
++mtu/minburst
++Specifies the size of the peakrate bucket. For perfect accuracy, should be set to the MTU of the interface.
++If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst
++allows around 3mbit/s of peakrate, given 1000 byte packets.
++
++Like the regular burstsize you can also specify a
++.B cell
++size.
++.SH EXAMPLE & USAGE
++
++To attach a TBF with a sustained maximum rate of 0.5mbit/s, a peakrate of 1.0mbit/s,
++a 5kilobyte buffer, with a pre-bucket queue size limit calculated so the TBF causes
++at most 70ms of latency, with perfect peakrate behaviour, issue:
++.P
++# tc qdisc add dev eth0 root tbf rate 0.5mbit \\
++ burst 5kb latency 70ms peakrate 1mbit \\
++ minburst 1540
++
++.SH SEE ALSO
++.BR tc (8)
++
++.SH AUTHOR
++Alexey N. Kuznetsov, <kuznet@ms2.inr.ac.ru>. This manpage maintained by
++bert hubert <ahu@ds9a.nl>
++
++
+diff -Naur iproute2-orig/debian/tc.8 iproute2/debian/tc.8
+--- iproute2-orig/debian/tc.8 1969-12-31 16:00:00.000000000 -0800
++++ iproute2/debian/tc.8 2004-05-21 00:09:38.000000000 -0700
+@@ -0,0 +1,348 @@
++.TH TC 8 "16 December 2001" "iproute2" "Linux"
++.SH NAME
++tc \- show / manipulate traffic control settings
++.SH SYNOPSIS
++.B tc qdisc [ add | change | replace | link ] dev
++DEV
++.B
++[ parent
++qdisc-id
++.B | root ]
++.B [ handle
++qdisc-id ] qdisc
++[ qdisc specific parameters ]
++.P
++
++.B tc class [ add | change | replace ] dev
++DEV
++.B parent
++qdisc-id
++.B [ classid
++class-id ] qdisc
++[ qdisc specific parameters ]
++.P
++
++.B tc filter [ add | change | replace ] dev
++DEV
++.B [ parent
++qdisc-id
++.B | root ] protocol
++protocol
++.B prio
++priority filtertype
++[ filtertype specific parameters ]
++.B flowid
++flow-id
++
++.B tc [-s | -d ] qdisc show [ dev
++DEV
++.B ]
++.P
++.B tc [-s | -d ] class show dev
++DEV
++.P
++.B tc filter show dev
++DEV
++
++.SH DESCRIPTION
++.B Tc
++is used to configure Traffic Control in the Linux kernel. Traffic Control consists
++of the following:
++
++.TP
++SHAPING
++When traffic is shaped, its rate of transmission is under control. Shaping may
++be more than lowering the available bandwidth - it is also used to smooth out
++bursts in traffic for better network behaviour. Shaping occurs on egress.
++
++.TP
++SCHEDULING
++By scheduling the transmission of packets it is possible to improve interactivity
++for traffic that needs it while still guaranteeing bandwidth to bulk transfers. Reordering
++is also called prioritizing, and happens only on egress.
++
++.TP
++POLICING
++Where shaping deals with transmission of traffic, policing pertains to traffic
++arriving. Policing thus occurs on ingress.
++
++.TP
++DROPPING
++Traffic exceeding a set bandwidth may also be dropped forthwith, both on
++ingress and on egress.
++
++.P
++Processing of traffic is controlled by three kinds of objects: qdiscs,
++classes and filters.
++
++.SH QDISCS
++.B qdisc
++is short for 'queueing discipline' and it is elementary to
++understanding traffic control. Whenever the kernel needs to send a
++packet to an interface, it is
++.B enqueued
++to the qdisc configured for that interface. Immediately afterwards, the kernel
++tries to get as many packets as possible from the qdisc, for giving them
++to the network adaptor driver.
++
++A simple QDISC is the 'pfifo' one, which does no processing at all and is a pure
++First In, First Out queue. It does however store traffic when the network interface
++can't handle it momentarily.
++
++.SH CLASSES
++Some qdiscs can contain classes, which contain further qdiscs - traffic may
++then be enqueued in any of the inner qdiscs, which are within the
++.B classes.
++When the kernel tries to dequeue a packet from such a
++.B classful qdisc
++it can come from any of the classes. A qdisc may for example prioritize
++certain kinds of traffic by trying to dequeue from certain classes
++before others.
++
++.SH FILTERS
++A
++.B filter
++is used by a classful qdisc to determine in which class a packet will
++be enqueued. Whenever traffic arrives at a class with subclasses, it needs
++to be classified. Various methods may be employed to do so; one of these
++is the use of filters. All filters attached to the class are called, until one of
++them returns with a verdict. If no verdict was made, other criteria may be
++available. This differs per qdisc.
++
++It is important to notice that filters reside
++.B within
++qdiscs - they are not masters of what happens.
++
++.SH CLASSLESS QDISCS
++The classless qdiscs are:
++.TP
++[p|b]fifo
++Simplest usable qdisc, pure First In, First Out behaviour. Limited in
++packets or in bytes.
++.TP
++pfifo_fast
++Standard qdisc for 'Advanced Router' enabled kernels. Consists of a three-band
++queue which honors Type of Service flags, as well as the priority that may be
++assigned to a packet.
++.TP
++red
++Random Early Detection simulates physical congestion by randomly dropping
++packets when nearing configured bandwidth allocation. Well suited to very
++large bandwidth applications.
++.TP
++sfq
++Stochastic Fairness Queueing reorders queued traffic so each 'session'
++gets to send a packet in turn.
++.TP
++tbf
++The Token Bucket Filter is suited for slowing traffic down to a precisely
++configured rate. Scales well to large bandwidths.
++.SH CONFIGURING CLASSLESS QDISCS
++In the absence of classful qdiscs, classless qdiscs can only be attached at
++the root of a device. Full syntax:
++.P
++.B tc qdisc add dev
++DEV
++.B root
++QDISC QDISC-PARAMETERS
++
++To remove, issue
++.P
++.B tc qdisc del dev
++DEV
++.B root
++
++The
++.B pfifo_fast
++qdisc is the automatic default in the absence of a configured qdisc.
++
++.SH CLASSFUL QDISCS
++The classful qdiscs are:
++.TP
++CBQ
++Class Based Queueing implements a rich linksharing hierarchy of classes.
++It contains shaping elements as well as prioritizing capabilities. Shaping is
++performed using link idle time calculations based on average packet size and
++underlying link bandwidth. The latter may be ill-defined for some interfaces.
++.TP
++HTB
++The Hierarchy Token Bucket implements a rich linksharing hierarchy of
++classes with an emphasis on conforming to existing practices. HTB facilitates
++guaranteeing bandwidth to classes, while also allowing specification of upper
++limits to inter-class sharing. It contains shaping elements, based on TBF, and
++can prioritize classes.
++.TP
++PRIO
++The PRIO qdisc is a non-shaping container for a configurable number of
++classes which are dequeued in order. This allows for easy prioritization
++of traffic, where lower classes are only able to send if higher ones have
++no packets available. To facilitate configuration, Type Of Service bits are
++honored by default.
++.SH THEORY OF OPERATION
++Classes form a tree, where each class has a single parent.
++A class may have multiple children. Some qdiscs allow for runtime addition
++of classes (CBQ, HTB) while others (PRIO) are created with a static number of
++children.
++
++Qdiscs which allow dynamic addition of classes can have zero or more
++subclasses to which traffic may be enqueued.
++
++Furthermore, each class contains a
++.B leaf qdisc
++which by default has
++.B pfifo
++behaviour, though another qdisc can be attached in its place. This qdisc may again
++contain classes, but each class can have only one leaf qdisc.
++
++When a packet enters a classful qdisc it can be
++.B classified
++to one of the classes within. Three criteria are available, although not all
++qdiscs will use all three:
++.TP
++tc filters
++If tc filters are attached to a class, they are consulted first
++for relevant instructions. Filters can match on all fields of a packet header,
++as well as on the firewall mark applied by ipchains or iptables. See
++.BR tc-filters (8).
++.TP
++Type of Service
++Some qdiscs have built in rules for classifying packets based on the TOS field.
++.TP
++skb->priority
++Userspace programs can encode a class-id in the 'skb->priority' field using
++the SO_PRIORITY option.
++.P
++Each node within the tree can have its own filters but higher level filters
++may also point directly to lower classes.
++
++If classification did not succeed, packets are enqueued to the leaf qdisc
++attached to that class. Check qdisc specific manpages for details, however.
++
++.SH NAMING
++All qdiscs, classes and filters have IDs, which can either be specified
++or be automatically assigned.
++
++IDs consist of a major number and a minor number, separated by a colon.
++
++.TP
++QDISCS
++A qdisc, which potentially can have children,
++gets assigned a major number, called a 'handle', leaving the minor
++number namespace available for classes. The handle is expressed as '10:'.
++It is customary to explicitly assign a handle to qdiscs expected to have
++children.
++
++.TP
++CLASSES
++Classes residing under a qdisc share their qdisc major number, but each has
++a separate minor number called a 'classid' that has no relation to their
++parent classes, only to their parent qdisc. The same naming custom as for
++qdiscs applies.
++
++.TP
++FILTERS
++Filters have a three part ID, which is only needed when using a hashed
++filter hierarchy, for which see
++.BR tc-filters (8).
++.SH UNITS
++All parameters accept a floating point number, possibly followed by a unit.
++.P
++Bandwidths or rates can be specified in:
++.TP
++kbps
++Kilobytes per second
++.TP
++mbps
++Megabytes per second
++.TP
++kbit
++Kilobits per second
++.TP
++mbit
++Megabits per second
++.TP
++bps
++Bytes per second. A bare number (no unit) is taken as bits per second.
++.P
++Amounts of data can be specified in:
++.TP
++kb or k
++Kilobytes
++.TP
++mb or m
++Megabytes
++.TP
++mbit
++Megabits
++.TP
++kbit
++Kilobits
++.TP
++b or a bare number
++Bytes.
++.P
++Lengths of time can be specified in:
++.TP
++s, sec or secs
++Whole seconds
++.TP
++ms, msec or msecs
++Milliseconds
++.TP
++us, usec, usecs or a bare number
++Microseconds.
++
++.SH TC COMMANDS
++The following commands are available for qdiscs, classes and filters:
++.TP
++add
++Add a qdisc, class or filter to a node. For all entities, a
++.B parent
++must be passed, either by passing its ID or by attaching directly to the root of a device.
++When creating a qdisc or a filter, it can be named with the
++.B handle
++parameter. A class is named with the
++.B classid
++parameter.
++
++.TP
++remove
++A qdisc can be removed by specifying its handle, which may also be 'root'. All subclasses and their leaf qdiscs
++are automatically deleted, as well as any filters attached to them.
++
++.TP
++change
++Some entities can be modified 'in place'. Shares the syntax of 'add', with the exception
++that the handle cannot be changed and neither can the parent. In other words,
++.B
++change
++cannot move a node.
++
++.TP
++replace
++Performs a nearly atomic remove/add on an existing node id. If the node does not exist yet
++it is created.
++
++.TP
++link
++Only available for qdiscs and performs a replace where the node
++must exist already.
++
++
++.SH HISTORY
++.B tc
++was written by Alexey N. Kuznetsov and added in Linux 2.2.
++.SH SEE ALSO
++.BR tc-cbq (8),
++.BR tc-htb (8),
++.BR tc-sfq (8),
++.BR tc-red (8),
++.BR tc-tbf (8),
++.BR tc-pfifo (8),
++.BR tc-bfifo (8),
++.BR tc-pfifo_fast (8),
++.BR tc-filters (8)
++
++.SH AUTHOR
++Manpage maintained by bert hubert (ahu@ds9a.nl)
++
+diff -Naur iproute2-orig/include/rt_names.h iproute2/include/rt_names.h
+--- iproute2-orig/include/rt_names.h 2000-04-16 10:42:50.000000000 -0700
++++ iproute2/include/rt_names.h 2004-05-21 00:16:36.000000000 -0700
+@@ -1,6 +1,8 @@
+ #ifndef RT_NAMES_H_
+ #define RT_NAMES_H_ 1
+
++#include <asm/byteorder.h>
++
+ const char* rtnl_rtprot_n2a(int id, char *buf, int len);
+ const char* rtnl_rtscope_n2a(int id, char *buf, int len);
+ const char* rtnl_rttable_n2a(int id, char *buf, int len);
+diff -Naur iproute2-orig/lib/rt_names.c iproute2/lib/rt_names.c
+--- iproute2-orig/lib/rt_names.c 2000-04-16 10:42:52.000000000 -0700
++++ iproute2/lib/rt_names.c 2004-05-21 00:16:36.000000000 -0700
+@@ -16,6 +16,7 @@
+ #include <fcntl.h>
+ #include <string.h>
+ #include <sys/time.h>
++#include <asm/byteorder.h>
+
+ static void rtnl_tab_initialize(char *file, char **tab, int size)
+ {
+diff -Naur iproute2-orig/misc/arpd.c iproute2/misc/arpd.c
+--- iproute2-orig/misc/arpd.c 2002-01-09 20:02:26.000000000 -0800
++++ iproute2/misc/arpd.c 2004-05-21 00:16:36.000000000 -0700
+@@ -16,7 +16,7 @@
+ #include <unistd.h>
+ #include <stdlib.h>
+ #include <netdb.h>
+-#include <db.h>
++#include <db_185.h>
+ #include <sys/ioctl.h>
+ #include <sys/poll.h>
+ #include <errno.h>
+@@ -28,6 +28,7 @@
+ #include <signal.h>
+ #include <linux/if.h>
+ #include <linux/if_arp.h>
++#include <linux/if_ether.h>
+ #include <netinet/in.h>
+ #include <arpa/inet.h>
+ #include <linux/if_packet.h>