From a013e645fef4ebcd2bc91d81f54df8719e304c76 Mon Sep 17 00:00:00 2001
From: Cecil Hugh Watson
Date: Thu, 29 Jan 2009 00:13:05 -0800
Subject: Sync w/ upstream.

---
 abs/core-testing/iproute2/PKGBUILD | 37 +
 .../iproute2/iproute2-2.4.7-now-ss020116.patch | 9823 ++++++++++++++++++++
 abs/core-testing/iproute2/libdir.patch | 129 +
 3 files changed, 9989 insertions(+)
 create mode 100644 abs/core-testing/iproute2/PKGBUILD
 create mode 100644 abs/core-testing/iproute2/iproute2-2.4.7-now-ss020116.patch
 create mode 100644 abs/core-testing/iproute2/libdir.patch

diff --git a/abs/core-testing/iproute2/PKGBUILD b/abs/core-testing/iproute2/PKGBUILD
new file mode 100644
index 0000000..ed280e5
--- /dev/null
+++ b/abs/core-testing/iproute2/PKGBUILD
@@ -0,0 +1,37 @@
+# $Id: PKGBUILD 24415 2009-01-16 12:43:06Z ronald $
+# Maintainer: Ronald van Haren
+# Contributor: Judd Vinet
+
+pkgname=iproute2
+pkgver=2.6.28
+pkgrel=1
+pkgdesc="IP Routing Utilities"
+arch=('i686' 'x86_64')
+license=('GPL2')
+url="http://www.linux-foundation.org/en/Net:Iproute2"
+depends=('linux-atm' 'perl')
+
+provides=('iproute')
+conflicts=('iproute')
+replaces=('iproute')
+
+backup=('etc/iproute2/ematch_map' 'etc/iproute2/rt_dsfield' 'etc/iproute2/rt_protos' \
+        'etc/iproute2/rt_realms' 'etc/iproute2/rt_scopes' 'etc/iproute2/rt_tables')
+
+source=(http://devresources.linux-foundation.org/dev/iproute2/download/iproute2-${pkgver}.tar.bz2)
+md5sums=('595f9b17320f69e8d30d2fa80f1bca14')
+
+# Rewrite hard-coded install paths, compile, and stage the install under $pkgdir.
+build() {
+  cd "$srcdir/iproute2-${pkgver}" || return 1
+
+  # Move the iptables module dir and the share/sbin prefixes under /usr.
+  sed -i 's|/usr/local/lib/iptables|/usr/lib/iptables|' include/iptables.h || return 1
+  sed -i 's|=/share|=/usr/share|' Makefile || return 1
+  sed -i 's|=/sbin|=/usr/sbin|' Makefile || return 1
+
+  ./configure || return 1
+  make || return 1
+  # Quoted so a package root containing whitespace is not word-split.
+  make DESTDIR="$pkgdir" install || return 1
+}
diff --git a/abs/core-testing/iproute2/iproute2-2.4.7-now-ss020116.patch b/abs/core-testing/iproute2/iproute2-2.4.7-now-ss020116.patch
new file mode 100644
index 0000000..0e37865 --- /dev/null +++ b/abs/core-testing/iproute2/iproute2-2.4.7-now-ss020116.patch @@ -0,0 +1,9823 @@ +diff -Naur iproute2-orig/Makefile iproute2/Makefile +--- iproute2-orig/Makefile 2002-01-15 15:30:32.000000000 -0800 ++++ iproute2/Makefile 2004-05-21 00:16:36.000000000 -0700 +@@ -4,8 +4,6 @@ + CONFDIR=/etc/iproute2 + DOCDIR=/usr/doc/iproute2 + +-KERNEL_INCLUDE=/usr/src/linux/include +-LIBC_INCLUDE=/usr/include + + DEFINES= -DRESOLVE_HOSTNAMES + +@@ -23,19 +21,11 @@ + #options for ipx + ADDLIB+=ipx_ntop.o ipx_pton.o + +-ifeq ($(LIBC_INCLUDE)/socketbits.h,$(wildcard $(LIBC_INCLUDE)/socketbits.h)) +- ifeq ($(LIBC_INCLUDE)/net/if_packet.h,$(wildcard $(LIBC_INCLUDE)/net/if_packet.h)) +- GLIBCFIX=-I../include-glibc -include ../include-glibc/glibc-bugs.h +- endif +-endif +-ifeq ($(LIBC_INCLUDE)/bits/socket.h,$(wildcard $(LIBC_INCLUDE)/bits/socket.h)) +- GLIBCFIX=-I../include-glibc -I/usr/include/db3 -include ../include-glibc/glibc-bugs.h +-endif + + + CC = gcc + CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall -g +-CFLAGS = $(CCOPTS) $(GLIBCFIX) -I$(KERNEL_INCLUDE) -I../include $(DEFINES) ++CFLAGS = $(CCOPTS) -I../include $(DEFINES) + + LDLIBS += -L../lib -lnetlink -lutil + +@@ -43,19 +33,11 @@ + + LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a + +-all: check-kernel ++all: + @set -e; \ + for i in $(SUBDIRS); \ + do $(MAKE) -C $$i; done + +-check-kernel: +-ifeq ($(KERNEL_INCLUDE),) +- @echo "Please, set correct KERNEL_INCLUDE"; false +-else +- @set -e; \ +- if [ ! 
-r $(KERNEL_INCLUDE)/linux/autoconf.h ]; then \ +- echo "Please, compile the kernel first"; false; fi +-endif + + install: all + install -m 0755 -d $(DESTDIR)$(SBINDIR) +diff -Naur iproute2-orig/Makefile~ iproute2/Makefile~ +--- iproute2-orig/Makefile~ 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/Makefile~ 2002-01-15 15:30:32.000000000 -0800 +@@ -0,0 +1,77 @@ ++# Path to parent kernel include files directory ++DESTDIR= ++SBINDIR=/sbin ++CONFDIR=/etc/iproute2 ++DOCDIR=/usr/doc/iproute2 ++ ++KERNEL_INCLUDE=/usr/src/linux/include ++LIBC_INCLUDE=/usr/include ++ ++DEFINES= -DRESOLVE_HOSTNAMES ++ ++#options if you have a bind>=4.9.4 libresolv (or, maybe, glibc) ++LDLIBS=-lresolv ++ADDLIB= ++ ++#options if you compile with libc5, and without a bind>=4.9.4 libresolv ++#LDLIBS= ++#ADDLIB=inet_ntop.o inet_pton.o ++ ++#options for decnet ++ADDLIB+=dnet_ntop.o dnet_pton.o ++ ++#options for ipx ++ADDLIB+=ipx_ntop.o ipx_pton.o ++ ++ifeq ($(LIBC_INCLUDE)/socketbits.h,$(wildcard $(LIBC_INCLUDE)/socketbits.h)) ++ ifeq ($(LIBC_INCLUDE)/net/if_packet.h,$(wildcard $(LIBC_INCLUDE)/net/if_packet.h)) ++ GLIBCFIX=-I../include-glibc -include ../include-glibc/glibc-bugs.h ++ endif ++endif ++ifeq ($(LIBC_INCLUDE)/bits/socket.h,$(wildcard $(LIBC_INCLUDE)/bits/socket.h)) ++ GLIBCFIX=-I../include-glibc -I/usr/include/db3 -include ../include-glibc/glibc-bugs.h ++endif ++ ++ ++CC = gcc ++CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall -g ++CFLAGS = $(CCOPTS) $(GLIBCFIX) -I$(KERNEL_INCLUDE) -I../include $(DEFINES) ++ ++LDLIBS += -L../lib -lnetlink -lutil ++ ++SUBDIRS=lib ip tc misc ++ ++LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a ++ ++all: check-kernel ++ @set -e; \ ++ for i in $(SUBDIRS); \ ++ do $(MAKE) -C $$i; done ++ ++check-kernel: ++ifeq ($(KERNEL_INCLUDE),) ++ @echo "Please, set correct KERNEL_INCLUDE"; false ++else ++ @set -e; \ ++ if [ ! 
-r $(KERNEL_INCLUDE)/linux/autoconf.h ]; then \ ++ echo "Please, compile the kernel first"; false; fi ++endif ++ ++install: all ++ install -m 0755 -d $(DESTDIR)$(SBINDIR) ++ install -m 0755 -d $(DESTDIR)$(CONFDIR) ++ install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples ++ install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv ++ install -m 0644 README.iproute2+tc $(shell find examples -type f -maxdepth 1) $(DESTDIR)$(DOCDIR)/examples ++ install -m 0644 $(shell echo examples/diffserv/*) $(DESTDIR)$(DOCDIR)/examples/diffserv ++ @for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done ++ @cd etc/iproute2; for i in *; do \ ++ if [ ! -e $(DESTDIR)$(CONFDIR)/$$i ]; then \ ++ echo install -m 0644 $$i $(DESTDIR)$(CONFDIR); \ ++ install -m 0644 $$i $(DESTDIR)$(CONFDIR); fi; done ++ ++clean: ++ for i in $(SUBDIRS) doc; \ ++ do $(MAKE) -C $$i clean; done ++ ++.EXPORT_ALL_VARIABLES: +diff -Naur iproute2-orig/debian/README.Debian iproute2/debian/README.Debian +--- iproute2-orig/debian/README.Debian 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/README.Debian 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,4 @@ ++This version of "iproute" includes the HTB Linux queuing discipline ++explained in http://luxik.cdi.cz/~devik/qos/htb/ ++ ++You need kernel version 2.4.21 or newer in order to use it. 
+diff -Naur iproute2-orig/debian/changelog iproute2/debian/changelog +--- iproute2-orig/debian/changelog 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/changelog 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,207 @@ ++iproute (20010824-13) unstable; urgency=low ++ ++ * debian/rules: Run dpkg-shlibdeps with all the executables, ++ to fix dependency problem (closes: Bug#224063) ++ * Really removed references to obsolete include files ++ (Bug#223165 was not fixed properly) ++ ++ -- Juan Cespedes Sun, 25 Jan 2004 23:04:20 +0100 ++ ++iproute (20010824-12) unstable; urgency=low ++ ++ * Updated README.Debian and copyright file ++ * Added two new manpages from http://lartc.org/manpages/: ++ ip(8) and tc-cbq-details(8). ++ * Removed references to obsolete include files which made ++ compilation fail (closes: Bug#223165) ++ ++ -- Juan Cespedes Sun, 14 Dec 2003 00:40:10 +0100 ++ ++iproute (20010824-11) unstable; urgency=low ++ ++ * Changed priority to "optional" ++ * Fixed "tc -s qdisc" on sparc (patch by "Nicolas S. 
Dade" ++ ) (closes: Bug#194128) ++ ++ -- Juan Cespedes Sun, 17 Aug 2003 00:22:47 +0200 ++ ++iproute (20010824-10) unstable; urgency=low ++ ++ * Updated manual pages from http://www.lartc.org/manpages/ ++ (closes: Bug#156353, Bug#175313, Bug#176989, Bug#189095) ++ * New Standards-Version ++ * Don't "rm -rf /etc/iproute2" on purge (closes: Bug#202862) ++ * Include "iproute2" in the description (closes: Bug#182999) ++ ++ -- Juan Cespedes Sat, 16 Aug 2003 18:29:27 +0200 ++ ++iproute (20010824-9) unstable; urgency=medium ++ ++ * Added patch for HTB v3.6 to be able to work with kernel 2.4.20 ++ (from http://luxik.cdi.cz/~devik/qos/htb/v3/htb3.6-020525.tgz) ++ (closes: Bug#147550, Bug#167149, Bug#167597, Bug#171277) ++ ++ -- Juan Cespedes Thu, 05 Dec 2002 13:44:10 +0100 ++ ++iproute (20010824-8) unstable; urgency=medium ++ ++ * Added support for HTB queuing discipline (closes: Bug#133381) ++ NOTE: you need a patched kernel in order to use it ++ ++ -- Juan Cespedes Tue, 2 Apr 2002 20:29:40 +0200 ++ ++iproute (20010824-7) unstable; urgency=medium ++ ++ * Move `ip' binary to /bin to fix FHS violation (closes: Bug#134812) ++ ++ -- Juan Cespedes Mon, 4 Mar 2002 00:20:30 +0100 ++ ++iproute (20010824-6) unstable; urgency=low ++ ++ * Added a couple of #ifdef's to be able to compile with older ++ kernel headers (needed for arm) (closes: Bug#131695) ++ ++ -- Juan Cespedes Sat, 16 Feb 2002 19:27:15 +0100 ++ ++iproute (20010824-5) unstable; urgency=low ++ ++ * Really fix Bug#121589 (dead gateway bug); apparently I ++ forgot to include the patch in 20010824-2 ++ ++ -- Juan Cespedes Tue, 29 Jan 2002 23:22:24 +0100 ++ ++iproute (20010824-4) unstable; urgency=low ++ ++ * Added support for DIFFSERV and ATM in tc ++ ++ -- Juan Cespedes Sun, 13 Jan 2002 03:01:47 +0100 ++ ++iproute (20010824-3) unstable; urgency=low ++ ++ * Updated tc* man pages (thanks to bert hubert ) ++ * Fixed spurious space in `tc -s qdisc' output (closes: Bug#128501) ++ ++ -- Juan Cespedes Thu, 10 Jan 2002 22:18:25 
+0100 ++ ++iproute (20010824-2) unstable; urgency=low ++ ++ * Fixed the following important and serious bugs: ++ + iproute doesn't compile on Alpha (closes: Bug#118113, Bug#123224) ++ + iproute doesn't compile on MIPS (closes: Bug#118424) ++ + iproute doesn't compile on powerpc (closes: Bug#119601) ++ * Added man pages for tc (closes: Bug#124230), tc-cbq, tc-red, tc-tbf, ++ tc-prio and tc-sfq ++ * Removed references to old programs from iproute(7) (closes: Bug#99536) ++ * Fixed bug which presented first hop as dead in equal cost multipath ++ (closes: Bug#121589) ++ * Do not process .ps with through `psnup' (closes: Bug#119820) ++ ++ -- Juan Cespedes Tue, 8 Jan 2002 16:07:27 +0100 ++ ++iproute (20010824-1) unstable; urgency=low ++ ++ * New upstream version ++ * Make ingress qdisc work again with tc (closes: Bug#84444) ++ * Make it compile properly with new include files (closes: Bug#113112) ++ ++ -- Juan Cespedes Sun, 28 Oct 2001 16:38:00 +0100 ++ ++iproute (20001007-1) unstable; urgency=low ++ ++ * New upstream version (closes: Bug#63701) ++ * Remove /etc/iproute2 on purge (closes: Bug#72743) ++ * Fixed Lintian warnings (no-priority-field and no-section-field) ++ ++ -- Juan Cespedes Sat, 14 Oct 2000 19:27:12 +0200 ++ ++iproute (991023-2) unstable; urgency=low ++ ++ * New Standards-Version (3.1.1) (closes: Bug#47923) ++ * Modified description of package to show which kernel options are ++ necessary to use the package (closes: Bug#47922) ++ * Updated manual page to point at /usr/share/doc/iproute (closes: Bug#47924) ++ ++ -- Juan Cespedes Sun, 19 Dec 1999 04:00:21 +0100 ++ ++iproute (991023-1) unstable; urgency=low ++ ++ * New upstream version (closes: Bug#48733) ++ ++ -- Juan Cespedes Tue, 2 Nov 1999 16:29:37 +0100 ++ ++iproute (990824-1) unstable; urgency=low ++ ++ * New maintainer ++ * New upstream version ++ * New Standards-Version: 3.1.0 ++ * Minor fix in "ip rule list": mask in "from" address was not shown ++ correctly ++ * Removed obsoleted documentation from 
"debian/" directory ++ ++ -- Juan Cespedes Sun, 24 Oct 1999 19:02:56 +0200 ++ ++iproute (990630-1) unstable; urgency=low ++ ++ * New upstream version. ++ * FHS and standards 3.0.1.0. ++ ++ -- Roberto Lumbreras Tue, 3 Aug 1999 02:49:28 +0200 ++ ++iproute (990530-1) unstable; urgency=low ++ ++ * New upstream version. ++ * Build with 2.2.10 kernel headers. ++ * Install new scripts ip/routef ip/routel, but not ip/ifcfg ip/rtpr by ++ now, I don't know who/what needs rtpr; ifcfg uses arping, and it isn't ++ available in debian for now. ++ ++ -- Roberto Lumbreras Tue, 22 Jun 1999 02:28:53 +0200 ++ ++iproute (990329-1) unstable; urgency=low ++ ++ * New upstream version. ++ * Build with 2.2.5 kernel headers. ++ ++ -- Roberto Lumbreras Sun, 4 Apr 1999 18:50:39 +0200 ++ ++iproute (980630-1) unstable; urgency=low ++ ++ * New upstream version. ++ * Build with 2.1.112 kernel headers. ++ * Rewrote the rules file. ++ ++ -- Roberto Lumbreras Wed, 29 Jul 1998 23:37:52 +0200 ++ ++iproute (980119-1) unstable; urgency=low ++ ++ * Outdated documentation. Upstream docs are scarce. ++ * Non-Maintainer release ++ * This package has no correct copyright file! ++ * Include all the README.* docs from the upstream site. ++ * Modified to build under glibc ++ * Build with 2.1.85 kernel headers. ++ * produce a correct diff. ++ * Reworked the rules file to utilize debmake fully ++ * Newest upstream release ++ * glibc compilation ++ ++ -- Christoph Lameter Wed, 4 Feb 1998 13:37:28 -0800 ++ ++iproute (961225-2) unstable frozen; urgency=low ++ ++ * Added a man page for iproute. (Fixes #8080). ++ * Removed out-of-date patches. ++ * Added routing.txt from /usr/src/linux/Documentation/networking/routing.txt ++ * Newer version of debmake. ++ ++ -- Tom Lees Mon, 17 Apr 1997 17:00:36 +0100 ++ ++iproute (961225-1) unstable; urgency=low ++ ++ * Initial Release. 
++ ++ -- Tom Lees Mon, 30 Dec 1996 11:12:23 +0000 ++ ++Local variables: ++mode: debian-changelog ++End: +diff -Naur iproute2-orig/debian/conffiles iproute2/debian/conffiles +--- iproute2-orig/debian/conffiles 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/conffiles 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,5 @@ ++/etc/iproute2/rt_dsfield ++/etc/iproute2/rt_protos ++/etc/iproute2/rt_realms ++/etc/iproute2/rt_scopes ++/etc/iproute2/rt_tables +diff -Naur iproute2-orig/debian/control iproute2/debian/control +--- iproute2-orig/debian/control 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/control 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,19 @@ ++Source: iproute ++Section: net ++Priority: optional ++Maintainer: Juan Cespedes ++Standards-Version: 3.6.0 ++Build-Depends: tetex-bin, atm-dev ++ ++Package: iproute ++Architecture: any ++Depends: ${shlibs:Depends} ++Description: Professional tools to control the networking in Linux kernels ++ This is `iproute', the professional set of tools to control the ++ networking behavior in kernels 2.2.x and later. ++ . ++ At least, the options CONFIG_NETLINK and CONFIG_RTNETLINK must ++ be compiled in the running kernel ++ . ++ This package is also known as iproute2 upstream and in some ++ documentation. 
+diff -Naur iproute2-orig/debian/copyright iproute2/debian/copyright +--- iproute2-orig/debian/copyright 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/copyright 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,42 @@ ++This is the Debian GNU/Linux's prepackaged version of the ++Linux Traffic Control engine and related utils, "iproute" ++ ++This package was put together from sources obtained from: ++ ftp://ftp.inr.ac.ru/ip-routing/iproute2-2.4.7-now-ss010824.tar.gz ++ ++Changes for Debian: ++ * added Debian GNU/Linux package maintenance system files ++ * Added HTB v3.6 from ++ ++ ++ ++Copyrights ++---------- ++Copyright (C) 1996-2001 Alexey Kuznetsov ++ ++Modifications for Debian: ++ Copyright (C) 1996 Tom Lees ++ Copyright (C) 1998 Christoph Lameter ++ Copyright (C) 1998-1999 Roberto Lumbreras ++ Copyright (C) 1999-2003 Juan Cespedes ++ ++ ++License ++------- ++ ++This program is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 2, or (at your option) ++any later version. ++ ++This program is distributed in the hope that it will be useful, but ++WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++General Public License for more details. ++ ++A copy of the GNU General Public License is available as ++`/usr/share/common-licenses/GPL' in the Debian GNU/Linux distribution ++or on the World Wide Web at `http://www.gnu.org/copyleft/gpl.html'. 
++You can also obtain it by writing to the Free Software Foundation, ++Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA ++ +diff -Naur iproute2-orig/debian/manpages/ip.8 iproute2/debian/manpages/ip.8 +--- iproute2-orig/debian/manpages/ip.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/ip.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,1809 @@ ++.TH IP 8 "17 January 2002" "iproute2" "Linux" ++.SH NAME ++ip \- show / manipulate routing, devices, policy routing and tunnels ++.SH SYNOPSIS ++ ++.ad l ++.in +8 ++.ti -8 ++.B ip ++.RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | " ++.BR help " }" ++.sp ++ ++.ti -8 ++.IR OBJECT " := { " ++.BR link " | " addr " | " route " | " rule " | " neigh " | " tunnel " | "\ ++maddr " | " mroute " | " monitor " }" ++.sp ++ ++.ti -8 ++.IR OPTIONS " := { " ++\fB\-V\fR[\fIersion\fR] | ++\fB\-s\fR[\fItatistics\fR] | ++\fB\-r\fR[\fIesolve\fR] | ++\fB\-f\fR[\fIamily\fR] { ++.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | " ++\fB\-o\fR[\fIneline\fR] } ++ ++.ti -8 ++.BI "ip link set " DEVICE ++.RB "{ " up " | " down " | " arp " { " on " | " off " } |" ++.br ++.BR promisc " { " on " | " off " } |" ++.br ++.BR allmulti " { " on " | " off " } |" ++.br ++.BR dynamic " { " on " | " off " } |" ++.br ++.BR multicast " { " on " | " off " } |" ++.br ++.B txqueuelen ++.IR PACKETS " |" ++.br ++.B name ++.IR NEWNAME " |" ++.br ++.B address ++.IR LLADDR " |" ++.B broadcast ++.IR LLADDR " |" ++.br ++.B mtu ++.IR MTU " }" ++ ++.ti -8 ++.B ip link show ++.RI "[ " DEVICE " ]" ++ ++.ti -8 ++.BR "ip addr" " { " add " | " del " } " ++.IB IFADDR " dev " STRING ++ ++.ti -8 ++.BR "ip addr" " { " show " | " flush " } [ " dev ++.IR STRING " ] [ " ++.B scope ++.IR SCOPE-ID " ] [ " ++.B to ++.IR PREFIX " ] [ " FLAG-LIST " ] [ " ++.B label ++.IR PATTERN " ]" ++ ++.ti -8 ++.IR IFADDR " := " PREFIX " | " ADDR ++.B peer ++.IR PREFIX " [ " ++.B broadcast ++.IR ADDR " ] [ " ++.B anycast ++.IR ADDR " ] [ " ++.B label ++.IR STRING " ] 
[ " ++.B scope ++.IR SCOPE-ID " ]" ++ ++.ti -8 ++.IR SCOPE-ID " := " ++.RB "[ " host " | " link " | " global " | " ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR FLAG-LIST " := [ " FLAG-LIST " ] " FLAG ++ ++.ti -8 ++.IR FLAG " := " ++.RB "[ " permanent " | " dynamic " | " secondary " | " primary " | "\ ++tentative " | " deprecated " ]" ++ ++.ti -8 ++.BR "ip route" " { " ++.BR list " | " flush " } " ++.I SELECTOR ++ ++.ti -8 ++.B ip route get ++.IR ADDRESS " [ " ++.BI from " ADDRESS " iif " STRING" ++.RB " ] [ " oif ++.IR STRING " ] [ " ++.B tos ++.IR TOS " ]" ++ ++.ti -8 ++.BR "ip route" " { " add " | " del " | " change " | " append " | "\ ++replace " | " monitor " } " ++.I ROUTE ++ ++.ti -8 ++.IR SELECTOR " := " ++.RB "[ " root ++.IR PREFIX " ] [ " ++.B match ++.IR PREFIX " ] [ " ++.B exact ++.IR PREFIX " ] [ " ++.B table ++.IR TABLE_ID " ] [ " ++.B proto ++.IR RTPROTO " ] [ " ++.B type ++.IR TYPE " ] [ " ++.B scope ++.IR SCOPE " ]" ++ ++.ti -8 ++.IR ROUTE " := " NODE_SPEC " [ " INFO_SPEC " ]" ++ ++.ti -8 ++.IR NODE_SPEC " := [ " TYPE " ] " PREFIX " [" ++.B tos ++.IR TOS " ] [ " ++.B table ++.IR TABLE_ID " ] [ " ++.B proto ++.IR RTPROTO " ] [ " ++.B scope ++.IR SCOPE " ] [ " ++.B metric ++.IR METRIC " ]" ++ ++.ti -8 ++.IR INFO_SPEC " := " "NH OPTIONS FLAGS" " [" ++.B nexthop ++.IR NH " ] ..." 
++ ++.ti -8 ++.IR NH " := [ " ++.B via ++.IR ADDRESS " ] [ " ++.B dev ++.IR STRING " ] [ " ++.B weight ++.IR NUMBER " ] " NHFLAGS ++ ++.ti -8 ++.IR OPTIONS " := " FLAGS " [ " ++.B mtu ++.IR NUMBER " ] [ " ++.B advmss ++.IR NUMBER " ] [ " ++.B rtt ++.IR NUMBER " ] [ " ++.B rttvar ++.IR NUMBER " ] [ " ++.B window ++.IR NUMBER " ] [ " ++.B cwnd ++.IR NUMBER " ] [ " ++.B ssthresh ++.IR NUMBER " ] [ " ++.B realms ++.IR REALM " ]" ++ ++.ti -8 ++.IR TYPE " := [ " ++.BR unicast " | " local " | " broadcast " | " multicast " | "\ ++throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" ++ ++.ti -8 ++.IR TABLE_ID " := [ " ++.BR local " | " main " | " default " | " all " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR SCOPE " := [ " ++.BR host " | " link " | " global " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR FLAGS " := [ " ++.BR equalize " ]" ++ ++.ti -8 ++.IR NHFLAGS " := [ " ++.BR onlink " | " pervasive " ]" ++ ++.ti -8 ++.IR RTPROTO " := [ " ++.BR kernel " | " boot " | " static " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.B ip rule ++.RB " [ " list " | " add " | " del " ]" ++.I SELECTOR ACTION ++ ++.ti -8 ++.IR SELECTOR " := [ " ++.B from ++.IR PREFIX " ] [ " ++.B to ++.IR PREFIX " ] [ " ++.B tos ++.IR TOS " ] [ " ++.B fwmark ++.IR FWMARK " ] [ " ++.B dev ++.IR STRING " ] [ " ++.B pref ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR ACTION " := [ " ++.B table ++.IR TABLE_ID " ] [ " ++.B nat ++.IR ADDRESS " ] [ " ++.BR prohibit " | " reject " | " unreachable " ] [ " realms ++.RI "[" SRCREALM "/]" DSTREALM " ]" ++ ++.ti -8 ++.IR TABLE_ID " := [ " ++.BR local " | " main " | " default " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.BR "ip neigh" " { " add " | " del " | " change " | " replace " } { " ++.IR ADDR " [ " ++.B lladdr ++.IR LLADDR " ] [ " ++.BR nud " { " permanent " | " noarp " | " stale " | " reachable " } ] | " proxy ++.IR ADDR " } [ " ++.B dev ++.IR DEV " ]" ++ ++.ti -8 ++.BR "ip neigh" " { " show " | " flush " } [ " to ++.IR PREFIX " ] [ " ++.B dev ++.IR DEV " ] [ " ++.B nud ++.IR STATE " 
]" ++ ++.ti -8 ++.BR "ip tunnel" " { " add " | " change " | " del " | " show " }" ++.RI "[ " NAME " ]" ++.br ++.RB "[ " mode " { " ipip " | " gre " | " sit " } ]" ++.br ++.RB "[ " remote ++.IR ADDR " ] [ " ++.B local ++.IR ADDR " ]" ++.br ++.RB "[ [" i "|" o "]" seq " ] [ [" i "|" o "]" key ++.IR KEY " ] [ " ++.RB "[" i "|" o "]" csum " ] ]" ++.br ++.RB "[ " ttl ++.IR TTL " ] [ " ++.B tos ++.IR TOS " ] [ " ++.RB "[" no "]" pmtudisc " ]" ++.br ++.RB "[ " dev ++.IR PHYS_DEV " ]" ++ ++.ti -8 ++.IR ADDR " := { " IP_ADDRESS " |" ++.BR any " }" ++ ++.ti -8 ++.IR TOS " := { " NUMBER " |" ++.BR inherit " }" ++ ++.ti -8 ++.IR TTL " := { " 1 ".." 255 " | " ++.BR inherit " }" ++ ++.ti -8 ++.IR KEY " := { " DOTTED_QUAD " | " NUMBER " }" ++ ++.ti -8 ++.BR "ip maddr" " [ " add " | " del " ]" ++.IB MULTIADDR " dev " STRING ++ ++.ti -8 ++.BR "ip maddr show" " [ " dev ++.IR STRING " ]" ++ ++.ti -8 ++.BR "ip mroute show" " [" ++.IR PREFIX " ] [ " ++.B from ++.IR PREFIX " ] [ " ++.B iif ++.IR DEVICE " ]" ++ ++.ti -8 ++.BR "ip monitor" " [ " all " |" ++.IR LISTofOBJECTS " ]" ++.in -8 ++.ad b ++ ++.SH OPTIONS ++ ++.TP ++.BR "\-V" , " -Version" ++print the version of the ++.B ip ++utility and exit. ++ ++.TP ++.BR "\-s" , " \-stats", " \-statistics" ++output more information. If the option ++appears twice or more, the amount of information increases. ++As a rule, the information is statistics or some time values. ++ ++.TP ++.BR "\-f" , " \-family" ++followed by protocol family identifier: ++.BR "inet" , " inet6" ++or ++.B link ++,enforce the protocol family to use. If the option is not present, ++the protocol family is guessed from other arguments. If the rest ++of the command line does not give enough information to guess the ++family, ++.B ip ++falls back to the default one, usually ++.B inet ++or ++.BR "any" . ++.B link ++is a special family identifier meaning that no networking protocol ++is involved. ++ ++.TP ++.B \-4 ++shortcut for ++.BR "-family inet" . 
++ ++.TP ++.B \-6 ++shortcut for ++.BR "\-family inet6" . ++ ++.TP ++.B \-0 ++shortcut for ++.BR "\-family link" . ++ ++.TP ++.BR "\-o" , " \-oneline" ++output each record on a single line, replacing line feeds ++with the ++.B '\e' ++character. This is convenient when you want to count records ++with ++.BR wc (1) ++ or to ++.BR grep (1) ++the output. ++ ++.TP ++.BR "\-r" , " \-resolve" ++use the system's name resolver to print DNS names instead of ++host addresses. ++ ++.SH IP - COMMAND SYNTAX ++ ++.SS ++.I OBJECT ++ ++.TP ++.B link ++- network device. ++ ++.TP ++.B address ++- protocol (IP or IPv6) address on a device. ++.TP ++.B neighbour ++- ARP or NDISC cache entry. ++ ++.TP ++.B route ++- routing table entry. ++ ++.TP ++.B rule ++- rule in routing policy database. ++ ++.TP ++.B maddress ++- multicast address. ++ ++.TP ++.B mroute ++- multicast routing cache entry. ++ ++.TP ++.B tunnel ++- tunnel over IP. ++ ++.PP ++The names of all objects may be written in full or ++abbreviated form, e.g. ++.B address ++is abbreviated as ++.B addr ++or just ++.B a. ++ ++.SS ++.I COMMAND ++ ++Specifies the action to perform on the object. ++The set of possible actions depends on the object type. ++As a rule, it is possible to ++.BR "add" , " delete" ++and ++.B show ++(or ++.B list ++) objects, but some objects do not allow all of these operations ++or have some additional commands. The ++.B help ++command is available for all objects. It prints ++out a list of available commands and argument syntax conventions. ++.sp ++If no command is given, some default command is assumed. ++Usually it is ++.B list ++or, if the objects of this class cannot be listed, ++.BR "help" . ++ ++.SH ip link - network device configuration ++ ++.B link ++is a network device and the corresponding commands ++display and change the state of devices. ++ ++.SS ip link set - change device attributes ++ ++.TP ++.BI dev " NAME " (default) ++.I NAME ++specifies network device to operate on. 
++ ++.TP ++.BR up " and " down ++change the state of the device to ++.B UP ++or ++.BR "DOWN" . ++ ++.TP ++.BR "arp on " or " arp off" ++change the ++.B NOARP ++flag on the device. ++ ++.TP ++.BR "multicast on " or " multicast off" ++change the ++.B MULTICAST ++flag on the device. ++ ++.TP ++.BR "dynamic on " or " dynamic off" ++change the ++.B DYNAMIC ++flag on the device. ++ ++.TP ++.BI name " NAME" ++change the name of the device. This operation is not ++recommended if the device is running or has some addresses ++already configured. ++ ++.TP ++.BI txqueuelen " NUMBER" ++.TP ++.BI txqlen " NUMBER" ++change the transmit queue length of the device. ++ ++.TP ++.BI mtu " NUMBER" ++change the ++.I MTU ++of the device. ++ ++.TP ++.BI address " LLADDRESS" ++change the station address of the interface. ++ ++.TP ++.BI broadcast " LLADDRESS" ++.TP ++.BI brd " LLADDRESS" ++.TP ++.BI peer " LLADDRESS" ++change the link layer broadcast address or the peer address when ++the interface is ++.IR "POINTOPOINT" . ++ ++.PP ++.B Warning: ++If multiple parameter changes are requested, ++.B ip ++aborts immediately after any of the changes have failed. ++This is the only case when ++.B ip ++can move the system to an unpredictable state. The solution ++is to avoid changing several parameters with one ++.B ip link set ++call. ++ ++.SS ip link show - display device attributes ++ ++.TP ++.BI dev " NAME " (default) ++.I NAME ++specifies the network device to show. ++If this argument is omitted all devices are listed. ++ ++.TP ++.B up ++only display running interfaces. ++ ++.SH ip address - protocol address management. ++ ++The ++.B address ++is a protocol (IP or IPv6) address attached ++to a network device. Each device must have at least one address ++to use the corresponding protocol. It is possible to have several ++different addresses attached to one device. 
These addresses are not ++discriminated, so that the term ++.B alias ++is not quite appropriate for them and we do not use it in this document. ++.sp ++The ++.B ip addr ++command displays addresses and their properties, adds new addresses ++and deletes old ones. ++ ++.SS ip address add - add new protocol address. ++ ++.TP ++.BI dev " NAME" ++the name of the device to add the address to. ++ ++.TP ++.BI local " ADDRESS " (default) ++the address of the interface. The format of the address depends ++on the protocol. It is a dotted quad for IP and a sequence of ++hexadecimal halfwords separated by colons for IPv6. The ++.I ADDRESS ++may be followed by a slash and a decimal number which encodes ++the network prefix length. ++ ++.TP ++.BI peer " ADDRESS" ++the address of the remote endpoint for pointopoint interfaces. ++Again, the ++.I ADDRESS ++may be followed by a slash and a decimal number, encoding the network ++prefix length. If a peer address is specified, the local address ++cannot have a prefix length. The network prefix is associated ++with the peer rather than with the local address. ++ ++.TP ++.BI broadcast " ADDRESS" ++the broadcast address on the interface. ++.sp ++It is possible to use the special symbols ++.B '+' ++and ++.B '-' ++instead of the broadcast address. In this case, the broadcast address ++is derived by setting/resetting the host bits of the interface prefix. ++ ++.TP ++.BI label " NAME" ++Each address may be tagged with a label string. ++In order to preserve compatibility with Linux-2.0 net aliases, ++this string must coincide with the name of the device or must be prefixed ++with the device name followed by colon. ++ ++.TP ++.BI scope " SCOPE_VALUE" ++the scope of the area where this address is valid. ++The available scopes are listed in file ++.BR "/etc/iproute2/rt_scopes" . ++Predefined scope values are: ++ ++.in +8 ++.B global ++- the address is globally valid. ++.sp ++.B site ++- (IPv6 only) the address is site local, i.e. 
it is ++valid inside this site. ++.sp ++.B link ++- the address is link local, i.e. it is valid only on this device. ++.sp ++.B host ++- the address is valid only inside this host. ++.in -8 ++ ++.SS ip address delete - delete protocol address ++.B Arguments: ++coincide with the arguments of ++.B ip addr add. ++The device name is a required argument. The rest are optional. ++If no arguments are given, the first address is deleted. ++ ++.SS ip address show - look at protocol addresses ++ ++.TP ++.BI dev " NAME " (default) ++name of device. ++ ++.TP ++.BI scope " SCOPE_VAL" ++only list addresses with this scope. ++ ++.TP ++.BI to " PREFIX" ++only list addresses matching this prefix. ++ ++.TP ++.BI label " PATTERN" ++only list addresses with labels matching the ++.IR "PATTERN" . ++.I PATTERN ++is a usual shell style pattern. ++ ++.TP ++.BR dynamic " and " permanent ++(IPv6 only) only list addresses installed due to stateless ++address configuration or only list permanent (not dynamic) ++addresses. ++ ++.TP ++.B tentative ++(IPv6 only) only list addresses which did not pass duplicate ++address detection. ++ ++.TP ++.B deprecated ++(IPv6 only) only list deprecated addresses. ++ ++.TP ++.BR primary " and " secondary ++only list primary (or secondary) addresses. ++ ++.SS ip address flush - flush protocol addresses ++This command flushes the protocol addresses selected by some criteria. ++ ++.PP ++This command has the same arguments as ++.B show. ++The difference is that it does not run when no arguments are given. ++ ++.PP ++.B Warning: ++This command (and other ++.B flush ++commands described below) is pretty dangerous. If you make a mistake, ++it will not forgive it, but will cruelly purge all the addresses. ++ ++.PP ++With the ++.B -statistics ++option, the command becomes verbose. It prints out the number of deleted ++addresses and the number of rounds made to flush the address list. 
If ++this option is given twice, ++.B ip addr flush ++also dumps all the deleted addresses in the format described in the ++previous subsection. ++ ++.SH ip neighbour - neighbour/arp tables management. ++ ++.B neighbour ++objects establish bindings between protocol addresses and ++link layer addresses for hosts sharing the same link. ++Neighbour entries are organized into tables. The IPv4 neighbour table ++is known by another name - the ARP table. ++ ++.P ++The corresponding commands display neighbour bindings ++and their properties, add new neighbour entries and delete old ones. ++ ++.SS ip neighbour add - add a new neighbour entry ++.SS ip neighbour change - change an existing entry ++.SS ip neighbour replace - add a new entry or change an existing one ++ ++These commands create new neighbour records or update existing ones. ++ ++.TP ++.BI to " ADDRESS " (default) ++the protocol address of the neighbour. It is either an IPv4 or IPv6 address. ++ ++.TP ++.BI dev " NAME" ++the interface to which this neighbour is attached. ++ ++.TP ++.BI lladdr " LLADDRESS" ++the link layer address of the neighbour. ++.I LLADDRESS ++can also be ++.BR "null" . ++ ++.TP ++.BI nud " NUD_STATE" ++the state of the neighbour entry. ++.B nud ++is an abbreviation for 'Neighbour Unreachability Detection'. ++The state can take one of the following values: ++ ++.in +8 ++.B permanent ++- the neighbour entry is valid forever and can only ++be removed administratively. ++.sp ++ ++.B noarp ++- the neighbour entry is valid. No attempts to validate ++this entry will be made but it can be removed when its lifetime expires. ++.sp ++ ++.B reachable ++- the neighbour entry is valid until the reachability ++timeout expires. ++.sp ++ ++.B stale ++- the neighbour entry is valid but suspicious. ++This option to ++.B ip neigh ++does not change the neighbour state if it was valid and the address ++is not changed by this command. 
++.in -8 ++ ++.SS ip neighbour delete - delete a neighbour entry ++This command invalidates a neighbour entry. ++ ++.PP ++The arguments are the same as with ++.BR "ip neigh add" , ++except that ++.B lladdr ++and ++.B nud ++are ignored. ++ ++.PP ++.B Warning: ++Attempts to delete or manually change a ++.B noarp ++entry created by the kernel may result in unpredictable behaviour. ++Particularly, the kernel may try to resolve this address even ++on a ++.B NOARP ++interface or if the address is multicast or broadcast. ++ ++.SS ip neighbour show - list neighbour entries ++ ++This command displays neighbour tables. ++ ++.TP ++.BI to " ADDRESS " (default) ++the prefix selecting the neighbours to list. ++ ++.TP ++.BI dev " NAME" ++only list the neighbours attached to this device. ++ ++.TP ++.B unused ++only list neighbours which are not currently in use. ++ ++.TP ++.BI nud " NUD_STATE" ++only list neighbour entries in this state. ++.I NUD_STATE ++takes values listed above or the special value ++.B all ++which means all states. This option may occur more than once. ++If this option is absent, ++.B ip ++lists all entries except for ++.B none ++and ++.BR "noarp" . ++ ++.SS ip neighbour flush - flush neighbour entries ++This command flushes neighbour tables, selecting ++entries to flush by some criteria. ++ ++.PP ++This command has the same arguments as ++.BR show . ++The differences are that it does not run when no arguments are given, ++and that the default neighbour states to be flushed do not include ++.B permanent ++and ++.BR "noarp" . ++ ++.PP ++With the ++.B -statistics ++option, the command becomes verbose. It prints out the number of ++deleted neighbours and the number of rounds made to flush the ++neighbour table. If the option is given ++twice, ++.B ip neigh flush ++also dumps all the deleted neighbours. ++ ++.SH ip route - routing table management ++Manipulate route entries in the kernel routing tables, which keep ++information about paths to other networked nodes. 
++.sp ++.B Route types: ++ ++.in +8 ++.B unicast ++- the route entry describes real paths to the destinations covered ++by the route prefix. ++ ++.sp ++.B unreachable ++- these destinations are unreachable. Packets are discarded and the ++ICMP message ++.I host unreachable ++is generated. ++The local senders get an ++.I EHOSTUNREACH ++error. ++ ++.sp ++.B blackhole ++- these destinations are unreachable. Packets are discarded silently. ++The local senders get an ++.I EINVAL ++error. ++ ++.sp ++.B prohibit ++- these destinations are unreachable. Packets are discarded and the ++ICMP message ++.I communication administratively prohibited ++is generated. The local senders get an ++.I EACCES ++error. ++ ++.sp ++.B local ++- the destinations are assigned to this host. The packets are looped ++back and delivered locally. ++ ++.sp ++.B broadcast ++- the destinations are broadcast addresses. The packets are sent as ++link broadcasts. ++ ++.sp ++.B throw ++- a special control route used together with policy rules. If such a ++route is selected, lookup in this table is terminated pretending that ++no route was found. Without policy routing it is equivalent to the ++absence of the route in the routing table. The packets are dropped ++and the ICMP message ++.I net unreachable ++is generated. The local senders get an ++.I ENETUNREACH ++error. ++ ++.sp ++.B nat ++- a special NAT route. Destinations covered by the prefix ++are considered to be dummy (or external) addresses which require translation ++to real (or internal) ones before forwarding. The addresses to translate to ++are selected with the attribute ++.BR "via" . ++ ++.sp ++.B anycast ++.RI "- " "not implemented" ++the destinations are ++.I anycast ++addresses assigned to this host. They are mainly equivalent ++to ++.B local ++with one difference: such addresses are invalid when used ++as the source address of any packet. ++ ++.sp ++.B multicast ++- a special type used for multicast routing. 
It is not present in ++normal routing tables. ++.in -8 ++ ++.P ++.B Route tables: ++Linux-2.x can pack routes into several routing ++tables identified by a number in the range from 1 to 255 or by ++name from the file ++.BR /etc/iproute2/rt_tables . ++By default all normal routes are inserted into the ++.B main ++table (ID 254) and the kernel only uses this table when calculating routes. ++ ++.sp ++Actually, one other table always exists, which is invisible but ++even more important. It is the ++.B local ++table (ID 255). This table ++consists of routes for local and broadcast addresses. The kernel maintains ++this table automatically and the administrator usually need not modify it ++or even look at it. ++ ++The multiple routing tables enter the game when ++.I policy routing ++is used. ++ ++.SS ip route add - add new route ++.SS ip route change - change route ++.SS ip route replace - change or add new one ++ ++.TP ++.BI to " TYPE PREFIX " (default) ++the destination prefix of the route. If ++.I TYPE ++is omitted, ++.B ip ++assumes type ++.BR "unicast" . ++Other values of ++.I TYPE ++are listed above. ++.I PREFIX ++is an IP or IPv6 address optionally followed by a slash and the ++prefix length. If the length of the prefix is missing, ++.B ip ++assumes a full-length host route. There is also a special ++.I PREFIX ++.B default ++- which is equivalent to IP ++.B 0/0 ++or to IPv6 ++.BR "::/0" . ++ ++.TP ++.BI tos " TOS" ++.TP ++.BI dsfield " TOS" ++the Type Of Service (TOS) key. This key has no associated mask and ++the longest match is understood as: First, compare the TOS ++of the route and of the packet. If they are not equal, then the packet ++may still match a route with a zero TOS. ++.I TOS ++is either an 8 bit hexadecimal number or an identifier ++from ++.BR "/etc/iproute2/rt_dsfield" . ++ ++.TP ++.BI metric " NUMBER" ++.TP ++.BI preference " NUMBER" ++the preference value of the route. ++.I NUMBER ++is an arbitrary 32bit number. 
++ ++.TP ++.BI table " TABLEID" ++the table to add this route to. ++.I TABLEID ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_tables" . ++If this parameter is omitted, ++.B ip ++assumes the ++.B main ++table, with the exception of ++.BR local " , " broadcast " and " nat ++routes, which are put into the ++.B local ++table by default. ++ ++.TP ++.BI dev " NAME" ++the output device name. ++ ++.TP ++.BI via " ADDRESS" ++the address of the nexthop router. Actually, the sense of this field ++depends on the route type. For normal ++.B unicast ++routes it is either the true next hop router or, if it is a direct ++route installed in BSD compatibility mode, it can be a local address ++of the interface. For NAT routes it is the first address of the block ++of translated IP destinations. ++ ++.TP ++.BI src " ADDRESS" ++the source address to prefer when sending to the destinations ++covered by the route prefix. ++ ++.TP ++.BI realm " REALMID" ++the realm to which this route is assigned. ++.I REALMID ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_realms" . ++ ++.TP ++.BI mtu " MTU" ++.TP ++.BI "mtu lock" " MTU" ++the MTU along the path to the destination. If the modifier ++.B lock ++is not used, the MTU may be updated by the kernel due to ++Path MTU Discovery. If the modifier ++.B lock ++is used, no path MTU discovery will be tried, all packets ++will be sent without the DF bit in IPv4 case or fragmented ++to MTU for IPv6. ++ ++.TP ++.BI window " NUMBER" ++the maximal window for TCP to advertise to these destinations, ++measured in bytes. It limits maximal data bursts that our TCP ++peers are allowed to send to us. ++ ++.TP ++.BI rtt " NUMBER" ++the initial RTT ('Round Trip Time') estimate. ++ ++.TP ++.BI rttvar " NUMBER " "(2.3.15+ only)" ++the initial RTT variance estimate. ++ ++.TP ++.BI ssthresh " NUMBER " "(2.3.15+ only)" ++an estimate for the initial slow start threshold. 
++ ++.TP ++.BI cwnd " NUMBER " "(2.3.15+ only)" ++the clamp for congestion window. It is ignored if the ++.B lock ++flag is not used. ++ ++.TP ++.BI advmss " NUMBER " "(2.3.15+ only)" ++the MSS ('Maximal Segment Size') to advertise to these ++destinations when establishing TCP connections. If it is not given, ++Linux uses a default value calculated from the first hop device MTU. ++(If the path to these destinations is asymmetric, this guess may be wrong.) ++ ++.TP ++.BI reordering " NUMBER " "(2.3.15+ only)" ++Maximal reordering on the path to this destination. ++If it is not given, Linux uses the value selected with ++.B sysctl ++variable ++.BR "net/ipv4/tcp_reordering" . ++ ++.TP ++.BI nexthop " NEXTHOP" ++the nexthop of a multipath route. ++.I NEXTHOP ++is a complex value with its own syntax similar to the top level ++argument lists: ++ ++.in +8 ++.BI via " ADDRESS" ++- is the nexthop router. ++.sp ++ ++.BI dev " NAME" ++- is the output device. ++.sp ++ ++.BI weight " NUMBER" ++- is a weight for this element of a multipath ++route reflecting its relative bandwidth or quality. ++.in -8 ++ ++.TP ++.BI scope " SCOPE_VAL" ++the scope of the destinations covered by the route prefix. ++.I SCOPE_VAL ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_scopes" . ++If this parameter is omitted, ++.B ip ++assumes scope ++.B global ++for all gatewayed ++.B unicast ++routes, scope ++.B link ++for direct ++.BR unicast " and " broadcast ++routes and scope ++.BR host " for " local ++routes. ++ ++.TP ++.BI protocol " RTPROTO" ++the routing protocol identifier of this route. ++.I RTPROTO ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_protos" . ++If the routing protocol ID is not given, ++.BR ip " assumes protocol" ++.B boot ++(i.e. it assumes the route was added by someone who doesn't ++understand what they are doing). Several protocol values have ++a fixed interpretation. 
++Namely: ++ ++.in +8 ++.B redirect ++- the route was installed due to an ICMP redirect. ++.sp ++ ++.B kernel ++- the route was installed by the kernel during autoconfiguration. ++.sp ++ ++.B boot ++- the route was installed during the bootup sequence. ++If a routing daemon starts, it will purge all of them. ++.sp ++ ++.B static ++- the route was installed by the administrator ++to override dynamic routing. A routing daemon will respect them ++and, probably, even advertise them to its peers. ++.sp ++ ++.B ra ++- the route was installed by the Router Discovery protocol. ++.in -8 ++ ++.sp ++The rest of the values are not reserved and the administrator is free ++to assign (or not to assign) protocol tags. ++ ++.TP ++.B onlink ++pretend that the nexthop is directly attached to this link, ++even if it does not match any interface prefix. ++ ++.TP ++.B equalize ++allow packet by packet randomization on multipath routes. ++Without this modifier, the route will be frozen to one selected ++nexthop, so that load splitting will only occur on a per-flow basis. ++.B equalize ++only works if the kernel is patched. ++ ++.SS ip route delete - delete route ++ ++.B ip route del ++has the same arguments as ++.BR "ip route add" , ++but their semantics are a bit different. ++ ++Key values ++.RB "(" to ", " tos ", " preference " and " table ")" ++select the route to delete. If optional attributes are present, ++.B ip ++verifies that they coincide with the attributes of the route to delete. ++If no route with the given key and attributes was found, ++.B ip route del ++fails. ++ ++.SS ip route show - list routes ++the command displays the contents of the routing tables or the route(s) ++selected by some criteria. ++ ++.TP ++.BI to " SELECTOR " (default) ++only select routes from the given range of destinations. ++.I SELECTOR ++consists of an optional modifier ++.RB "(" root ", " match " or " exact ")" ++and a prefix. 
++.BI root " PREFIX" ++selects routes with prefixes not shorter than ++.IR PREFIX "." ++F.e. ++.BI root " 0/0" ++selects the entire routing table. ++.BI match " PREFIX" ++selects routes with prefixes not longer than ++.IR PREFIX "." ++F.e. ++.BI match " 10.0/16" ++selects ++.IR 10.0/16 "," ++.IR 10/8 " and " 0/0 , ++but it does not select ++.IR 10.1/16 " and " 10.0.0/24 . ++And ++.BI exact " PREFIX" ++(or just ++.IR PREFIX ")" ++selects routes with this exact prefix. If none of these options ++is present, ++.B ip ++assumes ++.BI root " 0/0" ++i.e. it lists the entire table. ++ ++.TP ++.BI tos " TOS" ++.BI dsfield " TOS" ++only select routes with the given TOS. ++ ++.TP ++.BI table " TABLEID" ++show the routes from this table(s). The default setting is to show ++.BR "table main" "." ++.I TABLEID ++may either be the ID of a real table or one of the special values: ++.sp ++.in +8 ++.B all ++- list all of the tables. ++.sp ++.B cache ++- dump the routing cache. ++.in -8 ++ ++.TP ++.B cloned ++.TP ++.B cached ++list cloned routes i.e. routes which were dynamically forked from ++other routes because some route attribute (f.e. MTU) was updated. ++Actually, it is equivalent to ++.BR "table cache" "." ++ ++.TP ++.BI from " SELECTOR" ++the same syntax as for ++.BR to "," ++but it binds the source address range rather than destinations. ++Note that the ++.B from ++option only works with cloned routes. ++ ++.TP ++.BI protocol " RTPROTO" ++only list routes of this protocol. ++ ++.TP ++.BI scope " SCOPE_VAL" ++only list routes with this scope. ++ ++.TP ++.BI type " TYPE" ++only list routes of this type. ++ ++.TP ++.BI dev " NAME" ++only list routes going via this device. ++ ++.TP ++.BI via " PREFIX" ++only list routes going via the nexthop routers selected by ++.IR PREFIX "." ++ ++.TP ++.BI src " PREFIX" ++only list routes with preferred source addresses selected ++by ++.IR PREFIX "." 
++ ++.TP ++.BI realm " REALMID" ++.TP ++.BI realms " FROMREALM/TOREALM" ++only list routes with these realms. ++ ++.SS ip route flush - flush routing tables ++this command flushes routes selected by some criteria. ++ ++.sp ++The arguments have the same syntax and semantics as the arguments of ++.BR "ip route show" , ++but routing tables are not listed but purged. The only difference is ++the default action: ++.B show ++dumps all the IP main routing table but ++.B flush ++prints the helper page. ++ ++.sp ++With the ++.B -statistics ++option, the command becomes verbose. It prints out the number of ++deleted routes and the number of rounds made to flush the routing ++table. If the option is given ++twice, ++.B ip route flush ++also dumps all the deleted routes in the format described in the ++previous subsection. ++ ++.SS ip route get - get a single route ++this command gets a single route to a destination and prints its ++contents exactly as the kernel sees it. ++ ++.TP ++.BI to " ADDRESS " (default) ++the destination address. ++ ++.TP ++.BI from " ADDRESS" ++the source address. ++ ++.TP ++.BI tos " TOS" ++.TP ++.BI dsfield " TOS" ++the Type Of Service. ++ ++.TP ++.BI iif " NAME" ++the device from which this packet is expected to arrive. ++ ++.TP ++.BI oif " NAME" ++force the output device on which this packet will be routed. ++ ++.TP ++.B connected ++if no source address ++.RB "(option " from ")" ++was given, relookup the route with the source set to the preferred ++address received from the first lookup. ++If policy routing is used, it may be a different route. ++ ++.P ++Note that this operation is not equivalent to ++.BR "ip route show" . ++.B show ++shows existing routes. ++.B get ++resolves them and creates new clones if necessary. Essentially, ++.B get ++is equivalent to sending a packet along this path. ++If the ++.B iif ++argument is not given, the kernel creates a route ++to output packets towards the requested destination. 
++This is equivalent to pinging the destination ++with a subsequent ++.BR "ip route ls cache" , ++however, no packets are actually sent. With the ++.B iif ++argument, the kernel pretends that a packet arrived from this interface ++and searches for a path to forward the packet. ++ ++.SH ip rule - routing policy database management ++ ++.BR "Rule" s ++in the routing policy database control the route selection algorithm. ++ ++.P ++Classic routing algorithms used in the Internet make routing decisions ++based only on the destination address of packets (and in theory, ++but not in practice, on the TOS field). ++ ++.P ++In some circumstances we want to route packets differently depending not only ++on destination addresses, but also on other packet fields: source address, ++IP protocol, transport protocol ports or even packet payload. ++This task is called 'policy routing'. ++ ++.P ++To solve this task, the conventional destination based routing table, ordered ++according to the longest match rule, is replaced with a 'routing policy ++database' (or RPDB), which selects routes by executing some set of rules. ++ ++.P ++Each policy routing rule consists of a ++.B selector ++and an ++.B action predicate. ++The RPDB is scanned in the order of increasing priority. The selector ++of each rule is applied to {source address, destination address, incoming ++interface, tos, fwmark} and, if the selector matches the packet, ++the action is performed. The action predicate may return with success. ++In this case, it will either give a route or failure indication ++and the RPDB lookup is terminated. Otherwise, the RPDB program ++continues on the next rule. ++ ++.P ++Semantically, natural action is to select the nexthop and the output device. ++ ++.P ++At startup time the kernel configures the default RPDB consisting of three ++rules: ++ ++.TP ++1. ++Priority: 0, Selector: match anything, Action: lookup routing ++table ++.B local ++(ID 255). 
++The ++.B local ++table is a special routing table containing ++high priority control routes for local and broadcast addresses. ++.sp ++Rule 0 is special. It cannot be deleted or overridden. ++ ++.TP ++2. ++Priority: 32766, Selector: match anything, Action: lookup routing ++table ++.B main ++(ID 254). ++The ++.B main ++table is the normal routing table containing all non-policy ++routes. This rule may be deleted and/or overridden with other ++ones by the administrator. ++ ++.TP ++3. ++Priority: 32767, Selector: match anything, Action: lookup routing ++table ++.B default ++(ID 253). ++The ++.B default ++table is empty. It is reserved for some post-processing if no previous ++default rules selected the packet. ++This rule may also be deleted. ++ ++.P ++Each RPDB entry has additional ++attributes. F.e. each rule has a pointer to some routing ++table. NAT and masquerading rules have an attribute to select new IP ++address to translate/masquerade. Besides that, rules have some ++optional attributes, which routes have, namely ++.BR "realms" . ++These values do not override those contained in the routing tables. They ++are only used if the route did not select any attributes. ++ ++.sp ++The RPDB may contain rules of the following types: ++ ++.in +8 ++.B unicast ++- the rule prescribes to return the route found ++in the routing table referenced by the rule. ++ ++.B blackhole ++- the rule prescribes to silently drop the packet. ++ ++.B unreachable ++- the rule prescribes to generate a 'Network is unreachable' error. ++ ++.B prohibit ++- the rule prescribes to generate 'Communication is administratively ++prohibited' error. ++ ++.B nat ++- the rule prescribes to translate the source address ++of the IP packet into some other value. ++.in -8 ++ ++.SS ip rule add - insert a new rule ++.SS ip rule delete - delete a rule ++ ++.TP ++.BI type " TYPE " (default) ++the type of this rule. The list of valid types was given in the previous ++subsection. 
++ ++.TP ++.BI from " PREFIX" ++select the source prefix to match. ++ ++.TP ++.BI to " PREFIX" ++select the destination prefix to match. ++ ++.TP ++.BI iif " NAME" ++select the incoming device to match. If the interface is loopback, ++the rule only matches packets originating from this host. This means ++that you may create separate routing tables for forwarded and local ++packets and, hence, completely segregate them. ++ ++.TP ++.BI tos " TOS" ++.TP ++.BI dsfield " TOS" ++select the TOS value to match. ++ ++.TP ++.BI fwmark " MARK" ++select the ++.B fwmark ++value to match. ++ ++.TP ++.BI priority " PREFERENCE" ++the priority of this rule. Each rule should have an explicitly ++set ++.I unique ++priority value. ++ ++.TP ++.BI table " TABLEID" ++the routing table identifier to lookup if the rule selector matches. ++ ++.TP ++.BI realms " FROM/TO" ++Realms to select if the rule matched and the routing table lookup ++succeeded. Realm ++.I TO ++is only used if the route did not select any realm. ++ ++.TP ++.BI nat " ADDRESS" ++The base of the IP address block to translate (for source addresses). ++The ++.I ADDRESS ++may be either the start of the block of NAT addresses (selected by NAT ++routes) or a local host address (or even zero). ++In the last case the router does not translate the packets, but ++masquerades them to this address. ++ ++.B Warning: ++Changes to the RPDB made with these commands do not become active ++immediately. It is assumed that after a script finishes a batch of ++updates, it flushes the routing cache with ++.BR "ip route flush cache" . ++ ++.SS ip rule show - list rules ++This command has no arguments. ++ ++.SH ip maddress - multicast addresses management ++ ++.B maddress ++objects are multicast addresses. ++ ++.SS ip maddress show - list multicast addresses ++ ++.TP ++.BI dev " NAME " (default) ++the device name. 
++ ++.SS ip maddress add - add a multicast address ++.SS ip maddress delete - delete a multicast address ++these commands attach/detach a static link layer multicast address ++to listen on the interface. ++Note that it is impossible to join protocol multicast groups ++statically. This command only manages link layer addresses. ++ ++.TP ++.BI address " LLADDRESS " (default) ++the link layer multicast address. ++ ++.TP ++.BI dev " NAME" ++the device to join/leave this multicast address. ++ ++.SH ip mroute - multicast routing cache management ++.B mroute ++objects are multicast routing cache entries created by a user level ++mrouting daemon (f.e. ++.B pimd ++or ++.B mrouted ++). ++ ++Due to the limitations of the current interface to the multicast routing ++engine, it is impossible to change ++.B mroute ++objects administratively, so we may only display them. This limitation ++will be removed in the future. ++ ++.SS ip mroute show - list mroute cache entries ++ ++.TP ++.BI to " PREFIX " (default) ++the prefix selecting the destination multicast addresses to list. ++ ++.TP ++.BI iif " NAME" ++the interface on which multicast packets are received. ++ ++.TP ++.BI from " PREFIX" ++the prefix selecting the IP source addresses of the multicast route. ++ ++.SH ip tunnel - tunnel configuration ++.B tunnel ++objects are tunnels, encapsulating packets in IPv4 packets and then ++sending them over the IP infrastructure. ++ ++.SS ip tunnel add - add a new tunnel ++.SS ip tunnel change - change an existing tunnel ++.SS ip tunnel delete - destroy a tunnel ++ ++.TP ++.BI name " NAME " (default) ++select the tunnel device name. ++ ++.TP ++.BI mode " MODE" ++set the tunnel mode. Three modes are currently available: ++.BR ipip ", " sit " and " gre "." ++ ++.TP ++.BI remote " ADDRESS" ++set the remote endpoint of the tunnel. ++ ++.TP ++.BI local " ADDRESS" ++set the fixed local address for tunneled packets. ++It must be an address on another interface of this host. 
++ ++.TP ++.BI ttl " N" ++set a fixed TTL ++.I N ++on tunneled packets. ++.I N ++is a number in the range 1--255. 0 is a special value ++meaning that packets inherit the TTL value. ++The default value is: ++.BR "inherit" . ++ ++.TP ++.BI tos " T" ++.TP ++.BI dsfield " T" ++set a fixed TOS ++.I T ++on tunneled packets. ++The default value is: ++.BR "inherit" . ++ ++.TP ++.BI dev " NAME" ++bind the tunnel to the device ++.I NAME ++so that tunneled packets will only be routed via this device and will ++not be able to escape to another device when the route to endpoint ++changes. ++ ++.TP ++.B nopmtudisc ++disable Path MTU Discovery on this tunnel. ++It is enabled by default. Note that a fixed ttl is incompatible ++with this option: tunnelling with a fixed ttl always performs pmtu ++discovery. ++ ++.TP ++.BI key " K" ++.TP ++.BI ikey " K" ++.TP ++.BI okey " K" ++.RB ( " only GRE tunnels " ) ++use keyed GRE with key ++.IR K ". " K ++is either a number or an IP address-like dotted quad. ++The ++.B key ++parameter sets the key to use in both directions. ++The ++.BR ikey " and " okey ++parameters set different keys for input and output. ++ ++.TP ++.BR csum ", " icsum ", " ocsum ++.RB ( " only GRE tunnels " ) ++generate/require checksums for tunneled packets. ++The ++.B ocsum ++flag calculates checksums for outgoing packets. ++The ++.B icsum ++flag requires that all input packets have the correct ++checksum. The ++.B csum ++flag is equivalent to the combination ++.BR "icsum ocsum" . ++ ++.TP ++.BR seq ", " iseq ", " oseq ++.RB ( " only GRE tunnels " ) ++serialize packets. ++The ++.B oseq ++flag enables sequencing of outgoing packets. ++The ++.B iseq ++flag requires that all input packets are serialized. ++The ++.B seq ++flag is equivalent to the combination ++.BR "iseq oseq" . ++.B It doesn't work. Don't use it. ++ ++.SS ip tunnel show - list tunnels ++This command has no arguments. 
++ ++.SH ip monitor and rtmon - state monitoring ++ ++The ++.B ip ++utility can monitor the state of devices, addresses ++and routes continuously. This option has a slightly different format. ++Namely, the ++.B monitor ++command is the first in the command line and then the object list follows: ++ ++.BR "ip monitor" " [ " all " |" ++.IR LISTofOBJECTS " ]" ++ ++.I LISTofOBJECTS ++is the list of object types that we want to monitor. ++It may contain ++.BR link ", " address " and " route "." ++If no ++.B file ++argument is given, ++.B ip ++opens RTNETLINK, listens on it and dumps state changes in the format ++described in previous sections. ++ ++.P ++If a file name is given, it does not listen on RTNETLINK, ++but opens the file containing RTNETLINK messages saved in binary format ++and dumps them. Such a history file can be generated with the ++.B rtmon ++utility. This utility has a command line syntax similar to ++.BR "ip monitor" . ++Ideally, ++.B rtmon ++should be started before the first network configuration command ++is issued. F.e. if you insert: ++.sp ++.in +8 ++rtmon file /var/log/rtmon.log ++.in -8 ++.sp ++in a startup script, you will be able to view the full history ++later. ++ ++.P ++Certainly, it is possible to start ++.B rtmon ++at any time. ++It prepends the history with the state snapshot dumped at the moment ++of starting. ++ ++.SH HISTORY ++ ++.B ip ++was written by Alexey N. Kuznetsov and added in Linux 2.2. 
++.SH SEE ALSO ++.BR tc (8) ++.br ++.RB "IP Command reference " ip-cref.ps ++.br ++.RB "IP tunnels " ip-cref.ps ++ ++.SH AUTHOR ++ ++Manpage maintained by Michail Litvak +diff -Naur iproute2-orig/debian/manpages/old/ip.8 iproute2/debian/manpages/old/ip.8 +--- iproute2-orig/debian/manpages/old/ip.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/ip.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,1809 @@ ++.TH IP 8 "17 January 2002" "iproute2" "Linux" ++.SH NAME ++ip \- show / manipulate routing, devices, policy routing and tunnels ++.SH SYNOPSIS ++ ++.ad l ++.in +8 ++.ti -8 ++.B ip ++.RI "[ " OPTIONS " ] " OBJECT " { " COMMAND " | " ++.BR help " }" ++.sp ++ ++.ti -8 ++.IR OBJECT " := { " ++.BR link " | " addr " | " route " | " rule " | " neigh " | " tunnel " | "\ ++maddr " | " mroute " | " monitor " }" ++.sp ++ ++.ti -8 ++.IR OPTIONS " := { " ++\fB\-V\fR[\fIersion\fR] | ++\fB\-s\fR[\fItatistics\fR] | ++\fB\-r\fR[\fIesolve\fR] | ++\fB\-f\fR[\fIamily\fR] { ++.BR inet " | " inet6 " | " ipx " | " dnet " | " link " } | " ++\fB\-o\fR[\fIneline\fR] } ++ ++.ti -8 ++.BI "ip link set " DEVICE ++.RB "{ " up " | " down " | " arp " { " on " | " off " } |" ++.br ++.BR promisc " { " on " | " off " } |" ++.br ++.BR allmulti " { " on " | " off " } |" ++.br ++.BR dynamic " { " on " | " off " } |" ++.br ++.BR multicast " { " on " | " off " } |" ++.br ++.B txqueuelen ++.IR PACKETS " |" ++.br ++.B name ++.IR NEWNAME " |" ++.br ++.B address ++.IR LLADDR " |" ++.B broadcast ++.IR LLADDR " |" ++.br ++.B mtu ++.IR MTU " }" ++ ++.ti -8 ++.B ip link show ++.RI "[ " DEVICE " ]" ++ ++.ti -8 ++.BR "ip addr" " { " add " | " del " } " ++.IB IFADDR " dev " STRING ++ ++.ti -8 ++.BR "ip addr" " { " show " | " flush " } [ " dev ++.IR STRING " ] [ " ++.B scope ++.IR SCOPE-ID " ] [ " ++.B to ++.IR PREFIX " ] [ " FLAG-LIST " ] [ " ++.B label ++.IR PATTERN " ]" ++ ++.ti -8 ++.IR IFADDR " := " PREFIX " | " ADDR ++.B peer ++.IR PREFIX " [ " ++.B broadcast ++.IR ADDR " ] [ " ++.B 
anycast ++.IR ADDR " ] [ " ++.B label ++.IR STRING " ] [ " ++.B scope ++.IR SCOPE-ID " ]" ++ ++.ti -8 ++.IR SCOPE-ID " := " ++.RB "[ " host " | " link " | " global " | " ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR FLAG-LIST " := [ " FLAG-LIST " ] " FLAG ++ ++.ti -8 ++.IR FLAG " := " ++.RB "[ " permanent " | " dynamic " | " secondary " | " primary " | "\ ++tentative " | " deprecated " ]" ++ ++.ti -8 ++.BR "ip route" " { " ++.BR list " | " flush " } " ++.I SELECTOR ++ ++.ti -8 ++.B ip route get ++.IR ADDRESS " [ " ++.BI from " ADDRESS " iif " STRING" ++.RB " ] [ " oif ++.IR STRING " ] [ " ++.B tos ++.IR TOS " ]" ++ ++.ti -8 ++.BR "ip route" " { " add " | " del " | " change " | " append " | "\ ++replace " | " monitor " } " ++.I ROUTE ++ ++.ti -8 ++.IR SELECTOR " := " ++.RB "[ " root ++.IR PREFIX " ] [ " ++.B match ++.IR PREFIX " ] [ " ++.B exact ++.IR PREFIX " ] [ " ++.B table ++.IR TABLE_ID " ] [ " ++.B proto ++.IR RTPROTO " ] [ " ++.B type ++.IR TYPE " ] [ " ++.B scope ++.IR SCOPE " ]" ++ ++.ti -8 ++.IR ROUTE " := " NODE_SPEC " [ " INFO_SPEC " ]" ++ ++.ti -8 ++.IR NODE_SPEC " := [ " TYPE " ] " PREFIX " [" ++.B tos ++.IR TOS " ] [ " ++.B table ++.IR TABLE_ID " ] [ " ++.B proto ++.IR RTPROTO " ] [ " ++.B scope ++.IR SCOPE " ] [ " ++.B metric ++.IR METRIC " ]" ++ ++.ti -8 ++.IR INFO_SPEC " := " "NH OPTIONS FLAGS" " [" ++.B nexthop ++.IR NH " ] ..." 
++ ++.ti -8 ++.IR NH " := [ " ++.B via ++.IR ADDRESS " ] [ " ++.B dev ++.IR STRING " ] [ " ++.B weight ++.IR NUMBER " ] " NHFLAGS ++ ++.ti -8 ++.IR OPTIONS " := " FLAGS " [ " ++.B mtu ++.IR NUMBER " ] [ " ++.B advmss ++.IR NUMBER " ] [ " ++.B rtt ++.IR NUMBER " ] [ " ++.B rttvar ++.IR NUMBER " ] [ " ++.B window ++.IR NUMBER " ] [ " ++.B cwnd ++.IR NUMBER " ] [ " ++.B ssthresh ++.IR NUMBER " ] [ " ++.B realms ++.IR REALM " ]" ++ ++.ti -8 ++.IR TYPE " := [ " ++.BR unicast " | " local " | " broadcast " | " multicast " | "\ ++throw " | " unreachable " | " prohibit " | " blackhole " | " nat " ]" ++ ++.ti -8 ++.IR TABLE_ID " := [ " ++.BR local " | " main " | " default " | " all " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR SCOPE " := [ " ++.BR host " | " link " | " global " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR FLAGS " := [ " ++.BR equalize " ]" ++ ++.ti -8 ++.IR NHFLAGS " := [ " ++.BR onlink " | " pervasive " ]" ++ ++.ti -8 ++.IR RTPROTO " := [ " ++.BR kernel " | " boot " | " static " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.B ip rule ++.RB " [ " list " | " add " | " del " ]" ++.I SELECTOR ACTION ++ ++.ti -8 ++.IR SELECTOR " := [ " ++.B from ++.IR PREFIX " ] [ " ++.B to ++.IR PREFIX " ] [ " ++.B tos ++.IR TOS " ] [ " ++.B fwmark ++.IR FWMARK " ] [ " ++.B dev ++.IR STRING " ] [ " ++.B pref ++.IR NUMBER " ]" ++ ++.ti -8 ++.IR ACTION " := [ " ++.B table ++.IR TABLE_ID " ] [ " ++.B nat ++.IR ADDRESS " ] [ " ++.BR prohibit " | " reject " | " unreachable " ] [ " realms ++.RI "[" SRCREALM "/]" DSTREALM " ]" ++ ++.ti -8 ++.IR TABLE_ID " := [ " ++.BR local " | " main " | " default " |" ++.IR NUMBER " ]" ++ ++.ti -8 ++.BR "ip neigh" " { " add " | " del " | " change " | " replace " } { " ++.IR ADDR " [ " ++.B lladdr ++.IR LLADDR " ] [ " ++.BR nud " { " permanent " | " noarp " | " stale " | " reachable " } ] | " proxy ++.IR ADDR " } [ " ++.B dev ++.IR DEV " ]" ++ ++.ti -8 ++.BR "ip neigh" " { " show " | " flush " } [ " to ++.IR PREFIX " ] [ " ++.B dev ++.IR DEV " ] [ " ++.B nud ++.IR STATE " 
]" ++ ++.ti -8 ++.BR "ip tunnel" " { " add " | " change " | " del " | " show " }" ++.RI "[ " NAME " ]" ++.br ++.RB "[ " mode " { " ipip " | " gre " | " sit " } ]" ++.br ++.RB "[ " remote ++.IR ADDR " ] [ " ++.B local ++.IR ADDR " ]" ++.br ++.RB "[ [" i "|" o "]" seq " ] [ [" i "|" o "]" key ++.IR KEY " ] [ " ++.RB "[" i "|" o "]" csum " ] ]" ++.br ++.RB "[ " ttl ++.IR TTL " ] [ " ++.B tos ++.IR TOS " ] [ " ++.RB "[" no "]" pmtudisc " ]" ++.br ++.RB "[ " dev ++.IR PHYS_DEV " ]" ++ ++.ti -8 ++.IR ADDR " := { " IP_ADDRESS " |" ++.BR any " }" ++ ++.ti -8 ++.IR TOS " := { " NUMBER " |" ++.BR inherit " }" ++ ++.ti -8 ++.IR TTL " := { " 1 ".." 255 " | " ++.BR inherit " }" ++ ++.ti -8 ++.IR KEY " := { " DOTTED_QUAD " | " NUMBER " }" ++ ++.ti -8 ++.BR "ip maddr" " [ " add " | " del " ]" ++.IB MULTIADDR " dev " STRING ++ ++.ti -8 ++.BR "ip maddr show" " [ " dev ++.IR STRING " ]" ++ ++.ti -8 ++.BR "ip mroute show" " [" ++.IR PREFIX " ] [ " ++.B from ++.IR PREFIX " ] [ " ++.B iif ++.IR DEVICE " ]" ++ ++.ti -8 ++.BR "ip monitor" " [ " all " |" ++.IR LISTofOBJECTS " ]" ++.in -8 ++.ad b ++ ++.SH OPTIONS ++ ++.TP ++.BR "\-V" , " -Version" ++print the version of the ++.B ip ++utility and exit. ++ ++.TP ++.BR "\-s" , " \-stats", " \-statistics" ++output more information. If the option ++appears twice or more, the amount of information increases. ++As a rule, the information is statistics or some time values. ++ ++.TP ++.BR "\-f" , " \-family" ++followed by protocol family identifier: ++.BR "inet" , " inet6" ++or ++.B link ++,enforce the protocol family to use. If the option is not present, ++the protocol family is guessed from other arguments. If the rest ++of the command line does not give enough information to guess the ++family, ++.B ip ++falls back to the default one, usually ++.B inet ++or ++.BR "any" . ++.B link ++is a special family identifier meaning that no networking protocol ++is involved. ++ ++.TP ++.B \-4 ++shortcut for ++.BR "-family inet" . 
++ ++.TP ++.B \-6 ++shortcut for ++.BR "\-family inet6" . ++ ++.TP ++.B \-0 ++shortcut for ++.BR "\-family link" . ++ ++.TP ++.BR "\-o" , " \-oneline" ++output each record on a single line, replacing line feeds ++with the ++.B '\e' ++character. This is convenient when you want to count records ++with ++.BR wc (1) ++or to ++.BR grep (1) ++the output. ++ ++.TP ++.BR "\-r" , " \-resolve" ++use the system's name resolver to print DNS names instead of ++host addresses. ++ ++.SH IP - COMMAND SYNTAX ++ ++.SS ++.I OBJECT ++ ++.TP ++.B link ++- network device. ++ ++.TP ++.B address ++- protocol (IP or IPv6) address on a device. ++.TP ++.B neighbour ++- ARP or NDISC cache entry. ++ ++.TP ++.B route ++- routing table entry. ++ ++.TP ++.B rule ++- rule in routing policy database. ++ ++.TP ++.B maddress ++- multicast address. ++ ++.TP ++.B mroute ++- multicast routing cache entry. ++ ++.TP ++.B tunnel ++- tunnel over IP. ++ ++.PP ++The names of all objects may be written in full or ++abbreviated form, f.e. ++.B address ++is abbreviated as ++.B addr ++or just ++.BR a . ++ ++.SS ++.I COMMAND ++ ++Specifies the action to perform on the object. ++The set of possible actions depends on the object type. ++As a rule, it is possible to ++.BR "add" , " delete" ++and ++.B show ++(or ++.BR list ) ++objects, but some objects do not allow all of these operations ++or have some additional commands. The ++.B help ++command is available for all objects. It prints ++out a list of available commands and argument syntax conventions. ++.sp ++If no command is given, some default command is assumed. ++Usually it is ++.B list ++or, if the objects of this class cannot be listed, ++.BR "help" . ++ ++.SH ip link - network device configuration ++ ++.B link ++is a network device and the corresponding commands ++display and change the state of devices. ++ ++.SS ip link set - change device attributes ++ ++.TP ++.BI dev " NAME " (default) ++.I NAME ++specifies network device to operate on. 
++ ++.TP ++.BR up " and " down ++change the state of the device to ++.B UP ++or ++.BR "DOWN" . ++ ++.TP ++.BR "arp on " or " arp off" ++change the ++.B NOARP ++flag on the device. ++ ++.TP ++.BR "multicast on " or " multicast off" ++change the ++.B MULTICAST ++flag on the device. ++ ++.TP ++.BR "dynamic on " or " dynamic off" ++change the ++.B DYNAMIC ++flag on the device. ++ ++.TP ++.BI name " NAME" ++change the name of the device. This operation is not ++recommended if the device is running or has some addresses ++already configured. ++ ++.TP ++.BI txqueuelen " NUMBER" ++.TP ++.BI txqlen " NUMBER" ++change the transmit queue length of the device. ++ ++.TP ++.BI mtu " NUMBER" ++change the ++.I MTU ++of the device. ++ ++.TP ++.BI address " LLADDRESS" ++change the station address of the interface. ++ ++.TP ++.BI broadcast " LLADDRESS" ++.TP ++.BI brd " LLADDRESS" ++.TP ++.BI peer " LLADDRESS" ++change the link layer broadcast address or the peer address when ++the interface is ++.IR "POINTOPOINT" . ++ ++.PP ++.B Warning: ++If multiple parameter changes are requested, ++.B ip ++aborts immediately after any of the changes have failed. ++This is the only case when ++.B ip ++can move the system to an unpredictable state. The solution ++is to avoid changing several parameters with one ++.B ip link set ++call. ++ ++.SS ip link show - display device attributes ++ ++.TP ++.BI dev " NAME " (default) ++.I NAME ++specifies the network device to show. ++If this argument is omitted all devices are listed. ++ ++.TP ++.B up ++only display running interfaces. ++ ++.SH ip address - protocol address management. ++ ++The ++.B address ++is a protocol (IP or IPv6) address attached ++to a network device. Each device must have at least one address ++to use the corresponding protocol. It is possible to have several ++different addresses attached to one device. 
These addresses are not ++discriminated, so that the term ++.B alias ++is not quite appropriate for them and we do not use it in this document. ++.sp ++The ++.B ip addr ++command displays addresses and their properties, adds new addresses ++and deletes old ones. ++ ++.SS ip address add - add new protocol address. ++ ++.TP ++.BI dev " NAME" ++the name of the device to add the address to. ++ ++.TP ++.BI local " ADDRESS " (default) ++the address of the interface. The format of the address depends ++on the protocol. It is a dotted quad for IP and a sequence of ++hexadecimal halfwords separated by colons for IPv6. The ++.I ADDRESS ++may be followed by a slash and a decimal number which encodes ++the network prefix length. ++ ++.TP ++.BI peer " ADDRESS" ++the address of the remote endpoint for pointopoint interfaces. ++Again, the ++.I ADDRESS ++may be followed by a slash and a decimal number, encoding the network ++prefix length. If a peer address is specified, the local address ++cannot have a prefix length. The network prefix is associated ++with the peer rather than with the local address. ++ ++.TP ++.BI broadcast " ADDRESS" ++the broadcast address on the interface. ++.sp ++It is possible to use the special symbols ++.B '+' ++and ++.B '-' ++instead of the broadcast address. In this case, the broadcast address ++is derived by setting/resetting the host bits of the interface prefix. ++ ++.TP ++.BI label " NAME" ++Each address may be tagged with a label string. ++In order to preserve compatibility with Linux-2.0 net aliases, ++this string must coincide with the name of the device or must be prefixed ++with the device name followed by colon. ++ ++.TP ++.BI scope " SCOPE_VALUE" ++the scope of the area where this address is valid. ++The available scopes are listed in file ++.BR "/etc/iproute2/rt_scopes" . ++Predefined scope values are: ++ ++.in +8 ++.B global ++- the address is globally valid. ++.sp ++.B site ++- (IPv6 only) the address is site local, i.e. 
it is ++valid inside this site. ++.sp ++.B link ++- the address is link local, i.e. it is valid only on this device. ++.sp ++.B host ++- the address is valid only inside this host. ++.in -8 ++ ++.SS ip address delete - delete protocol address ++.B Arguments: ++coincide with the arguments of ++.B ip addr add. ++The device name is a required argument. The rest are optional. ++If no arguments are given, the first address is deleted. ++ ++.SS ip address show - look at protocol addresses ++ ++.TP ++.BI dev " NAME " (default) ++name of device. ++ ++.TP ++.BI scope " SCOPE_VAL" ++only list addresses with this scope. ++ ++.TP ++.BI to " PREFIX" ++only list addresses matching this prefix. ++ ++.TP ++.BI label " PATTERN" ++only list addresses with labels matching the ++.IR "PATTERN" . ++.I PATTERN ++is a usual shell style pattern. ++ ++.TP ++.BR dynamic " and " permanent ++(IPv6 only) only list addresses installed due to stateless ++address configuration or only list permanent (not dynamic) ++addresses. ++ ++.TP ++.B tentative ++(IPv6 only) only list addresses which did not pass duplicate ++address detection. ++ ++.TP ++.B deprecated ++(IPv6 only) only list deprecated addresses. ++ ++.TP ++.BR primary " and " secondary ++only list primary (or secondary) addresses. ++ ++.SS ip address flush - flush protocol addresses ++This command flushes the protocol addresses selected by some criteria. ++ ++.PP ++This command has the same arguments as ++.B show. ++The difference is that it does not run when no arguments are given. ++ ++.PP ++.B Warning: ++This command (and other ++.B flush ++commands described below) is pretty dangerous. If you make a mistake, ++it will not forgive it, but will cruelly purge all the addresses. ++ ++.PP ++With the ++.B -statistics ++option, the command becomes verbose. It prints out the number of deleted ++addresses and the number of rounds made to flush the address list. 
If ++this option is given twice, ++.B ip addr flush ++also dumps all the deleted addresses in the format described in the ++previous subsection. ++ ++.SH ip neighbour - neighbour/arp tables management. ++ ++.B neighbour ++objects establish bindings between protocol addresses and ++link layer addresses for hosts sharing the same link. ++Neighbour entries are organized into tables. The IPv4 neighbour table ++is known by another name - the ARP table. ++ ++.P ++The corresponding commands display neighbour bindings ++and their properties, add new neighbour entries and delete old ones. ++ ++.SS ip neighbour add - add a new neighbour entry ++.SS ip neighbour change - change an existing entry ++.SS ip neighbour replace - add a new entry or change an existing one ++ ++These commands create new neighbour records or update existing ones. ++ ++.TP ++.BI to " ADDRESS " (default) ++the protocol address of the neighbour. It is either an IPv4 or IPv6 address. ++ ++.TP ++.BI dev " NAME" ++the interface to which this neighbour is attached. ++ ++.TP ++.BI lladdr " LLADDRESS" ++the link layer address of the neighbour. ++.I LLADDRESS ++can also be ++.BR "null" . ++ ++.TP ++.BI nud " NUD_STATE" ++the state of the neighbour entry. ++.B nud ++is an abbreviation for 'Neighbour Unreachability Detection'. ++The state can take one of the following values: ++ ++.in +8 ++.B permanent ++- the neighbour entry is valid forever and can only ++be removed administratively. ++.sp ++ ++.B noarp ++- the neighbour entry is valid. No attempts to validate ++this entry will be made but it can be removed when its lifetime expires. ++.sp ++ ++.B reachable ++- the neighbour entry is valid until the reachability ++timeout expires. ++.sp ++ ++.B stale ++- the neighbour entry is valid but suspicious. ++This option to ++.B ip neigh ++does not change the neighbour state if it was valid and the address ++is not changed by this command. 
++.in -8 ++ ++.SS ip neighbour delete - delete a neighbour entry ++This command invalidates a neighbour entry. ++ ++.PP ++The arguments are the same as with ++.BR "ip neigh add" , ++except that ++.B lladdr ++and ++.B nud ++are ignored. ++ ++.PP ++.B Warning: ++Attempts to delete or manually change a ++.B noarp ++entry created by the kernel may result in unpredictable behaviour. ++Particularly, the kernel may try to resolve this address even ++on a ++.B NOARP ++interface or if the address is multicast or broadcast. ++ ++.SS ip neighbour show - list neighbour entries ++ ++This command displays neighbour tables. ++ ++.TP ++.BI to " ADDRESS " (default) ++the prefix selecting the neighbours to list. ++ ++.TP ++.BI dev " NAME" ++only list the neighbours attached to this device. ++ ++.TP ++.B unused ++only list neighbours which are not currently in use. ++ ++.TP ++.BI nud " NUD_STATE" ++only list neighbour entries in this state. ++.I NUD_STATE ++takes values listed above or the special value ++.B all ++which means all states. This option may occur more than once. ++If this option is absent, ++.B ip ++lists all entries except for ++.B none ++and ++.BR "noarp" . ++ ++.SS ip neighbour flush - flush neighbour entries ++This command flushes neighbour tables, selecting ++entries to flush by some criteria. ++ ++.PP ++This command has the same arguments as ++.B show. ++The differences are that it does not run when no arguments are given, ++and that the default neighbour states to be flushed do not include ++.B permanent ++and ++.BR "noarp" . ++ ++.PP ++With the ++.B -statistics ++option, the command becomes verbose. It prints out the number of ++deleted neighbours and the number of rounds made to flush the ++neighbour table. If the option is given ++twice, ++.B ip neigh flush ++also dumps all the deleted neighbours. ++ ++.SH ip route - routing table management ++Manipulate route entries in the kernel routing tables, which keep ++information about paths to other networked nodes. 
++.sp ++.B Route types: ++ ++.in +8 ++.B unicast ++- the route entry describes real paths to the destinations covered ++by the route prefix. ++ ++.sp ++.B unreachable ++- these destinations are unreachable. Packets are discarded and the ++ICMP message ++.I host unreachable ++is generated. ++The local senders get an ++.I EHOSTUNREACH ++error. ++ ++.sp ++.B blackhole ++- these destinations are unreachable. Packets are discarded silently. ++The local senders get an ++.I EINVAL ++error. ++ ++.sp ++.B prohibit ++- these destinations are unreachable. Packets are discarded and the ++ICMP message ++.I communication administratively prohibited ++is generated. The local senders get an ++.I EACCES ++error. ++ ++.sp ++.B local ++- the destinations are assigned to this host. The packets are looped ++back and delivered locally. ++ ++.sp ++.B broadcast ++- the destinations are broadcast addresses. The packets are sent as ++link broadcasts. ++ ++.sp ++.B throw ++- a special control route used together with policy rules. If such a ++route is selected, lookup in this table is terminated pretending that ++no route was found. Without policy routing it is equivalent to the ++absence of the route in the routing table. The packets are dropped ++and the ICMP message ++.I net unreachable ++is generated. The local senders get an ++.I ENETUNREACH ++error. ++ ++.sp ++.B nat ++- a special NAT route. Destinations covered by the prefix ++are considered to be dummy (or external) addresses which require translation ++to real (or internal) ones before forwarding. The addresses to translate to ++are selected with the attribute ++.BR "via" . ++ ++.sp ++.B anycast ++.RI "- " "not implemented" ++the destinations are ++.I anycast ++addresses assigned to this host. They are mainly equivalent ++to ++.B local ++with one difference: such addresses are invalid when used ++as the source address of any packet. ++ ++.sp ++.B multicast ++- a special type used for multicast routing. 
It is not present in ++normal routing tables. ++.in -8 ++ ++.P ++.B Route tables: ++Linux-2.x can pack routes into several routing ++tables identified by a number in the range from 1 to 255 or by ++name from the file ++.BR /etc/iproute2/rt_tables . ++By default all normal routes are inserted into the ++.B main ++table (ID 254) and the kernel only uses this table when calculating routes. ++ ++.sp ++Actually, one other table always exists, which is invisible but ++even more important. It is the ++.B local ++table (ID 255). This table ++consists of routes for local and broadcast addresses. The kernel maintains ++this table automatically and the administrator usually need not modify it ++or even look at it. ++ ++The multiple routing tables enter the game when ++.I policy routing ++is used. ++ ++.SS ip route add - add new route ++.SS ip route change - change route ++.SS ip route replace - change or add new one ++ ++.TP ++.BI to " TYPE PREFIX " (default) ++the destination prefix of the route. If ++.I TYPE ++is omitted, ++.B ip ++assumes type ++.BR "unicast" . ++Other values of ++.I TYPE ++are listed above. ++.I PREFIX ++is an IP or IPv6 address optionally followed by a slash and the ++prefix length. If the length of the prefix is missing, ++.B ip ++assumes a full-length host route. There is also a special ++.I PREFIX ++.B default ++- which is equivalent to IP ++.B 0/0 ++or to IPv6 ++.BR "::/0" . ++ ++.TP ++.BI tos " TOS" ++.TP ++.BI dsfield " TOS" ++the Type Of Service (TOS) key. This key has no associated mask and ++the longest match is understood as: First, compare the TOS ++of the route and of the packet. If they are not equal, then the packet ++may still match a route with a zero TOS. ++.I TOS ++is either an 8 bit hexadecimal number or an identifier ++from ++.BR "/etc/iproute2/rt_dsfield" . ++ ++.TP ++.BI metric " NUMBER" ++.TP ++.BI preference " NUMBER" ++the preference value of the route. ++.I NUMBER ++is an arbitrary 32bit number. 
++ ++.TP ++.BI table " TABLEID" ++the table to add this route to. ++.I TABLEID ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_tables" . ++If this parameter is omitted, ++.B ip ++assumes the ++.B main ++table, with the exception of ++.BR local " , " broadcast " and " nat ++routes, which are put into the ++.B local ++table by default. ++ ++.TP ++.BI dev " NAME" ++the output device name. ++ ++.TP ++.BI via " ADDRESS" ++the address of the nexthop router. Actually, the sense of this field ++depends on the route type. For normal ++.B unicast ++routes it is either the true next hop router or, if it is a direct ++route installed in BSD compatibility mode, it can be a local address ++of the interface. For NAT routes it is the first address of the block ++of translated IP destinations. ++ ++.TP ++.BI src " ADDRESS" ++the source address to prefer when sending to the destinations ++covered by the route prefix. ++ ++.TP ++.BI realm " REALMID" ++the realm to which this route is assigned. ++.I REALMID ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_realms" . ++ ++.TP ++.BI mtu " MTU" ++.TP ++.BI "mtu lock" " MTU" ++the MTU along the path to the destination. If the modifier ++.B lock ++is not used, the MTU may be updated by the kernel due to ++Path MTU Discovery. If the modifier ++.B lock ++is used, no path MTU discovery will be tried, all packets ++will be sent without the DF bit in IPv4 case or fragmented ++to MTU for IPv6. ++ ++.TP ++.BI window " NUMBER" ++the maximal window for TCP to advertise to these destinations, ++measured in bytes. It limits maximal data bursts that our TCP ++peers are allowed to send to us. ++ ++.TP ++.BI rtt " NUMBER" ++the initial RTT ('Round Trip Time') estimate. ++ ++.TP ++.BI rttvar " NUMBER " "(2.3.15+ only)" ++the initial RTT variance estimate. ++ ++.TP ++.BI ssthresh " NUMBER " "(2.3.15+ only)" ++an estimate for the initial slow start threshold. 
++ ++.TP ++.BI cwnd " NUMBER " "(2.3.15+ only)" ++the clamp for congestion window. It is ignored if the ++.B lock ++flag is not used. ++ ++.TP ++.BI advmss " NUMBER " "(2.3.15+ only)" ++the MSS ('Maximal Segment Size') to advertise to these ++destinations when establishing TCP connections. If it is not given, ++Linux uses a default value calculated from the first hop device MTU. ++(If the path to these destinations is asymmetric, this guess may be wrong.) ++ ++.TP ++.BI reordering " NUMBER " "(2.3.15+ only)" ++Maximal reordering on the path to this destination. ++If it is not given, Linux uses the value selected with ++.B sysctl ++variable ++.BR "net/ipv4/tcp_reordering" . ++ ++.TP ++.BI nexthop " NEXTHOP" ++the nexthop of a multipath route. ++.I NEXTHOP ++is a complex value with its own syntax similar to the top level ++argument lists: ++ ++.in +8 ++.BI via " ADDRESS" ++- is the nexthop router. ++.sp ++ ++.BI dev " NAME" ++- is the output device. ++.sp ++ ++.BI weight " NUMBER" ++- is a weight for this element of a multipath ++route reflecting its relative bandwidth or quality. ++.in -8 ++ ++.TP ++.BI scope " SCOPE_VAL" ++the scope of the destinations covered by the route prefix. ++.I SCOPE_VAL ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_scopes" . ++If this parameter is omitted, ++.B ip ++assumes scope ++.B global ++for all gatewayed ++.B unicast ++routes, scope ++.B link ++for direct ++.BR unicast " and " broadcast ++routes and scope ++.BR host " for " local ++routes. ++ ++.TP ++.BI protocol " RTPROTO" ++the routing protocol identifier of this route. ++.I RTPROTO ++may be a number or a string from the file ++.BR "/etc/iproute2/rt_protos" . ++If the routing protocol ID is not given, ++.BR ip " assumes protocol" ++.B boot ++(i.e. it assumes the route was added by someone who doesn't ++understand what they are doing). Several protocol values have ++a fixed interpretation. 
++Namely: ++ ++.in +8 ++.B redirect ++- the route was installed due to an ICMP redirect. ++.sp ++ ++.B kernel ++- the route was installed by the kernel during autoconfiguration. ++.sp ++ ++.B boot ++- the route was installed during the bootup sequence. ++If a routing daemon starts, it will purge all of them. ++.sp ++ ++.B static ++- the route was installed by the administrator ++to override dynamic routing. Routing daemon will respect them ++and, probably, even advertise them to its peers. ++.sp ++ ++.B ra ++- the route was installed by Router Discovery protocol. ++.in -8 ++ ++.sp ++The rest of the values are not reserved and the administrator is free ++to assign (or not to assign) protocol tags. ++ ++.TP ++.B onlink ++pretend that the nexthop is directly attached to this link, ++even if it does not match any interface prefix. ++ ++.TP ++.B equalize ++allow packet by packet randomization on multipath routes. ++Without this modifier, the route will be frozen to one selected ++nexthop, so that load splitting will only occur on per-flow base. ++.B equalize ++only works if the kernel is patched. ++ ++.SS ip route delete - delete route ++ ++.B ip route del ++has the same arguments as ++.BR "ip route add" , ++but their semantics are a bit different. ++ ++Key values ++.RB "(" to ", " tos ", " preference " and " table ")" ++select the route to delete. If optional attributes are present, ++.B ip ++verifies that they coincide with the attributes of the route to delete. ++If no route with the given key and attributes was found, ++.B ip route del ++fails. ++ ++.SS ip route show - list routes ++the command displays the contents of the routing tables or the route(s) ++selected by some criteria. ++ ++.TP ++.BI to " SELECTOR " (default) ++only select routes from the given range of destinations. ++.I SELECTOR ++consists of an optional modifier ++.RB "(" root ", " match " or " exact ")" ++and a prefix. 
++.BI root " PREFIX" ++selects routes with prefixes not shorter than ++.IR PREFIX "." ++F.e. ++.BI root " 0/0" ++selects the entire routing table. ++.BI match " PREFIX" ++selects routes with prefixes not longer than ++.IR PREFIX "." ++F.e. ++.BI match " 10.0/16" ++selects ++.IR 10.0/16 "," ++.IR 10/8 " and " 0/0 , ++but it does not select ++.IR 10.1/16 " and " 10.0.0/24 . ++And ++.BI exact " PREFIX" ++(or just ++.IR PREFIX ")" ++selects routes with this exact prefix. If none of these options ++is present, ++.B ip ++assumes ++.BI root " 0/0" ++i.e. it lists the entire table. ++ ++.TP ++.BI tos " TOS" ++.BI dsfield " TOS" ++only select routes with the given TOS. ++ ++.TP ++.BI table " TABLEID" ++show the routes from this table(s). The default setting is to show ++.BR "table main" "." ++.I TABLEID ++may either be the ID of a real table or one of the special values: ++.sp ++.in +8 ++.B all ++- list all of the tables. ++.sp ++.B cache ++- dump the routing cache. ++.in -8 ++ ++.TP ++.B cloned ++.TP ++.B cached ++list cloned routes i.e. routes which were dynamically forked from ++other routes because some route attribute (f.e. MTU) was updated. ++Actually, it is equivalent to ++.BR "table cache" "." ++ ++.TP ++.BI from " SELECTOR" ++the same syntax as for ++.BR to "," ++but it binds the source address range rather than destinations. ++Note that the ++.B from ++option only works with cloned routes. ++ ++.TP ++.BI protocol " RTPROTO" ++only list routes of this protocol. ++ ++.TP ++.BI scope " SCOPE_VAL" ++only list routes with this scope. ++ ++.TP ++.BI type " TYPE" ++only list routes of this type. ++ ++.TP ++.BI dev " NAME" ++only list routes going via this device. ++ ++.TP ++.BI via " PREFIX" ++only list routes going via the nexthop routers selected by ++.IR PREFIX "." ++ ++.TP ++.BI src " PREFIX" ++only list routes with preferred source addresses selected ++by ++.IR PREFIX "." 
++ ++.TP ++.BI realm " REALMID" ++.TP ++.BI realms " FROMREALM/TOREALM" ++only list routes with these realms. ++ ++.SS ip route flush - flush routing tables ++this command flushes routes selected by some criteria. ++ ++.sp ++The arguments have the same syntax and semantics as the arguments of ++.BR "ip route show" , ++but routing tables are not listed but purged. The only difference is ++the default action: ++.B show ++dumps all the IP main routing table but ++.B flush ++prints the helper page. ++ ++.sp ++With the ++.B -statistics ++option, the command becomes verbose. It prints out the number of ++deleted routes and the number of rounds made to flush the routing ++table. If the option is given ++twice, ++.B ip route flush ++also dumps all the deleted routes in the format described in the ++previous subsection. ++ ++.SS ip route get - get a single route ++this command gets a single route to a destination and prints its ++contents exactly as the kernel sees it. ++ ++.TP ++.BI to " ADDRESS " (default) ++the destination address. ++ ++.TP ++.BI from " ADDRESS" ++the source address. ++ ++.TP ++.BI tos " TOS" ++.TP ++.BI dsfield " TOS" ++the Type Of Service. ++ ++.TP ++.BI iif " NAME" ++the device from which this packet is expected to arrive. ++ ++.TP ++.BI oif " NAME" ++force the output device on which this packet will be routed. ++ ++.TP ++.B connected ++if no source address ++.RB "(option " from ")" ++was given, relookup the route with the source set to the preferred ++address received from the first lookup. ++If policy routing is used, it may be a different route. ++ ++.P ++Note that this operation is not equivalent to ++.BR "ip route show" . ++.B show ++shows existing routes. ++.B get ++resolves them and creates new clones if necessary. Essentially, ++.B get ++is equivalent to sending a packet along this path. ++If the ++.B iif ++argument is not given, the kernel creates a route ++to output packets towards the requested destination. 
++This is equivalent to pinging the destination ++with a subsequent ++.BR "ip route ls cache" , ++however, no packets are actually sent. With the ++.B iif ++argument, the kernel pretends that a packet arrived from this interface ++and searches for a path to forward the packet. ++ ++.SH ip rule - routing policy database management ++ ++.BR "Rule" s ++in the routing policy database control the route selection algorithm. ++ ++.P ++Classic routing algorithms used in the Internet make routing decisions ++based only on the destination address of packets (and in theory, ++but not in practice, on the TOS field). ++ ++.P ++In some circumstances we want to route packets differently depending not only ++on destination addresses, but also on other packet fields: source address, ++IP protocol, transport protocol ports or even packet payload. ++This task is called 'policy routing'. ++ ++.P ++To solve this task, the conventional destination based routing table, ordered ++according to the longest match rule, is replaced with a 'routing policy ++database' (or RPDB), which selects routes by executing some set of rules. ++ ++.P ++Each policy routing rule consists of a ++.B selector ++and an ++.B action predicate. ++The RPDB is scanned in the order of increasing priority. The selector ++of each rule is applied to {source address, destination address, incoming ++interface, tos, fwmark} and, if the selector matches the packet, ++the action is performed. The action predicate may return with success. ++In this case, it will either give a route or failure indication ++and the RPDB lookup is terminated. Otherwise, the RPDB program ++continues on the next rule. ++ ++.P ++Semantically, natural action is to select the nexthop and the output device. ++ ++.P ++At startup time the kernel configures the default RPDB consisting of three ++rules: ++ ++.TP ++1. ++Priority: 0, Selector: match anything, Action: lookup routing ++table ++.B local ++(ID 255). 
++The ++.B local ++table is a special routing table containing ++high priority control routes for local and broadcast addresses. ++.sp ++Rule 0 is special. It cannot be deleted or overridden. ++ ++.TP ++2. ++Priority: 32766, Selector: match anything, Action: lookup routing ++table ++.B main ++(ID 254). ++The ++.B main ++table is the normal routing table containing all non-policy ++routes. This rule may be deleted and/or overridden with other ++ones by the administrator. ++ ++.TP ++3. ++Priority: 32767, Selector: match anything, Action: lookup routing ++table ++.B default ++(ID 253). ++The ++.B default ++table is empty. It is reserved for some post-processing if no previous ++default rules selected the packet. ++This rule may also be deleted. ++ ++.P ++Each RPDB entry has additional ++attributes. F.e. each rule has a pointer to some routing ++table. NAT and masquerading rules have an attribute to select new IP ++address to translate/masquerade. Besides that, rules have some ++optional attributes, which routes have, namely ++.BR "realms" . ++These values do not override those contained in the routing tables. They ++are only used if the route did not select any attributes. ++ ++.sp ++The RPDB may contain rules of the following types: ++ ++.in +8 ++.B unicast ++- the rule prescribes to return the route found ++in the routing table referenced by the rule. ++ ++.B blackhole ++- the rule prescribes to silently drop the packet. ++ ++.B unreachable ++- the rule prescribes to generate a 'Network is unreachable' error. ++ ++.B prohibit ++- the rule prescribes to generate 'Communication is administratively ++prohibited' error. ++ ++.B nat ++- the rule prescribes to translate the source address ++of the IP packet into some other value. ++.in -8 ++ ++.SS ip rule add - insert a new rule ++.SS ip rule delete - delete a rule ++ ++.TP ++.BI type " TYPE " (default) ++the type of this rule. The list of valid types was given in the previous ++subsection. 
++ ++.TP ++.BI from " PREFIX" ++select the source prefix to match. ++ ++.TP ++.BI to " PREFIX" ++select the destination prefix to match. ++ ++.TP ++.BI iif " NAME" ++select the incoming device to match. If the interface is loopback, ++the rule only matches packets originating from this host. This means ++that you may create separate routing tables for forwarded and local ++packets and, hence, completely segregate them. ++ ++.TP ++.BI tos " TOS" ++.TP ++.BI dsfield " TOS" ++select the TOS value to match. ++ ++.TP ++.BI fwmark " MARK" ++select the ++.B fwmark ++value to match. ++ ++.TP ++.BI priority " PREFERENCE" ++the priority of this rule. Each rule should have an explicitly ++set ++.I unique ++priority value. ++ ++.TP ++.BI table " TABLEID" ++the routing table identifier to lookup if the rule selector matches. ++ ++.TP ++.BI realms " FROM/TO" ++Realms to select if the rule matched and the routing table lookup ++succeeded. Realm ++.I TO ++is only used if the route did not select any realm. ++ ++.TP ++.BI nat " ADDRESS" ++The base of the IP address block to translate (for source addresses). ++The ++.I ADDRESS ++may be either the start of the block of NAT addresses (selected by NAT ++routes) or a local host address (or even zero). ++In the last case the router does not translate the packets, but ++masquerades them to this address. ++ ++.B Warning: ++Changes to the RPDB made with these commands do not become active ++immediately. It is assumed that after a script finishes a batch of ++updates, it flushes the routing cache with ++.BR "ip route flush cache" . ++ ++.SS ip rule show - list rules ++This command has no arguments. ++ ++.SH ip maddress - multicast addresses management ++ ++.B maddress ++objects are multicast addresses. ++ ++.SS ip maddress show - list multicast addresses ++ ++.TP ++.BI dev " NAME " (default) ++the device name. 
++ ++.SS ip maddress add - add a multicast address ++.SS ip maddress delete - delete a multicast address ++these commands attach/detach a static link layer multicast address ++to listen on the interface. ++Note that it is impossible to join protocol multicast groups ++statically. This command only manages link layer addresses. ++ ++.TP ++.BI address " LLADDRESS " (default) ++the link layer multicast address. ++ ++.TP ++.BI dev " NAME" ++the device to join/leave this multicast address. ++ ++.SH ip mroute - multicast routing cache management ++.B mroute ++objects are multicast routing cache entries created by a user level ++mrouting daemon (f.e. ++.B pimd ++or ++.B mrouted ++). ++ ++Due to the limitations of the current interface to the multicast routing ++engine, it is impossible to change ++.B mroute ++objects administratively, so we may only display them. This limitation ++will be removed in the future. ++ ++.SS ip mroute show - list mroute cache entries ++ ++.TP ++.BI to " PREFIX " (default) ++the prefix selecting the destination multicast addresses to list. ++ ++.TP ++.BI iif " NAME" ++the interface on which multicast packets are received. ++ ++.TP ++.BI from " PREFIX" ++the prefix selecting the IP source addresses of the multicast route. ++ ++.SH ip tunnel - tunnel configuration ++.B tunnel ++objects are tunnels, encapsulating packets in IPv4 packets and then ++sending them over the IP infrastructure. ++ ++.SS ip tunnel add - add a new tunnel ++.SS ip tunnel change - change an existing tunnel ++.SS ip tunnel delete - destroy a tunnel ++ ++.TP ++.BI name " NAME " (default) ++select the tunnel device name. ++ ++.TP ++.BI mode " MODE" ++set the tunnel mode. Three modes are currently available: ++.BR ipip ", " sit " and " gre "." ++ ++.TP ++.BI remote " ADDRESS" ++set the remote endpoint of the tunnel. ++ ++.TP ++.BI local " ADDRESS" ++set the fixed local address for tunneled packets. ++It must be an address on another interface of this host. 
++ ++.TP ++.BI ttl " N" ++set a fixed TTL ++.I N ++on tunneled packets. ++.I N ++is a number in the range 1--255. 0 is a special value ++meaning that packets inherit the TTL value. ++The default value is: ++.BR "inherit" . ++ ++.TP ++.BI tos " T" ++.TP ++.BI dsfield " T" ++set a fixed TOS ++.I T ++on tunneled packets. ++The default value is: ++.BR "inherit" . ++ ++.TP ++.BI dev " NAME" ++bind the tunnel to the device ++.I NAME ++so that tunneled packets will only be routed via this device and will ++not be able to escape to another device when the route to endpoint ++changes. ++ ++.TP ++.B nopmtudisc ++disable Path MTU Discovery on this tunnel. ++It is enabled by default. Note that a fixed ttl is incompatible ++with this option: tunnelling with a fixed ttl always makes pmtu ++discovery. ++ ++.TP ++.BI key " K" ++.TP ++.BI ikey " K" ++.TP ++.BI okey " K" ++.RB ( " only GRE tunnels " ) ++use keyed GRE with key ++.IR K ". " K ++is either a number or an IP address-like dotted quad. ++The ++.B key ++parameter sets the key to use in both directions. ++The ++.BR ikey " and " okey ++parameters set different keys for input and output. ++ ++.TP ++.BR csum ", " icsum ", " ocsum ++.RB ( " only GRE tunnels " ) ++generate/require checksums for tunneled packets. ++The ++.B ocsum ++flag calculates checksums for outgoing packets. ++The ++.B icsum ++flag requires that all input packets have the correct ++checksum. The ++.B csum ++flag is equivalent to the combination ++.BR "icsum ocsum" . ++ ++.TP ++.BR seq ", " iseq ", " oseq ++.RB ( " only GRE tunnels " ) ++serialize packets. ++The ++.B oseq ++flag enables sequencing of outgoing packets. ++The ++.B iseq ++flag requires that all input packets are serialized. ++The ++.B seq ++flag is equivalent to the combination ++.BR "iseq oseq" . ++.B It doesn't work. Don't use it. ++ ++.SS ip tunnel show - list tunnels ++This command has no arguments. 
++ ++.SH ip monitor and rtmon - state monitoring ++ ++The ++.B ip ++utility can monitor the state of devices, addresses ++and routes continuously. This option has a slightly different format. ++Namely, the ++.B monitor ++command is the first in the command line and then the object list follows: ++ ++.BR "ip monitor" " [ " all " |" ++.IR LISTofOBJECTS " ]" ++ ++.I OBJECT-LIST ++is the list of object types that we want to monitor. ++It may contain ++.BR link ", " address " and " route "." ++If no ++.B file ++argument is given, ++.B ip ++opens RTNETLINK, listens on it and dumps state changes in the format ++described in previous sections. ++ ++.P ++If a file name is given, it does not listen on RTNETLINK, ++but opens the file containing RTNETLINK messages saved in binary format ++and dumps them. Such a history file can be generated with the ++.B rtmon ++utility. This utility has a command line syntax similar to ++.BR "ip monitor" . ++Ideally, ++.B rtmon ++should be started before the first network configuration command ++is issued. F.e. if you insert: ++.sp ++.in +8 ++rtmon file /var/log/rtmon.log ++.in -8 ++.sp ++in a startup script, you will be able to view the full history ++later. ++ ++.P ++Certainly, it is possible to start ++.B rtmon ++at any time. ++It prepends the history with the state snapshot dumped at the moment ++of starting. ++ ++.SH HISTORY ++ ++.B ip ++was written by Alexey N. Kuznetsov and added in Linux 2.2. 
++.SH SEE ALSO ++.BR tc (8) ++.br ++.RB "IP Command reference " ip-cref.ps ++.br ++.RB "IP tunnels " ip-cref.ps ++ ++.SH AUTHOR ++ ++Manpage maintained by Michail Litvak +diff -Naur iproute2-orig/debian/manpages/old/tc-cbq-details.8 iproute2/debian/manpages/old/tc-cbq-details.8 +--- iproute2-orig/debian/manpages/old/tc-cbq-details.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-cbq-details.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,425 @@ ++.TH CBQ 8 "8 December 2001" "iproute2" "Linux" ++.SH NAME ++CBQ \- Class Based Queueing ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] cbq avpkt ++bytes ++.B bandwidth ++rate ++.B [ cell ++bytes ++.B ] [ ewma ++log ++.B ] [ mpu ++bytes ++.B ] ++ ++.B tc class ... dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] cbq allot ++bytes ++.B [ bandwidth ++rate ++.B ] [ rate ++rate ++.B ] prio ++priority ++.B [ weight ++weight ++.B ] [ minburst ++packets ++.B ] [ maxburst ++packets ++.B ] [ ewma ++log ++.B ] [ cell ++bytes ++.B ] avpkt ++bytes ++.B [ mpu ++bytes ++.B ] [ bounded isolated ] [ split ++handle ++.B & defmap ++defmap ++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++Class Based Queueing is a classful qdisc that implements a rich ++linksharing hierarchy of classes. It contains shaping elements as ++well as prioritizing capabilities. Shaping is performed using link ++idle time calculations based on the timing of dequeue events and ++underlying link bandwidth. ++ ++.SH SHAPING ALGORITHM ++Shaping is done using link idle time calculations, and actions taken if ++these calculations deviate from set limits. ++ ++When shaping a 10mbit/s connection to 1mbit/s, the link will ++be idle 90% of the time. If it isn't, it needs to be throttled so that it ++IS idle 90% of the time. 
++ ++From the kernel's perspective, this is hard to measure, so CBQ instead ++derives the idle time from the number of microseconds (in fact, jiffies) ++that elapse between requests from the device driver for more data. Combined ++with the knowledge of packet sizes, this is used to approximate how full or ++empty the link is. ++ ++This is rather circumspect and doesn't always arrive at proper ++results. For example, what is the actual link speed of an interface ++that is not really able to transmit the full 100mbit/s of data, ++perhaps because of a badly implemented driver? A PCMCIA network card ++will also never achieve 100mbit/s because of the way the bus is ++designed - again, how do we calculate the idle time? ++ ++The physical link bandwidth may be ill defined in case of not-quite-real ++network devices like PPP over Ethernet or PPTP over TCP/IP. The effective ++bandwidth in that case is probably determined by the efficiency of pipes ++to userspace - which is not defined. ++ ++During operations, the effective idletime is measured using an ++exponential weighted moving average (EWMA), which considers recent ++packets to be exponentially more important than past ones. The Unix ++loadaverage is calculated in the same way. ++ ++The calculated idle time is subtracted from the EWMA measured one, ++the resulting number is called 'avgidle'. A perfectly loaded link has ++an avgidle of zero: packets arrive exactly at the calculated ++interval. ++ ++An overloaded link has a negative avgidle and if it gets too negative, ++CBQ throttles and is then 'overlimit'. ++ ++Conversely, an idle link might amass a huge avgidle, which would then ++allow infinite bandwidths after a few hours of silence. To prevent ++this, avgidle is capped at ++.B maxidle. ++ ++If overlimit, in theory, the CBQ could throttle itself for exactly the ++amount of time that was calculated to pass between packets, and then ++pass one packet, and throttle again. 
Due to timer resolution constraints, ++this may not be feasible, see the ++.B minburst ++parameter below. ++ ++.SH CLASSIFICATION ++Within the one CBQ instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, CBQ starts at the root and uses various methods to ++determine which class should receive the data. If a verdict is reached, this ++process is repeated for the recipient class which might have further ++means of classifying traffic to its children, if any. ++ ++CBQ has the following methods available to classify a packet to any child ++classes. ++.TP ++(i) ++.B skb->priority class encoding. ++Can be set from userspace by an application with the ++.B SO_PRIORITY ++setsockopt. ++The ++.B skb->priority class encoding ++only applies if the skb->priority holds a major:minor handle of an existing ++class within this qdisc. ++.TP ++(ii) ++tc filters attached to the class. ++.TP ++(iii) ++The defmap of a class, as set with the ++.B split & defmap ++parameters. The defmap may contain instructions for each possible Linux packet ++priority. ++ ++.P ++Each class also has a ++.B level. ++Leaf nodes, attached to the bottom of the class hierarchy, have a level of 0. ++.SH CLASSIFICATION ALGORITHM ++ ++Classification is a loop, which terminates when a leaf class is found. At any ++point the loop may jump to the fallback algorithm. ++ ++The loop consists of the following steps: ++.TP ++(i) ++If the packet is generated locally and has a valid classid encoded within its ++.B skb->priority, ++choose it and terminate. ++ ++.TP ++(ii) ++Consult the tc filters, if any, attached to this child. If these return ++a class which is not a leaf class, restart loop from the class returned. ++If it is a leaf, choose it and terminate. ++.TP ++(iii) ++If the tc filters did not return a class, but did return a classid, ++try to find a class with that id within this qdisc. 
++Check if the found class is of a lower ++.B level ++than the current class. If so, and the returned class is not a leaf node, ++restart the loop at the found class. If it is a leaf node, terminate. ++If we found an upward reference to a higher level, enter the fallback ++algorithm. ++.TP ++(iv) ++If the tc filters did not return a class, nor a valid reference to one, ++consider the minor number of the reference to be the priority. Retrieve ++a class from the defmap of this class for the priority. If this did not ++contain a class, consult the defmap of this class for the ++.B BEST_EFFORT ++class. If this is an upward reference, or no ++.B BEST_EFFORT ++class was defined, ++enter the fallback algorithm. If a valid class was found, and it is not a ++leaf node, restart the loop at this class. If it is a leaf, choose it and ++terminate. If ++neither the priority distilled from the classid, nor the ++.B BEST_EFFORT ++priority yielded a class, enter the fallback algorithm. ++.P ++The fallback algorithm resides outside of the loop and is as follows. ++.TP ++(i) ++Consult the defmap of the class at which the jump to fallback occurred. If ++the defmap contains a class for the ++.B ++priority ++of the class (which is related to the TOS field), choose this class and ++terminate. ++.TP ++(ii) ++Consult the map for a class for the ++.B BEST_EFFORT ++priority. If found, choose it, and terminate. ++.TP ++(iii) ++Choose the class at which break out to the fallback algorithm occurred. Terminate. ++.P ++The packet is enqueued to the class which was chosen when either algorithm ++terminated. It is therefore possible for a packet to be enqueued *not* at a ++leaf node, but in the middle of the hierarchy. ++ ++.SH LINK SHARING ALGORITHM ++When dequeuing for sending to the network device, CBQ decides which of its ++classes will be allowed to send. It does so with a Weighted Round Robin process ++in which each class with packets gets a chance to send in turn. 
The WRR process ++starts by asking the highest priority classes (lowest numerically - ++highest semantically) for packets, and will continue to do so until they ++have no more data to offer, in which case the process repeats for lower ++priorities. ++ ++.B CERTAINTY ENDS HERE, ANK PLEASE HELP ++ ++Each class is not allowed to send at length though - they can only dequeue a ++configurable amount of data during each round. ++ ++If a class is about to go overlimit, and it is not ++.B bounded ++it will try to borrow avgidle from siblings that are not ++.B isolated. ++This process is repeated from the bottom upwards. If a class is unable ++to borrow enough avgidle to send a packet, it is throttled and not asked ++for a packet for enough time for the avgidle to increase above zero. ++ ++.B I REALLY NEED HELP FIGURING THIS OUT. REST OF DOCUMENT IS PRETTY CERTAIN ++.B AGAIN. ++ ++.SH QDISC ++The root qdisc of a CBQ class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the CBQ instance, either at the ++.B root ++of an interface or within an existing class. ++.TP ++handle major: ++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional. ++.TP ++avpkt bytes ++For calculations, the average packet size must be known. It is silently capped ++at a minimum of 2/3 of the interface MTU. Mandatory. ++.TP ++bandwidth rate ++To determine the idle time, CBQ must know the bandwidth of your underlying ++physical interface, or parent qdisc. This is a vital parameter, more about it ++later. Mandatory. ++.TP ++cell ++The cell size determines the granularity of packet transmission time calculations. Has a sensible default. ++.TP ++mpu ++A zero sized packet may still take time to transmit. This value is the lower ++cap for packet transmission time calculations - packets smaller than this value ++are still deemed to have this size. Defaults to zero. 
++.TP ++ewma log ++When CBQ needs to measure the average idle time, it does so using an ++Exponentially Weighted Moving Average which smoothes out measurements into ++a moving average. The EWMA LOG determines how much smoothing occurs. Defaults ++to 5. Lower values imply greater sensitivity. Must be between 0 and 31. ++.P ++A CBQ qdisc does not shape out of its own accord. It only needs to know certain ++parameters about the underlying link. Actual shaping is done in classes. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named. The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++weight weight ++When dequeuing to the interface, classes are tried for traffic in a ++round-robin fashion. Classes with a higher configured qdisc will generally ++have more traffic to offer during each round, so it makes sense to allow ++it to dequeue more traffic. All weights under a class are normalized, so ++only the ratios matter. Defaults to the configured rate, unless the priority ++of this class is maximal, in which case it is set to 1. ++.TP ++allot bytes ++Allot specifies how many bytes a qdisc can dequeue ++during each round of the process. This parameter is weighted using the ++renormalized class weight described above. ++ ++.TP ++priority priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++rate rate ++Maximum rate this class and all its children combined can send at. Mandatory. ++ ++.TP ++bandwidth rate ++This is different from the bandwidth specified when creating a CBQ disc. 
Only ++used to determine maxidle and offtime, which are only calculated when ++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst. ++ ++.TP ++maxburst ++This number of packets is used to calculate maxidle so that when ++avgidle is at maxidle, this number of average packets can be burst ++before avgidle drops to 0. Set it higher to be more tolerant of ++bursts. You can't set maxidle directly, only via this parameter. ++ ++.TP ++minburst ++As mentioned before, CBQ needs to throttle in case of ++overlimit. The ideal solution is to do so for exactly the calculated ++idle time, and pass 1 packet. However, Unix kernels generally have a ++hard time scheduling events shorter than 10ms, so it is better to ++throttle for a longer period, and then pass minburst packets in one ++go, and then sleep minburst times longer. ++ ++The time to wait is called the offtime. Higher values of minburst lead ++to more accurate shaping in the long term, but to bigger bursts at ++millisecond timescales. ++ ++.TP ++minidle ++If avgidle is below 0, we are overlimits and need to wait until ++avgidle will be big enough to send one packet. To prevent a sudden ++burst from shutting down the link for a prolonged period of time, ++avgidle is reset to minidle if it gets too low. ++ ++Minidle is specified in negative microseconds, so 10 means that ++avgidle is capped at -10us. ++ ++.TP ++bounded ++Signifies that this class will not borrow bandwidth from its siblings. ++.TP ++isolated ++Means that this class will not lend bandwidth to its siblings ++ ++.TP ++split major:minor & defmap bitmap[/bitmap] ++If consulting filters attached to a class did not give a verdict, ++CBQ can also classify based on the packet's priority. There are 16 ++priorities available, numbered from 0 to 15. ++ ++The defmap specifies which priorities this class wants to receive, ++specified as a bitmap. The Least Significant Bit corresponds to priority ++zero. 
The ++.B split ++parameter tells CBQ at which class the decision must be made, which should ++be a (grand)parent of the class you are adding. ++ ++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0' ++configures class 10:0 to send packets with priorities 6 and 7 to 10:1. ++ ++The complementary configuration would then ++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f' ++Which would send all packets 0, 1, 2, 3, 4 and 5 to 10:2. ++.TP ++estimator interval timeconstant ++CBQ can measure how much bandwidth each class is using, which tc filters ++can use to classify packets with. In order to determine the bandwidth ++it uses a very simple estimator that measures once every ++.B interval ++microseconds how much traffic has passed. This again is an EWMA, for which ++the time constant can be specified, also in microseconds. The ++.B time constant ++corresponds to the sluggishness of the measurement or, conversely, to the ++sensitivity of the average to short bursts. Higher values mean less ++sensitivity. ++ ++ ++ ++.SH SOURCES ++.TP ++o ++Sally Floyd and Van Jacobson, "Link-sharing and Resource ++Management Models for Packet Networks", ++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on CBQ and Guarantee Service", 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on Class-Based Queueing: Setting ++Parameters", 1996 ++ ++.TP ++o ++Sally Floyd and Michael Speer, "Experimental Results ++for Class-Based Queueing", 1998, not published. ++ ++ ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . 
This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-cbq.8 iproute2/debian/manpages/old/tc-cbq.8 +--- iproute2-orig/debian/manpages/old/tc-cbq.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-cbq.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,353 @@ ++.TH CBQ 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++CBQ \- Class Based Queueing ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] cbq [ allot ++bytes ++.B ] avpkt ++bytes ++.B bandwidth ++rate ++.B [ cell ++bytes ++.B ] [ ewma ++log ++.B ] [ mpu ++bytes ++.B ] ++ ++.B tc class ... dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] cbq allot ++bytes ++.B [ bandwidth ++rate ++.B ] [ rate ++rate ++.B ] prio ++priority ++.B [ weight ++weight ++.B ] [ minburst ++packets ++.B ] [ maxburst ++packets ++.B ] [ ewma ++log ++.B ] [ cell ++bytes ++.B ] avpkt ++bytes ++.B [ mpu ++bytes ++.B ] [ bounded isolated ] [ split ++handle ++.B & defmap ++defmap ++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++Class Based Queueing is a classful qdisc that implements a rich ++linksharing hierarchy of classes. It contains shaping elements as ++well as prioritizing capabilities. Shaping is performed using link ++idle time calculations based on the timing of dequeue events and ++underlying link bandwidth. ++ ++.SH SHAPING ALGORITHM ++When shaping a 10mbit/s connection to 1mbit/s, the link will ++be idle 90% of the time. If it isn't, it needs to be throttled so that it ++IS idle 90% of the time. ++ ++During operations, the effective idletime is measured using an ++exponential weighted moving average (EWMA), which considers recent ++packets to be exponentially more important than past ones. The Unix ++loadaverage is calculated in the same way. ++ ++The calculated idle time is subtracted from the EWMA measured one, ++the resulting number is called 'avgidle'. 
A perfectly loaded link has ++an avgidle of zero: packets arrive exactly at the calculated ++interval. ++ ++An overloaded link has a negative avgidle and if it gets too negative, ++CBQ throttles and is then 'overlimit'. ++ ++Conversely, an idle link might amass a huge avgidle, which would then ++allow infinite bandwidths after a few hours of silence. To prevent ++this, avgidle is capped at ++.B maxidle. ++ ++If overlimit, in theory, the CBQ could throttle itself for exactly the ++amount of time that was calculated to pass between packets, and then ++pass one packet, and throttle again. Due to timer resolution constraints, ++this may not be feasible, see the ++.B minburst ++parameter below. ++ ++.SH CLASSIFICATION ++Within the one CBQ instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, CBQ starts at the root and uses various methods to ++determine which class should receive the data. ++ ++In the absence of uncommon configuration options, the process is rather easy. ++At each node we look for an instruction, and then go to the class the ++instruction refers us to. If the class found is a barren leaf-node (without ++children), we enqueue the packet there. If it is not yet a leaf node, we do ++the whole thing over again starting from that node. ++ ++The following actions are performed, in order at each node we visit, until one ++sends us to another node, or terminates the process. ++.TP ++(i) ++Consult filters attached to the class. If sent to a leafnode, we are done. ++Otherwise, restart. ++.TP ++(ii) ++Consult the defmap for the priority assigned to this packet, which depends ++on the TOS bits. Check if the referral is leafless, otherwise restart. ++.TP ++(iii) ++Ask the defmap for instructions for the 'best effort' priority. Check the ++answer for leafness, otherwise restart. ++.TP ++(iv) ++If none of the above returned with an instruction, enqueue at this node. 
++.P ++This algorithm makes sure that a packet always ends up somewhere, even while ++you are busy building your configuration. ++ ++For more details, see ++.BR tc-cbq-details(8). ++ ++.SH LINK SHARING ALGORITHM ++When dequeuing for sending to the network device, CBQ decides which of its ++classes will be allowed to send. It does so with a Weighted Round Robin process ++in which each class with packets gets a chance to send in turn. The WRR process ++starts by asking the highest priority classes (lowest numerically - ++highest semantically) for packets, and will continue to do so until they ++have no more data to offer, in which case the process repeats for lower ++priorities. ++ ++Classes by default borrow bandwidth from their siblings. A class can be ++prevented from doing so by declaring it 'bounded'. A class can also indicate ++its unwillingness to lend out bandwidth by being 'isolated'. ++ ++.SH QDISC ++The root of a CBQ qdisc class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the CBQ instance, either at the ++.B root ++of an interface or within an existing class. ++.TP ++handle major: ++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional, but very useful if classes ++will be generated within this qdisc. ++.TP ++allot bytes ++This allotment is the 'chunkiness' of link sharing and is used for determining packet ++transmission time tables. The qdisc allot differs slightly from the class allot discussed ++below. Optional. Defaults to a reasonable value, related to avpkt. ++.TP ++avpkt bytes ++The average size of a packet is needed for calculating maxidle, and is also used ++for making sure 'allot' has a safe value. Mandatory. ++.TP ++bandwidth rate ++To determine the idle time, CBQ must know the bandwidth of your underlying ++physical interface, or parent qdisc. This is a vital parameter, more about it ++later. 
Mandatory. ++.TP ++cell ++The cell size determines the granularity of packet transmission time calculations. Has a sensible default. ++.TP ++mpu ++A zero sized packet may still take time to transmit. This value is the lower ++cap for packet transmission time calculations - packets smaller than this value ++are still deemed to have this size. Defaults to zero. ++.TP ++ewma log ++When CBQ needs to measure the average idle time, it does so using an ++Exponentially Weighted Moving Average which smoothes out measurements into ++a moving average. The EWMA LOG determines how much smoothing occurs. Lower ++values imply greater sensitivity. Must be between 0 and 31. Defaults ++to 5. ++.P ++A CBQ qdisc does not shape out of its own accord. It only needs to know certain ++parameters about the underlying link. Actual shaping is done in classes. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named. The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++weight weight ++When dequeuing to the interface, classes are tried for traffic in a ++round-robin fashion. Classes with a higher configured qdisc will generally ++have more traffic to offer during each round, so it makes sense to allow ++it to dequeue more traffic. All weights under a class are normalized, so ++only the ratios matter. Defaults to the configured rate, unless the priority ++of this class is maximal, in which case it is set to 1. ++.TP ++allot bytes ++Allot specifies how many bytes a qdisc can dequeue ++during each round of the process. This parameter is weighted using the ++renormalized class weight described above. 
Silently capped at a minimum of ++3/2 avpkt. Mandatory. ++ ++.TP ++prio priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++avpkt ++See the QDISC section. ++ ++.TP ++rate rate ++Maximum rate this class and all its children combined can send at. Mandatory. ++ ++.TP ++bandwidth rate ++This is different from the bandwidth specified when creating a CBQ disc! Only ++used to determine maxidle and offtime, which are only calculated when ++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst. ++ ++.TP ++maxburst ++This number of packets is used to calculate maxidle so that when ++avgidle is at maxidle, this number of average packets can be burst ++before avgidle drops to 0. Set it higher to be more tolerant of ++bursts. You can't set maxidle directly, only via this parameter. ++ ++.TP ++minburst ++As mentioned before, CBQ needs to throttle in case of ++overlimit. The ideal solution is to do so for exactly the calculated ++idle time, and pass 1 packet. However, Unix kernels generally have a ++hard time scheduling events shorter than 10ms, so it is better to ++throttle for a longer period, and then pass minburst packets in one ++go, and then sleep minburst times longer. ++ ++The time to wait is called the offtime. Higher values of minburst lead ++to more accurate shaping in the long term, but to bigger bursts at ++millisecond timescales. Optional. ++ ++.TP ++minidle ++If avgidle is below 0, we are overlimits and need to wait until ++avgidle will be big enough to send one packet. To prevent a sudden ++burst from shutting down the link for a prolonged period of time, ++avgidle is reset to minidle if it gets too low. ++ ++Minidle is specified in negative microseconds, so 10 means that ++avgidle is capped at -10us. Optional. ++ ++.TP ++bounded ++Signifies that this class will not borrow bandwidth from its siblings. 
++.TP ++isolated ++Means that this class will not lend bandwidth to its siblings ++ ++.TP ++split major:minor & defmap bitmap[/bitmap] ++If consulting filters attached to a class did not give a verdict, ++CBQ can also classify based on the packet's priority. There are 16 ++priorities available, numbered from 0 to 15. ++ ++The defmap specifies which priorities this class wants to receive, ++specified as a bitmap. The Least Significant Bit corresponds to priority ++zero. The ++.B split ++parameter tells CBQ at which class the decision must be made, which should ++be a (grand)parent of the class you are adding. ++ ++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0' ++configures class 10:0 to send packets with priorities 6 and 7 to 10:1. ++ ++The complementary configuration would then ++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f' ++Which would send all packets 0, 1, 2, 3, 4 and 5 to 10:2. ++.TP ++estimator interval timeconstant ++CBQ can measure how much bandwidth each class is using, which tc filters ++can use to classify packets with. In order to determine the bandwidth ++it uses a very simple estimator that measures once every ++.B interval ++microseconds how much traffic has passed. This again is an EWMA, for which ++the time constant can be specified, also in microseconds. The ++.B time constant ++corresponds to the sluggishness of the measurement or, conversely, to the ++sensitivity of the average to short bursts. Higher values mean less ++sensitivity. ++ ++.SH BUGS ++The actual bandwidth of the underlying link may not be known, for example ++in the case of PPPoE or PPTP connections which in fact may send over a ++pipe, instead of over a physical device. CBQ is quite resilient to major ++errors in the configured bandwidth, probably at the cost of coarser shaping. ++ ++Default kernels rely on coarse timing information for making decisions. 
These ++may make shaping precise in the long term, but inaccurate on second long scales. ++ ++See ++.BR tc-cbq-details(8) ++for hints on how to improve this. ++ ++.SH SOURCES ++.TP ++o ++Sally Floyd and Van Jacobson, "Link-sharing and Resource ++Management Models for Packet Networks", ++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on Class-Based Queueing: Setting ++Parameters", 1996 ++ ++.TP ++o ++Sally Floyd and Michael Speer, "Experimental Results ++for Class-Based Queueing", 1998, not published. ++ ++ ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-htb.8 iproute2/debian/manpages/old/tc-htb.8 +--- iproute2-orig/debian/manpages/old/tc-htb.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-htb.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,150 @@ ++.TH HTB 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++HTB \- Hierarchy Token Bucket ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] htb [ default ++minor-id ++.B ] ++ ++.B tc class ... dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] htb rate ++rate ++.B [ ceil ++rate ++.B ] burst ++bytes ++.B [ cburst ++bytes ++.B ] [ prio ++priority ++.B ] ++ ++.SH DESCRIPTION ++HTB is meant as a more understandable and intuitive replacement for ++the CBQ qdisc in Linux. Both CBQ and HTB help you to control the use ++of the outbound bandwidth on a given link. Both allow you to use one ++physical link to simulate several slower links and to send different ++kinds of traffic on different simulated links. In both cases, you have ++to specify how to divide the physical link into simulated links and ++how to decide which simulated link to use for a given packet to be sent. 
++ ++Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm ++which does not depend on interface characteristics and so does not need to ++know the underlying bandwidth of the outgoing interface. ++ ++.SH SHAPING ALGORITHM ++Shaping works as documented in ++.B tc-tbf (8). ++ ++.SH CLASSIFICATION ++Within the one HTB instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, HTB starts at the root and uses various methods to ++determine which class should receive the data. ++ ++In the absence of uncommon configuration options, the process is rather easy. ++At each node we look for an instruction, and then go to the class the ++instruction refers us to. If the class found is a barren leaf-node (without ++children), we enqueue the packet there. If it is not yet a leaf node, we do ++the whole thing over again starting from that node. ++ ++The following actions are performed, in order at each node we visit, until one ++sends us to another node, or terminates the process. ++.TP ++(i) ++Consult filters attached to the class. If sent to a leaf node, we are done. ++Otherwise, restart. ++.TP ++(ii) ++If none of the above returned with an instruction, enqueue at this node. ++.P ++This algorithm makes sure that a packet always ends up somewhere, even while ++you are busy building your configuration. ++ ++.SH LINK SHARING ALGORITHM ++FIXME ++ ++.SH QDISC ++The root of an HTB qdisc class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the HTB instance, either at the ++.B root ++of an interface or within an existing class. ++.TP ++handle major: ++Like all other qdiscs, the HTB can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional, but very useful if classes ++will be generated within this qdisc.
++.TP ++default minor-id ++Unclassified traffic gets sent to the class with this minor-id. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named. The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++prio priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++rate rate ++Maximum rate this class and all its children are guaranteed. Mandatory. ++ ++.TP ++ceil rate ++Maximum rate at which a class can send, if its parent has bandwidth to spare. ++Defaults to the configured rate, which implies no borrowing. ++ ++.TP ++burst bytes ++Amount of bytes that can be burst at ++.B ceil ++speed, in excess of the configured ++.B rate. ++Should be at least as high as the highest burst of all children. ++ ++.TP ++cburst bytes ++Amount of bytes that can be burst at 'infinite' speed, in other words, as fast ++as the interface can transmit them. For perfect evening out, should be equal to at most one average ++packet. Should be at least as high as the highest cburst of all children. ++ ++.SH NOTES ++Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel, ++there are 100 timer events per second, the maximum rate is that rate at which 'burst' bytes are sent each timer tick. ++From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte ++burst as 100*12kb*8 equals 10mbit. ++ ++.SH SEE ALSO ++.BR tc (8) ++.P ++HTB website: http://luxik.cdi.cz/~devik/qos/htb/ ++.SH AUTHOR ++Martin Devera .
This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-pbfifo.8 iproute2/debian/manpages/old/tc-pbfifo.8 +--- iproute2-orig/debian/manpages/old/tc-pbfifo.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-pbfifo.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,72 @@ ++.TH PBFIFO 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++pfifo \- Packet limited First In, First Out queue ++.P ++bfifo \- Byte limited First In, First Out queue ++ ++.SH SYNOPSIS ++.B tc qdisc ... add pfifo ++.B [ limit ++packets ++.B ] ++.P ++.B tc qdisc ... add bfifo ++.B [ limit ++bytes ++.B ] ++ ++.SH DESCRIPTION ++The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the ++simplest queues possible and therefore have no overhead. ++.B pfifo ++constrains the queue size as measured in packets. ++.B bfifo ++does so as measured in bytes. ++ ++Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer ++pfifo or bfifo over the default. ++ ++.SH ALGORITHM ++A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of ++a list. When a packet needs to be sent out to the network, it is taken from the head of the list. ++ ++If the list is too long, no further packets are allowed on. This is called 'tail drop'. ++ ++.SH PARAMETERS ++.TP ++limit ++Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults ++to the interface txqueuelen, as specified with ++.BR ifconfig (8) ++or ++.BR ip (8). ++ ++For bfifo, it defaults to the txqueuelen multiplied by the interface MTU. ++ ++.SH OUTPUT ++The output of ++.B tc -s qdisc ls ++contains the limit, either in packets or in bytes, and the number of bytes ++and packets actually sent. An unsent and dropped packet only appears between braces ++and is not counted as 'Sent'. ++ ++In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets. 
++No packets were dropped, and as the pfifo queue does not slow down packets, there were also no ++overlimits: ++.P ++.nf ++# tc -s qdisc ls dev eth0 ++qdisc pfifo 8001: dev eth0 limit 100p ++ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0) ++.fi ++ ++If a backlog occurs, this is displayed as well. ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, ++ ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-pfifo_fast.8 iproute2/debian/manpages/old/tc-pfifo_fast.8 +--- iproute2-orig/debian/manpages/old/tc-pfifo_fast.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-pfifo_fast.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,59 @@ ++.TH PFIFO_FAST 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++pfifo_fast \- three-band first in, first out queue ++ ++.SH DESCRIPTION ++pfifo_fast is the default qdisc of each interface. ++ ++Whenever an interface is created, the pfifo_fast qdisc is automatically used ++as a queue. If another qdisc is attached, it preempts the default ++pfifo_fast, which automatically returns to function when an existing qdisc ++is detached. ++ ++In this sense this qdisc is magic, and unlike other qdiscs. ++ ++.SH ALGORITHM ++The algorithm is very similar to that of the classful ++.BR tc-prio (8) ++qdisc. ++.B pfifo_fast ++is like three ++.BR tc-pfifo (8) ++queues side by side, where packets can be enqueued in any of the three bands ++based on their Type of Service bits or assigned priority. ++ ++Not all three bands are dequeued simultaneously - as long as lower bands ++have traffic, higher bands are never dequeued. This can be used to ++prioritize interactive traffic or penalize 'lowest cost' traffic. ++ ++Each band can be txqueuelen packets long, as configured with ++.BR ifconfig (8) ++or ++.BR ip (8). ++Additional packets coming in are not enqueued but are instead dropped. 
++ ++See ++.BR tc-prio (8) ++for complete details on how TOS bits are translated into bands. ++.SH PARAMETERS ++.TP ++txqueuelen ++The length of the three bands depends on the interface txqueuelen, as ++specified with ++.BR ifconfig (8) ++or ++.BR ip (8). ++ ++.SH BUGS ++Does not maintain statistics and does not show up in tc qdisc ls. This is because ++it is the automatic default in the absence of a configured qdisc. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, ++ ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-prio.8 iproute2/debian/manpages/old/tc-prio.8 +--- iproute2-orig/debian/manpages/old/tc-prio.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-prio.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,187 @@ ++.TH PRIO 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++PRIO \- Priority qdisc ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] prio [ bands ++bands ++.B ] [ priomap ++band,band,band... ++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++The PRIO qdisc is a simple classful queueing discipline that contains ++an arbitrary number of classes of differing priority. The classes are ++dequeued in numerical descending order of priority. PRIO is a scheduler ++and never delays packets - it is a work-conserving qdisc, though the qdiscs ++contained in the classes may not be. ++ ++Very useful for lowering latency when there is no need for slowing down ++traffic. ++ ++.SH ALGORITHM ++On creation with 'tc qdisc add', a fixed number of bands is created. Each ++band is a class, although is not possible to add classes with 'tc qdisc ++add', the number of bands to be created must instead be specified on the ++commandline attaching PRIO to its root. ++ ++When dequeueing, band 0 is tried first and only if it did not deliver a ++packet does PRIO try band 1, and so onwards. 
Maximum reliability packets ++should therefore go to band 0, minimum delay to band 1 and the rest to band ++2. ++ ++As the PRIO qdisc itself will have minor number 0, band 0 is actually ++major:1, band 1 is major:2, etc. For major, substitute the major number ++assigned to the qdisc on 'tc qdisc add' with the ++.B handle ++parameter. ++ ++.SH CLASSIFICATION ++Three methods are available to PRIO to determine in which band a packet will ++be enqueued. ++.TP ++From userspace ++A process with sufficient privileges can encode the destination class ++directly with SO_PRIORITY, see ++.BR tc(7). ++.TP ++with a tc filter ++A tc filter attached to the root qdisc can point traffic directly to a class. ++.TP ++with the priomap ++Based on the packet priority, which in turn is derived from the Type of ++Service assigned to the packet. ++.P ++Only the priomap is specific to this qdisc. ++.SH QDISC PARAMETERS ++.TP ++bands ++Number of bands. If changed from the default of 3, ++.B priomap ++must be updated as well. ++.TP ++priomap ++The priomap maps the priority of ++a packet to a class. The priority can either be set directly from userspace, ++or be derived from the Type of Service of the packet. ++ ++Determines how packet priorities, as assigned by the kernel, map to ++bands. Mapping occurs based on the TOS octet of the packet, which looks like ++this: ++ ++.nf ++0 1 2 3 4 5 6 7 +++---+---+---+---+---+---+---+---+ ++| | | | ++|PRECEDENCE | TOS |MBZ| ++| | | | +++---+---+---+---+---+---+---+---+ ++.fi ++ ++The four TOS bits (the 'TOS field') are defined as: ++ ++.nf ++Binary Decimal Meaning ++----------------------------------------- ++1000 8 Minimize delay (md) ++0100 4 Maximize throughput (mt) ++0010 2 Maximize reliability (mr) ++0001 1 Minimize monetary cost (mmc) ++0000 0 Normal Service ++.fi ++ ++As there is 1 bit to the right of these four bits, the actual value of the ++TOS field is double the value of the TOS bits.
Tcpdump -v -v shows you the ++value of the entire TOS field, not just the four bits. It is the value you ++see in the first column of this table: ++ ++.nf ++TOS Bits Means Linux Priority Band ++------------------------------------------------------------ ++0x0 0 Normal Service 0 Best Effort 1 ++0x2 1 Minimize Monetary Cost 1 Filler 2 ++0x4 2 Maximize Reliability 0 Best Effort 1 ++0x6 3 mmc+mr 0 Best Effort 1 ++0x8 4 Maximize Throughput 2 Bulk 2 ++0xa 5 mmc+mt 2 Bulk 2 ++0xc 6 mr+mt 2 Bulk 2 ++0xe 7 mmc+mr+mt 2 Bulk 2 ++0x10 8 Minimize Delay 6 Interactive 0 ++0x12 9 mmc+md 6 Interactive 0 ++0x14 10 mr+md 6 Interactive 0 ++0x16 11 mmc+mr+md 6 Interactive 0 ++0x18 12 mt+md 4 Int. Bulk 1 ++0x1a 13 mmc+mt+md 4 Int. Bulk 1 ++0x1c 14 mr+mt+md 4 Int. Bulk 1 ++0x1e 15 mmc+mr+mt+md 4 Int. Bulk 1 ++.fi ++ ++The second column contains the value of the relevant ++four TOS bits, followed by their translated meaning. For example, 15 stands ++for a packet wanting Minimal Monetary Cost, Maximum Reliability, Maximum ++Throughput AND Minimum Delay. ++ ++The fourth column lists the way the Linux kernel interprets the TOS bits, by ++showing to which Priority they are mapped. ++ ++The last column shows the result of the default priomap. On the commandline, ++the default priomap looks like this: ++ ++ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 ++ ++This means that priority 4, for example, gets mapped to band number 1. ++The priomap also allows you to list higher priorities (> 7) which do not ++correspond to TOS mappings, but which are set by other means.
++ ++This table from RFC 1349 (read it for more details) explains how ++applications might very well set their TOS bits: ++ ++.nf ++TELNET 1000 (minimize delay) ++FTP ++ Control 1000 (minimize delay) ++ Data 0100 (maximize throughput) ++ ++TFTP 1000 (minimize delay) ++ ++SMTP ++ Command phase 1000 (minimize delay) ++ DATA phase 0100 (maximize throughput) ++ ++Domain Name Service ++ UDP Query 1000 (minimize delay) ++ TCP Query 0000 ++ Zone Transfer 0100 (maximize throughput) ++ ++NNTP 0001 (minimize monetary cost) ++ ++ICMP ++ Errors 0000 ++ Requests 0000 (mostly) ++ Responses (mostly) ++.fi ++ ++ ++.SH CLASSES ++PRIO classes cannot be configured further - they are automatically created ++when the PRIO qdisc is attached. Each class however can contain yet a ++further qdisc. ++ ++.SH BUGS ++Large amounts of traffic in the lower bands can cause starvation of higher ++bands. Can be prevented by attaching a shaper (for example, ++.BR tc-tbf(8) ++to these bands to make sure they cannot dominate the link. ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, , J Hadi Salim ++. This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-red.8 iproute2/debian/manpages/old/tc-red.8 +--- iproute2-orig/debian/manpages/old/tc-red.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-red.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,131 @@ ++.TH RED 8 "13 December 2001" "iproute2" "Linux" ++.SH NAME ++red \- Random Early Detection ++.SH SYNOPSIS ++.B tc qdisc ... red ++.B limit ++bytes ++.B min ++bytes ++.B max ++bytes ++.B avpkt ++bytes ++.B burst ++packets ++.B [ ecn ] [ bandwidth ++rate ++.B ] probability ++chance ++ ++.SH DESCRIPTION ++Random Early Detection is a classless qdisc which manages its queue size ++smartly. Regular queues simply drop packets from the tail when they are ++full, which may not be the optimal behaviour. RED also performs tail drop, ++but does so in a more gradual way. 
++ ++Once the queue hits a certain average length, packets enqueued have a ++configurable chance of being marked (which may mean dropped). This chance ++increases linearly up to a point called the ++.B max ++average queue length, although the queue might get bigger. ++ ++This has a host of benefits over simple taildrop, while not being processor ++intensive. It prevents synchronous retransmits after a burst in traffic, ++which cause further retransmits, etc. ++ ++The goal is to have a small queue size, which is good for interactivity ++while not disturbing TCP/IP traffic with too many sudden drops after a burst ++of traffic. ++ ++Depending on whether ECN is configured, marking either means dropping or ++purely marking a packet as overlimit. ++.SH ALGORITHM ++The average queue size is used for determining the marking ++probability. This is calculated using an Exponential Weighted Moving ++Average, which can be more or less sensitive to bursts. ++ ++When the average queue size is below ++.B min ++bytes, no packet will ever be marked. When it exceeds ++.B min, ++the probability of doing so climbs linearly up ++to ++.B probability, ++until the average queue size hits ++.B max ++bytes. Because ++.B probability ++is normally not set to 100%, the queue size might ++conceivably rise above ++.B max ++bytes, so the ++.B limit ++parameter is provided to set a hard maximum for the size of the queue. ++ ++.SH PARAMETERS ++.TP ++min ++Average queue size at which marking becomes a possibility. ++.TP ++max ++At this average queue size, the marking probability is maximal. Should be at ++least twice ++.B min ++to prevent synchronous retransmits, higher for low ++.B min. ++.TP ++probability ++Maximum probability for marking, specified as a floating point ++number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%, ++respectively). ++.TP ++limit ++Hard limit on the real (not average) queue size in bytes. Further packets ++are dropped. Should be set higher than max+burst.
It is advised to set this ++a few times higher than ++.B max. ++.TP ++burst ++Used for determining how fast the average queue size is influenced by the ++real queue size. Larger values make the calculation more sluggish, allowing ++longer bursts of traffic before marking starts. Real life experiments ++support the following guideline: (min+min+max)/(3*avpkt). ++.TP ++avpkt ++Specified in bytes. Used with burst to determine the time constant for ++average queue size calculations. 1000 is a good value. ++.TP ++bandwidth ++This rate is used for calculating the average queue size after some ++idle time. Should be set to the bandwidth of your interface. Does not mean ++that RED will shape for you! Optional. ++.TP ++ecn ++As mentioned before, RED can either 'mark' or 'drop'. Explicit Congestion ++Notification allows RED to notify remote hosts that their rate exceeds the ++amount of bandwidth available. Non-ECN capable hosts can only be notified by ++dropping a packet. If this parameter is specified, packets which indicate ++that their hosts honor ECN will only be marked and not dropped, unless the ++queue size hits ++.B limit ++bytes. Needs a tc binary with RED support compiled in. Recommended. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH SOURCES ++.TP ++o ++Floyd, S., and Jacobson, V., Random Early Detection gateways for ++Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html ++.TP ++o ++Some changes to the algorithm by Alexey N. Kuznetsov. ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, , Alexey Makarenko ++, J Hadi Salim . ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-sfq.8 iproute2/debian/manpages/old/tc-sfq.8 +--- iproute2-orig/debian/manpages/old/tc-sfq.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-sfq.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,107 @@ ++.TH TC 8 "8 December 2001" "iproute2" "Linux" ++.SH NAME ++sfq \- Stochastic Fairness Queueing ++.SH SYNOPSIS ++.B tc qdisc ... 
perturb ++seconds ++.B quantum ++bytes ++ ++.SH DESCRIPTION ++ ++Stochastic Fairness Queueing is a classless queueing discipline available for ++traffic control with the ++.BR tc (8) ++command. ++ ++SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'. ++The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing ++any single flow from drowning out the rest. ++ ++This may in fact have some effect in mitigating a Denial of Service attempt. ++ ++SFQ is work-conserving and therefore always delivers a packet if it has one available. ++.SH ALGORITHM ++On enqueueing, each packet is assigned to a hash bucket, based on ++.TP ++(i) ++Source address ++.TP ++(ii) ++Destination address ++.TP ++(iii) ++Source port ++.P ++If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP. ++Packets with other protocols are hashed based on the 32bits representation of their ++destination and the socket they belong to. A flow corresponds mostly to a TCP/IP ++connection. ++ ++Each of these buckets should represent a unique flow. Because multiple flows may ++get hashed to the same bucket, the hashing algorithm is perturbed at configurable ++intervals so that the unfairness lasts only for a short while. Perturbation may ++however cause some inadvertent packet reordering to occur. ++ ++When dequeuing, each hashbucket with data is queried in a round robin fashion. ++ ++The compile time maximum length of the SFQ is 128 packets, which can be spread over ++at most 128 buckets of 1024 available. In case of overflow, tail-drop is performed ++on the fullest bucket, thus maintaining fairness. ++ ++.SH PARAMETERS ++.TP ++perturb ++Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that ++no perturbation occurs. Do not set too low for each perturbation may cause some packet ++reordering. 
Advised value: 10 ++.TP ++quantum ++Amount of bytes a flow is allowed to dequeue during a round of the round robin process. ++Defaults to the MTU of the interface which is also the advised value and the minimum value. ++ ++.SH EXAMPLE & USAGE ++ ++To attach to device ppp0: ++.P ++# tc qdisc add dev ppp0 root sfq perturb 10 ++.P ++Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful ++if it owns the queue. ++This is the case when the link speed equals the actually available bandwidth. This holds ++for regular phone modems, ISDN connections and direct non-switched ethernet links. ++.P ++Most often, cable modems and DSL devices do not fall into this category. The same holds ++for when connected to a switch and trying to send data to a congested segment also ++connected to the switch. ++.P ++In this case, the effective queue does not reside within Linux and is therefore not ++available for scheduling. ++.P ++Embed SFQ in a classful qdisc to make sure it owns the queue. ++ ++.SH SOURCE ++.TP ++o ++Paul E. McKenney "Stochastic Fairness Queuing", ++IEEE INFOCOMM'90 Proceedings, San Francisco, 1990. ++ ++.TP ++o ++Paul E. McKenney "Stochastic Fairness Queuing", ++"Interworking: Research and Experience", v.2, 1991, p.113-131. ++ ++.TP ++o ++See also: ++M. Shreedhar and George Varghese "Efficient Fair ++Queuing using Deficit Round Robin", Proc. SIGCOMM 95. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc-tbf.8 iproute2/debian/manpages/old/tc-tbf.8 +--- iproute2-orig/debian/manpages/old/tc-tbf.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc-tbf.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,138 @@ ++.TH TC 8 "13 December 2001" "iproute2" "Linux" ++.SH NAME ++tbf \- Token Bucket Filter ++.SH SYNOPSIS ++.B tc qdisc ... 
tbf rate ++rate ++.B burst ++bytes/cell ++.B ( latency ++ms ++.B | limit ++bytes ++.B ) [ mpu ++bytes ++.B [ peakrate ++rate ++.B mtu ++bytes/cell ++.B ] ] ++.P ++burst is also known as buffer and maxburst. mtu is also known as minburst. ++.SH DESCRIPTION ++ ++The Token Bucket Filter is a classless queueing discipline available for ++traffic control with the ++.BR tc (8) ++command. ++ ++TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle ++itself, although packets are available, to ensure that the configured rate is not exceeded. ++On all platforms except for Alpha, ++it is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness, ++sending out data exactly at the configured rates. ++ ++Much higher rates are possible but at the cost of losing the minimal burstiness. In that ++case, data is on average dequeued at the configured rate but may be sent much faster at millisecond ++timescales. Because of further queues living in network adaptors, this is often not a problem. ++ ++Kernels with a higher 'HZ' can achieve higher rates with perfect burstiness. On Alpha, HZ is ten ++times higher, leading to a 10mbit/s limit to perfection. These calculations hold for packets of on ++average 1000 bytes. ++ ++.SH ALGORITHM ++As the name implies, traffic is filtered based on the expenditure of ++.B tokens. ++Tokens roughly correspond to bytes, with the additional constraint that each packet consumes ++some tokens, no matter how small it is. This reflects the fact that even a zero-sized packet occupies ++the link for some time. ++ ++On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst ++in one go. Tokens arrive at a steady rate, until the bucket is full. ++ ++If no tokens are available, packets are queued, up to a configured limit. The TBF now ++calculates the token deficit, and throttles until the first packet in the queue can be sent. 
++ ++If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured ++to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF ++with a very small bucket, so that it doesn't burst. ++ ++To achieve perfection, the second bucket may contain only a single packet, which leads to ++the earlier mentioned 1mbit/s limit. ++ ++This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends ++on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100 ++packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s. ++ ++.SH PARAMETERS ++See ++.BR tc (8) ++for how to specify the units of these values. ++.TP ++limit or latency ++Limit is the number of bytes that can be queued waiting for tokens to become ++available. You can also specify this the other way around by setting the ++latency parameter, which specifies the maximum amount of time a packet can ++sit in the TBF. The latter calculation takes into account the size of the ++bucket, the rate and possibly the peakrate (if set). These two parameters ++are mutually exclusive. ++.TP ++burst ++Also known as buffer or maxburst. ++Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously. ++In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer ++if you want to reach your configured rate! ++ ++If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket. ++The minimum buffer size can be calculated by dividing the rate by HZ. ++ ++Token usage calculations are performed using a table which by default has a resolution of 8 packets. ++This resolution can be changed by specifying the ++.B cell ++size with the burst. 
For example, to specify a 6000 byte buffer with a 16 ++byte cell size, set a burst of 6000/16. You will probably never have to set ++this. Must be an integral power of 2. ++.TP ++mpu ++A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit ++determines the minimal token usage (specified in bytes) for a packet. Defaults to zero. ++.TP ++rate ++The speed knob. See remarks above about limits! See ++.BR tc (8) ++for units. ++.PP ++Furthermore, if a peakrate is desired, the following parameters are available: ++ ++.TP ++peakrate ++Maximum depletion rate of the bucket. Limited to 1mbit/s on Intel, 10mbit/s on Alpha. The peakrate does ++not need to be set, it is only necessary if perfect millisecond timescale shaping is required. ++ ++.TP ++mtu/minburst ++Specifies the size of the peakrate bucket. For perfect accuracy, should be set to the MTU of the interface. ++If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst ++allows around 3mbit/s of peakrate, given 1000 byte packets. ++ ++Like the regular burstsize you can also specify a ++.B cell ++size. ++.SH EXAMPLE & USAGE ++ ++To attach a TBF with a sustained maximum rate of 0.5mbit/s, a peakrate of 1.0mbit/s, ++a 5kilobyte buffer, with a pre-bucket queue size limit calculated so the TBF causes ++at most 70ms of latency, with perfect peakrate behaviour, issue: ++.P ++# tc qdisc add dev eth0 root tbf rate 0.5mbit \\ ++ burst 5kb latency 70ms peakrate 1mbit \\ ++ minburst 1540 ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . 
This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/old/tc.8 iproute2/debian/manpages/old/tc.8 +--- iproute2-orig/debian/manpages/old/tc.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/old/tc.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,348 @@ ++.TH TC 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++tc \- show / manipulate traffic control settings ++.SH SYNOPSIS ++.B tc qdisc [ add | change | replace | link ] dev ++DEV ++.B ++[ parent ++qdisc-id ++.B | root ] ++.B [ handle ++qdisc-id ] qdisc ++[ qdisc specific parameters ] ++.P ++ ++.B tc class [ add | change | replace ] dev ++DEV ++.B parent ++qdisc-id ++.B [ classid ++class-id ] qdisc ++[ qdisc specific parameters ] ++.P ++ ++.B tc filter [ add | change | replace ] dev ++DEV ++.B [ parent ++qdisc-id ++.B | root ] protocol ++protocol ++.B prio ++priority filtertype ++[ filtertype specific parameters ] ++.B flowid ++flow-id ++ ++.B tc [-s | -d ] qdisc show [ dev ++DEV ++.B ] ++.P ++.B tc [-s | -d ] class show dev ++DEV ++.P ++.B tc filter show dev ++DEV ++ ++.SH DESCRIPTION ++.B Tc ++is used to configure Traffic Control in the Linux kernel. Traffic Control consists ++of the following: ++ ++.TP ++SHAPING ++When traffic is shaped, its rate of transmission is under control. Shaping may ++be more than lowering the available bandwidth - it is also used to smooth out ++bursts in traffic for better network behaviour. Shaping occurs on egress. ++ ++.TP ++SCHEDULING ++By scheduling the transmission of packets it is possible to improve interactivity ++for traffic that needs it while still guaranteeing bandwidth to bulk transfers. Reordering ++is also called prioritizing, and happens only on egress. ++ ++.TP ++POLICING ++Where shaping deals with transmission of traffic, policing pertains to traffic ++arriving. Policing thus occurs on ingress. ++ ++.TP ++DROPPING ++Traffic exceeding a set bandwidth may also be dropped forthwith, both on ++ingress and on egress. 
++ ++.P ++Processing of traffic is controlled by three kinds of objects: qdiscs, ++classes and filters. ++ ++.SH QDISCS ++.B qdisc ++is short for 'queueing discipline' and it is elementary to ++understanding traffic control. Whenever the kernel needs to send a ++packet to an interface, it is ++.B enqueued ++to the qdisc configured for that interface. Immediately afterwards, the kernel ++tries to get as many packets as possible from the qdisc, for giving them ++to the network adaptor driver. ++ ++A simple QDISC is the 'pfifo' one, which does no processing at all and is a pure ++First In, First Out queue. It does however store traffic when the network interface ++can't handle it momentarily. ++ ++.SH CLASSES ++Some qdiscs can contain classes, which contain further qdiscs - traffic may ++then be enqueued in any of the inner qdiscs, which are within the ++.B classes. ++When the kernel tries to dequeue a packet from such a ++.B classful qdisc ++it can come from any of the classes. A qdisc may for example prioritize ++certain kinds of traffic by trying to dequeue from certain classes ++before others. ++ ++.SH FILTERS ++A ++.B filter ++is used by a classful qdisc to determine in which class a packet will ++be enqueued. Whenever traffic arrives at a class with subclasses, it needs ++to be classified. Various methods may be employed to do so, one of these ++are the filters. All filters attached to the class are called, until one of ++them returns with a verdict. If no verdict was made, other criteria may be ++available. This differs per qdisc. ++ ++It is important to notice that filters reside ++.B within ++qdiscs - they are not masters of what happens. ++ ++.SH CLASSLESS QDISCS ++The classless qdiscs are: ++.TP ++[p|b]fifo ++Simplest usable qdisc, pure First In, First Out behaviour. Limited in ++packets or in bytes. ++.TP ++pfifo_fast ++Standard qdisc for 'Advanced Router' enabled kernels. 
Consists of a three-band ++queue which honors Type of Service flags, as well as the priority that may be ++assigned to a packet. ++.TP ++red ++Random Early Detection simulates physical congestion by randomly dropping ++packets when nearing configured bandwidth allocation. Well suited to very ++large bandwidth applications. ++.TP ++sfq ++Stochastic Fairness Queueing reorders queued traffic so each 'session' ++gets to send a packet in turn. ++.TP ++tbf ++The Token Bucket Filter is suited for slowing traffic down to a precisely ++configured rate. Scales well to large bandwidths. ++.SH CONFIGURING CLASSLESS QDISCS ++In the absence of classful qdiscs, classless qdiscs can only be attached at ++the root of a device. Full syntax: ++.P ++.B tc qdisc add dev ++DEV ++.B root ++QDISC QDISC-PARAMETERS ++ ++To remove, issue ++.P ++.B tc qdisc del dev ++DEV ++.B root ++ ++The ++.B pfifo_fast ++qdisc is the automatic default in the absence of a configured qdisc. ++ ++.SH CLASSFUL QDISCS ++The classful qdiscs are: ++.TP ++CBQ ++Class Based Queueing implements a rich linksharing hierarchy of classes. ++It contains shaping elements as well as prioritizing capabilities. Shaping is ++performed using link idle time calculations based on average packet size and ++underlying link bandwidth. The latter may be ill-defined for some interfaces. ++.TP ++HTB ++The Hierarchy Token Bucket implements a rich linksharing hierarchy of ++classes with an emphasis on conforming to existing practices. HTB facilitates ++guaranteeing bandwidth to classes, while also allowing specification of upper ++limits to inter-class sharing. It contains shaping elements, based on TBF and ++can prioritize classes. ++.TP ++PRIO ++The PRIO qdisc is a non-shaping container for a configurable number of ++classes which are dequeued in order. This allows for easy prioritization ++of traffic, where lower classes are only able to send if higher ones have ++no packets available. 
To facilitate configuration, Type Of Service bits are ++honored by default. ++.SH THEORY OF OPERATION ++Classes form a tree, where each class has a single parent. ++A class may have multiple children. Some qdiscs allow for runtime addition ++of classes (CBQ, HTB) while others (PRIO) are created with a static number of ++children. ++ ++Qdiscs which allow dynamic addition of classes can have zero or more ++subclasses to which traffic may be enqueued. ++ ++Furthermore, each class contains a ++.B leaf qdisc ++which by default has ++.B pfifo ++behaviour though another qdisc can be attached in place. This qdisc may again ++contain classes, but each class can have only one leaf qdisc. ++ ++When a packet enters a classful qdisc it can be ++.B classified ++to one of the classes within. Three criteria are available, although not all ++qdiscs will use all three: ++.TP ++tc filters ++If tc filters are attached to a class, they are consulted first ++for relevant instructions. Filters can match on all fields of a packet header, ++as well as on the firewall mark applied by ipchains or iptables. See ++.BR tc-filters (8). ++.TP ++Type of Service ++Some qdiscs have built in rules for classifying packets based on the TOS field. ++.TP ++skb->priority ++Userspace programs can encode a class-id in the 'skb->priority' field using ++the SO_PRIORITY option. ++.P ++Each node within the tree can have its own filters but higher level filters ++may also point directly to lower classes. ++ ++If classification did not succeed, packets are enqueued to the leaf qdisc ++attached to that class. Check qdisc specific manpages for details, however. ++ ++.SH NAMING ++All qdiscs, classes and filters have IDs, which can either be specified ++or be automatically assigned. ++ ++IDs consist of a major number and a minor number, separated by a colon. 
++ ++.TP ++QDISCS ++A qdisc, which potentially can have children, ++gets assigned a major number, called a 'handle', leaving the minor ++number namespace available for classes. The handle is expressed as '10:'. ++It is customary to explicitly assign a handle to qdiscs expected to have ++children. ++ ++.TP ++CLASSES ++Classes residing under a qdisc share their qdisc major number, but each have ++a separate minor number called a 'classid' that has no relation to their ++parent classes, only to their parent qdisc. The same naming custom as for ++qdiscs applies. ++ ++.TP ++FILTERS ++Filters have a three part ID, which is only needed when using a hashed ++filter hierarchy, for which see ++.BR tc-filters (8). ++.SH UNITS ++All parameters accept a floating point number, possibly followed by a unit. ++.P ++Bandwidths or rates can be specified in: ++.TP ++kbps ++Kilobytes per second ++.TP ++mbps ++Megabytes per second ++.TP ++kbit ++Kilobits per second ++.TP ++mbit ++Megabits per second ++.TP ++bps or a bare number ++Bytes per second ++.P ++Amounts of data can be specified in: ++.TP ++kb or k ++Kilobytes ++.TP ++mb or m ++Megabytes ++.TP ++mbit ++Megabits ++.TP ++kbit ++Kilobits ++.TP ++b or a bare number ++Bytes. ++.P ++Lengths of time can be specified in: ++.TP ++s, sec or secs ++Whole seconds ++.TP ++ms, msec or msecs ++Milliseconds ++.TP ++us, usec, usecs or a bare number ++Microseconds. ++ ++.SH TC COMMANDS ++The following commands are available for qdiscs, classes and filter: ++.TP ++add ++Add a qdisc, class or filter to a node. For all entities, a ++.B parent ++must be passed, either by passing its ID or by attaching directly to the root of a device. ++When creating a qdisc or a filter, it can be named with the ++.B handle ++parameter. A class is named with the ++.B classid ++parameter. ++ ++.TP ++remove ++A qdisc can be removed by specifying its handle, which may also be 'root'. 
All subclasses and their leaf qdiscs ++are automatically deleted, as well as any filters attached to them. ++ ++.TP ++change ++Some entities can be modified 'in place'. Shares the syntax of 'add', with the exception ++that the handle cannot be changed and neither can the parent. In other words, ++.B ++change ++cannot move a node. ++ ++.TP ++replace ++Performs a nearly atomic remove/add on an existing node id. If the node does not exist yet ++it is created. ++ ++.TP ++link ++Only available for qdiscs and performs a replace where the node ++must exist already. ++ ++ ++.SH HISTORY ++.B tc ++was written by Alexey N. Kuznetsov and added in Linux 2.2. ++.SH SEE ALSO ++.BR tc-cbq (8), ++.BR tc-htb (8), ++.BR tc-sfq (8), ++.BR tc-red (8), ++.BR tc-tbf (8), ++.BR tc-pfifo (8), ++.BR tc-bfifo (8), ++.BR tc-pfifo_fast (8), ++.BR tc-filters (8) ++ ++.SH AUTHOR ++Manpage maintained by bert hubert (ahu@ds9a.nl) ++ +diff -Naur iproute2-orig/debian/manpages/tc-cbq-details.8 iproute2/debian/manpages/tc-cbq-details.8 +--- iproute2-orig/debian/manpages/tc-cbq-details.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-cbq-details.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,425 @@ ++.TH CBQ 8 "8 December 2001" "iproute2" "Linux" ++.SH NAME ++CBQ \- Class Based Queueing ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] cbq avpkt ++bytes ++.B bandwidth ++rate ++.B [ cell ++bytes ++.B ] [ ewma ++log ++.B ] [ mpu ++bytes ++.B ] ++ ++.B tc class ... 
dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] cbq allot ++bytes ++.B [ bandwidth ++rate ++.B ] [ rate ++rate ++.B ] prio ++priority ++.B [ weight ++weight ++.B ] [ minburst ++packets ++.B ] [ maxburst ++packets ++.B ] [ ewma ++log ++.B ] [ cell ++bytes ++.B ] avpkt ++bytes ++.B [ mpu ++bytes ++.B ] [ bounded isolated ] [ split ++handle ++.B & defmap ++defmap ++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++Class Based Queueing is a classful qdisc that implements a rich ++linksharing hierarchy of classes. It contains shaping elements as ++well as prioritizing capabilities. Shaping is performed using link ++idle time calculations based on the timing of dequeue events and ++underlying link bandwidth. ++ ++.SH SHAPING ALGORITHM ++Shaping is done using link idle time calculations, and actions taken if ++these calculations deviate from set limits. ++ ++When shaping a 10mbit/s connection to 1mbit/s, the link will ++be idle 90% of the time. If it isn't, it needs to be throttled so that it ++IS idle 90% of the time. ++ ++From the kernel's perspective, this is hard to measure, so CBQ instead ++derives the idle time from the number of microseconds (in fact, jiffies) ++that elapse between requests from the device driver for more data. Combined ++with the knowledge of packet sizes, this is used to approximate how full or ++empty the link is. ++ ++This is rather circumspect and doesn't always arrive at proper ++results. For example, what is the actual link speed of an interface ++that is not really able to transmit the full 100mbit/s of data, ++perhaps because of a badly implemented driver? A PCMCIA network card ++will also never achieve 100mbit/s because of the way the bus is ++designed - again, how do we calculate the idle time? ++ ++The physical link bandwidth may be ill defined in case of not-quite-real ++network devices like PPP over Ethernet or PPTP over TCP/IP. 
The effective ++bandwidth in that case is probably determined by the efficiency of pipes ++to userspace - which is not defined. ++ ++During operations, the effective idletime is measured using an ++exponential weighted moving average (EWMA), which considers recent ++packets to be exponentially more important than past ones. The Unix ++loadaverage is calculated in the same way. ++ ++The calculated idle time is subtracted from the EWMA measured one, ++the resulting number is called 'avgidle'. A perfectly loaded link has ++an avgidle of zero: packets arrive exactly at the calculated ++interval. ++ ++An overloaded link has a negative avgidle and if it gets too negative, ++CBQ throttles and is then 'overlimit'. ++ ++Conversely, an idle link might amass a huge avgidle, which would then ++allow infinite bandwidths after a few hours of silence. To prevent ++this, avgidle is capped at ++.B maxidle. ++ ++If overlimit, in theory, the CBQ could throttle itself for exactly the ++amount of time that was calculated to pass between packets, and then ++pass one packet, and throttle again. Due to timer resolution constraints, ++this may not be feasible, see the ++.B minburst ++parameter below. ++ ++.SH CLASSIFICATION ++Within the one CBQ instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, CBQ starts at the root and uses various methods to ++determine which class should receive the data. If a verdict is reached, this ++process is repeated for the recipient class which might have further ++means of classifying traffic to its children, if any. ++ ++CBQ has the following methods available to classify a packet to any child ++classes. ++.TP ++(i) ++.B skb->priority class encoding. ++Can be set from userspace by an application with the ++.B SO_PRIORITY ++setsockopt.
++The ++.B skb->priority class encoding ++only applies if the skb->priority holds a major:minor handle of an existing ++class within this qdisc. ++.TP ++(ii) ++tc filters attached to the class. ++.TP ++(iii) ++The defmap of a class, as set with the ++.B split & defmap ++parameters. The defmap may contain instructions for each possible Linux packet ++priority. ++ ++.P ++Each class also has a ++.B level. ++Leaf nodes, attached to the bottom of the class hierarchy, have a level of 0. ++.SH CLASSIFICATION ALGORITHM ++ ++Classification is a loop, which terminates when a leaf class is found. At any ++point the loop may jump to the fallback algorithm. ++ ++The loop consists of the following steps: ++.TP ++(i) ++If the packet is generated locally and has a valid classid encoded within its ++.B skb->priority, ++choose it and terminate. ++ ++.TP ++(ii) ++Consult the tc filters, if any, attached to this child. If these return ++a class which is not a leaf class, restart loop from the class returned. ++If it is a leaf, choose it and terminate. ++.TP ++(iii) ++If the tc filters did not return a class, but did return a classid, ++try to find a class with that id within this qdisc. ++Check if the found class is of a lower ++.B level ++than the current class. If so, and the returned class is not a leaf node, ++restart the loop at the found class. If it is a leaf node, terminate. ++If we found an upward reference to a higher level, enter the fallback ++algorithm. ++.TP ++(iv) ++If the tc filters did not return a class, nor a valid reference to one, ++consider the minor number of the reference to be the priority. Retrieve ++a class from the defmap of this class for the priority. If this did not ++contain a class, consult the defmap of this class for the ++.B BEST_EFFORT ++class. If this is an upward reference, or no ++.B BEST_EFFORT ++class was defined, ++enter the fallback algorithm. If a valid class was found, and it is not a ++leaf node, restart the loop at this class. 
If it is a leaf, choose it and ++terminate. If ++neither the priority distilled from the classid, nor the ++.B BEST_EFFORT ++priority yielded a class, enter the fallback algorithm. ++.P ++The fallback algorithm resides outside of the loop and is as follows. ++.TP ++(i) ++Consult the defmap of the class at which the jump to fallback occurred. If ++the defmap contains a class for the ++.B ++priority ++of the class (which is related to the TOS field), choose this class and ++terminate. ++.TP ++(ii) ++Consult the map for a class for the ++.B BEST_EFFORT ++priority. If found, choose it, and terminate. ++.TP ++(iii) ++Choose the class at which break out to the fallback algorithm occurred. Terminate. ++.P ++The packet is enqueued to the class which was chosen when either algorithm ++terminated. It is therefore possible for a packet to be enqueued *not* at a ++leaf node, but in the middle of the hierarchy. ++ ++.SH LINK SHARING ALGORITHM ++When dequeuing for sending to the network device, CBQ decides which of its ++classes will be allowed to send. It does so with a Weighted Round Robin process ++in which each class with packets gets a chance to send in turn. The WRR process ++starts by asking the highest priority classes (lowest numerically - ++highest semantically) for packets, and will continue to do so until they ++have no more data to offer, in which case the process repeats for lower ++priorities. ++ ++.B CERTAINTY ENDS HERE, ANK PLEASE HELP ++ ++Each class is not allowed to send at length though - they can only dequeue a ++configurable amount of data during each round. ++ ++If a class is about to go overlimit, and it is not ++.B bounded ++it will try to borrow avgidle from siblings that are not ++.B isolated. ++This process is repeated from the bottom upwards. If a class is unable ++to borrow enough avgidle to send a packet, it is throttled and not asked ++for a packet for enough time for the avgidle to increase above zero. ++ ++.B I REALLY NEED HELP FIGURING THIS OUT.
REST OF DOCUMENT IS PRETTY CERTAIN ++.B AGAIN. ++ ++.SH QDISC ++The root qdisc of a CBQ class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the CBQ instance, either at the ++.B root ++of an interface or within an existing class. ++.TP ++handle major: ++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional. ++.TP ++avpkt bytes ++For calculations, the average packet size must be known. It is silently capped ++at a minimum of 2/3 of the interface MTU. Mandatory. ++.TP ++bandwidth rate ++To determine the idle time, CBQ must know the bandwidth of your underlying ++physical interface, or parent qdisc. This is a vital parameter, more about it ++later. Mandatory. ++.TP ++cell ++The cell size determines the granularity of packet transmission time calculations. Has a sensible default. ++.TP ++mpu ++A zero sized packet may still take time to transmit. This value is the lower ++cap for packet transmission time calculations - packets smaller than this value ++are still deemed to have this size. Defaults to zero. ++.TP ++ewma log ++When CBQ needs to measure the average idle time, it does so using an ++Exponentially Weighted Moving Average which smooths out measurements into ++a moving average. The EWMA LOG determines how much smoothing occurs. Defaults ++to 5. Lower values imply greater sensitivity. Must be between 0 and 31. ++.P ++A CBQ qdisc does not shape out of its own accord. It only needs to know certain ++parameters about the underlying link. Actual shaping is done in classes. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named.
The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++weight weight ++When dequeuing to the interface, classes are tried for traffic in a ++round-robin fashion. Classes with a higher configured qdisc will generally ++have more traffic to offer during each round, so it makes sense to allow ++it to dequeue more traffic. All weights under a class are normalized, so ++only the ratios matter. Defaults to the configured rate, unless the priority ++of this class is maximal, in which case it is set to 1. ++.TP ++allot bytes ++Allot specifies how many bytes a qdisc can dequeue ++during each round of the process. This parameter is weighted using the ++renormalized class weight described above. ++ ++.TP ++priority priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++rate rate ++Maximum rate this class and all its children combined can send at. Mandatory. ++ ++.TP ++bandwidth rate ++This is different from the bandwidth specified when creating a CBQ disc. Only ++used to determine maxidle and offtime, which are only calculated when ++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst. ++ ++.TP ++maxburst ++This number of packets is used to calculate maxidle so that when ++avgidle is at maxidle, this number of average packets can be burst ++before avgidle drops to 0. Set it higher to be more tolerant of ++bursts. You can't set maxidle directly, only via this parameter. ++ ++.TP ++minburst ++As mentioned before, CBQ needs to throttle in case of ++overlimit. The ideal solution is to do so for exactly the calculated ++idle time, and pass 1 packet. However, Unix kernels generally have a ++hard time scheduling events shorter than 10ms, so it is better to ++throttle for a longer period, and then pass minburst packets in one ++go, and then sleep minburst times longer. 
++ ++The time to wait is called the offtime. Higher values of minburst lead ++to more accurate shaping in the long term, but to bigger bursts at ++millisecond timescales. ++ ++.TP ++minidle ++If avgidle is below 0, we are overlimit and need to wait until ++avgidle will be big enough to send one packet. To prevent a sudden ++burst from shutting down the link for a prolonged period of time, ++avgidle is reset to minidle if it gets too low. ++ ++Minidle is specified in negative microseconds, so 10 means that ++avgidle is capped at -10us. ++ ++.TP ++bounded ++Signifies that this class will not borrow bandwidth from its siblings. ++.TP ++isolated ++Means that this class will not lend bandwidth to its siblings. ++ ++.TP ++split major:minor & defmap bitmap[/bitmap] ++If consulting filters attached to a class did not give a verdict, ++CBQ can also classify based on the packet's priority. There are 16 ++priorities available, numbered from 0 to 15. ++ ++The defmap specifies which priorities this class wants to receive, ++specified as a bitmap. The Least Significant Bit corresponds to priority ++zero. The ++.B split ++parameter tells CBQ at which class the decision must be made, which should ++be a (grand)parent of the class you are adding. ++ ++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0' ++configures class 10:0 to send packets with priorities 6 and 7 to 10:1. ++ ++The complementary configuration would then ++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f' ++Which would send all packets 0, 1, 2, 3, 4 and 5 to 10:2. ++.TP ++estimator interval timeconstant ++CBQ can measure how much bandwidth each class is using, which tc filters ++can use to classify packets with. In order to determine the bandwidth ++it uses a very simple estimator that measures once every ++.B interval ++microseconds how much traffic has passed. This again is an EWMA, for which ++the time constant can be specified, also in microseconds.
The ++.B time constant ++corresponds to the sluggishness of the measurement or, conversely, to the ++sensitivity of the average to short bursts. Higher values mean less ++sensitivity. ++ ++ ++ ++.SH SOURCES ++.TP ++o ++Sally Floyd and Van Jacobson, "Link-sharing and Resource ++Management Models for Packet Networks", ++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on CBQ and Guarantee Service", 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on Class-Based Queueing: Setting ++Parameters", 1996 ++ ++.TP ++o ++Sally Floyd and Michael Speer, "Experimental Results ++for Class-Based Queueing", 1998, not published. ++ ++ ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-cbq.8 iproute2/debian/manpages/tc-cbq.8 +--- iproute2-orig/debian/manpages/tc-cbq.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-cbq.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,353 @@ ++.TH CBQ 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++CBQ \- Class Based Queueing ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] cbq [ allot ++bytes ++.B ] avpkt ++bytes ++.B bandwidth ++rate ++.B [ cell ++bytes ++.B ] [ ewma ++log ++.B ] [ mpu ++bytes ++.B ] ++ ++.B tc class ... dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] cbq allot ++bytes ++.B [ bandwidth ++rate ++.B ] [ rate ++rate ++.B ] prio ++priority ++.B [ weight ++weight ++.B ] [ minburst ++packets ++.B ] [ maxburst ++packets ++.B ] [ ewma ++log ++.B ] [ cell ++bytes ++.B ] avpkt ++bytes ++.B [ mpu ++bytes ++.B ] [ bounded isolated ] [ split ++handle ++.B & defmap ++defmap ++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++Class Based Queueing is a classful qdisc that implements a rich ++linksharing hierarchy of classes. 
It contains shaping elements as ++well as prioritizing capabilities. Shaping is performed using link ++idle time calculations based on the timing of dequeue events and ++underlying link bandwidth. ++ ++.SH SHAPING ALGORITHM ++When shaping a 10mbit/s connection to 1mbit/s, the link will ++be idle 90% of the time. If it isn't, it needs to be throttled so that it ++IS idle 90% of the time. ++ ++During operations, the effective idletime is measured using an ++exponential weighted moving average (EWMA), which considers recent ++packets to be exponentially more important than past ones. The Unix ++loadaverage is calculated in the same way. ++ ++The calculated idle time is subtracted from the EWMA measured one, ++the resulting number is called 'avgidle'. A perfectly loaded link has ++an avgidle of zero: packets arrive exactly at the calculated ++interval. ++ ++An overloaded link has a negative avgidle and if it gets too negative, ++CBQ throttles and is then 'overlimit'. ++ ++Conversely, an idle link might amass a huge avgidle, which would then ++allow infinite bandwidths after a few hours of silence. To prevent ++this, avgidle is capped at ++.B maxidle. ++ ++If overlimit, in theory, the CBQ could throttle itself for exactly the ++amount of time that was calculated to pass between packets, and then ++pass one packet, and throttle again. Due to timer resolution constraints, ++this may not be feasible, see the ++.B minburst ++parameter below. ++ ++.SH CLASSIFICATION ++Within the one CBQ instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, CBQ starts at the root and uses various methods to ++determine which class should receive the data. ++ ++In the absence of uncommon configuration options, the process is rather easy. ++At each node we look for an instruction, and then go to the class the ++instruction refers us to. 
If the class found is a barren leaf-node (without ++children), we enqueue the packet there. If it is not yet a leaf node, we do ++the whole thing over again starting from that node. ++ ++The following actions are performed, in order at each node we visit, until one ++sends us to another node, or terminates the process. ++.TP ++(i) ++Consult filters attached to the class. If sent to a leafnode, we are done. ++Otherwise, restart. ++.TP ++(ii) ++Consult the defmap for the priority assigned to this packet, which depends ++on the TOS bits. Check if the referral is leafless, otherwise restart. ++.TP ++(iii) ++Ask the defmap for instructions for the 'best effort' priority. Check the ++answer for leafness, otherwise restart. ++.TP ++(iv) ++If none of the above returned with an instruction, enqueue at this node. ++.P ++This algorithm makes sure that a packet always ends up somewhere, even while ++you are busy building your configuration. ++ ++For more details, see ++.BR tc-cbq-details(8). ++ ++.SH LINK SHARING ALGORITHM ++When dequeuing for sending to the network device, CBQ decides which of its ++classes will be allowed to send. It does so with a Weighted Round Robin process ++in which each class with packets gets a chance to send in turn. The WRR process ++starts by asking the highest priority classes (lowest numerically - ++highest semantically) for packets, and will continue to do so until they ++have no more data to offer, in which case the process repeats for lower ++priorities. ++ ++Classes by default borrow bandwidth from their siblings. A class can be ++prevented from doing so by declaring it 'bounded'. A class can also indicate ++its unwillingness to lend out bandwidth by being 'isolated'. ++ ++.SH QDISC ++The root of a CBQ qdisc class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the CBQ instance, either at the ++.B root ++of an interface or within an existing class. 
++.TP ++handle major: ++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional, but very useful if classes ++will be generated within this qdisc. ++.TP ++allot bytes ++This allotment is the 'chunkiness' of link sharing and is used for determining packet ++transmission time tables. The qdisc allot differs slightly from the class allot discussed ++below. Optional. Defaults to a reasonable value, related to avpkt. ++.TP ++avpkt bytes ++The average size of a packet is needed for calculating maxidle, and is also used ++for making sure 'allot' has a safe value. Mandatory. ++.TP ++bandwidth rate ++To determine the idle time, CBQ must know the bandwidth of your underlying ++physical interface, or parent qdisc. This is a vital parameter, more about it ++later. Mandatory. ++.TP ++cell ++The cell size determines the granularity of packet transmission time calculations. Has a sensible default. ++.TP ++mpu ++A zero sized packet may still take time to transmit. This value is the lower ++cap for packet transmission time calculations - packets smaller than this value ++are still deemed to have this size. Defaults to zero. ++.TP ++ewma log ++When CBQ needs to measure the average idle time, it does so using an ++Exponentially Weighted Moving Average which smooths out measurements into ++a moving average. The EWMA LOG determines how much smoothing occurs. Lower ++values imply greater sensitivity. Must be between 0 and 31. Defaults ++to 5. ++.P ++A CBQ qdisc does not shape out of its own accord. It only needs to know certain ++parameters about the underlying link. Actual shaping is done in classes. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named.
The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++weight weight ++When dequeuing to the interface, classes are tried for traffic in a ++round-robin fashion. Classes with a higher configured qdisc will generally ++have more traffic to offer during each round, so it makes sense to allow ++it to dequeue more traffic. All weights under a class are normalized, so ++only the ratios matter. Defaults to the configured rate, unless the priority ++of this class is maximal, in which case it is set to 1. ++.TP ++allot bytes ++Allot specifies how many bytes a qdisc can dequeue ++during each round of the process. This parameter is weighted using the ++renormalized class weight described above. Silently capped at a minimum of ++3/2 avpkt. Mandatory. ++ ++.TP ++prio priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++avpkt ++See the QDISC section. ++ ++.TP ++rate rate ++Maximum rate this class and all its children combined can send at. Mandatory. ++ ++.TP ++bandwidth rate ++This is different from the bandwidth specified when creating a CBQ disc! Only ++used to determine maxidle and offtime, which are only calculated when ++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst. ++ ++.TP ++maxburst ++This number of packets is used to calculate maxidle so that when ++avgidle is at maxidle, this number of average packets can be burst ++before avgidle drops to 0. Set it higher to be more tolerant of ++bursts. You can't set maxidle directly, only via this parameter. ++ ++.TP ++minburst ++As mentioned before, CBQ needs to throttle in case of ++overlimit. The ideal solution is to do so for exactly the calculated ++idle time, and pass 1 packet. 
However, Unix kernels generally have a ++hard time scheduling events shorter than 10ms, so it is better to ++throttle for a longer period, and then pass minburst packets in one ++go, and then sleep minburst times longer. ++ ++The time to wait is called the offtime. Higher values of minburst lead ++to more accurate shaping in the long term, but to bigger bursts at ++millisecond timescales. Optional. ++ ++.TP ++minidle ++If avgidle is below 0, we are overlimits and need to wait until ++avgidle will be big enough to send one packet. To prevent a sudden ++burst from shutting down the link for a prolonged period of time, ++avgidle is reset to minidle if it gets too low. ++ ++Minidle is specified in negative microseconds, so 10 means that ++avgidle is capped at -10us. Optional. ++ ++.TP ++bounded ++Signifies that this class will not borrow bandwidth from its siblings. ++.TP ++isolated ++Means that this class will not lend bandwidth to its siblings. ++ ++.TP ++split major:minor & defmap bitmap[/bitmap] ++If consulting filters attached to a class did not give a verdict, ++CBQ can also classify based on the packet's priority. There are 16 ++priorities available, numbered from 0 to 15. ++ ++The defmap specifies which priorities this class wants to receive, ++specified as a bitmap. The Least Significant Bit corresponds to priority ++zero. The ++.B split ++parameter tells CBQ at which class the decision must be made, which should ++be a (grand)parent of the class you are adding. ++ ++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0' ++configures class 10:0 to send packets with priorities 6 and 7 to 10:1. ++ ++The complementary configuration would then ++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f' ++Which would send all packets with priorities 0, 1, 2, 3, 4 and 5 to 10:2. ++.TP ++estimator interval timeconstant ++CBQ can measure how much bandwidth each class is using, which tc filters ++can use to classify packets with.
In order to determine the bandwidth ++it uses a very simple estimator that measures once every ++.B interval ++microseconds how much traffic has passed. This again is an EWMA, for which ++the time constant can be specified, also in microseconds. The ++.B time constant ++corresponds to the sluggishness of the measurement or, conversely, to the ++sensitivity of the average to short bursts. Higher values mean less ++sensitivity. ++ ++.SH BUGS ++The actual bandwidth of the underlying link may not be known, for example ++in the case of PPPoE or PPTP connections which in fact may send over a ++pipe, instead of over a physical device. CBQ is quite resilient to major ++errors in the configured bandwidth, probably at the cost of coarser shaping. ++ ++Default kernels rely on coarse timing information for making decisions. These ++may make shaping precise in the long term, but inaccurate on second long scales. ++ ++See ++.BR tc-cbq-details(8) ++for hints on how to improve this. ++ ++.SH SOURCES ++.TP ++o ++Sally Floyd and Van Jacobson, "Link-sharing and Resource ++Management Models for Packet Networks", ++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on Class-Based Queueing: Setting ++Parameters", 1996 ++ ++.TP ++o ++Sally Floyd and Michael Speer, "Experimental Results ++for Class-Based Queueing", 1998, not published. ++ ++ ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-htb.8 iproute2/debian/manpages/tc-htb.8 +--- iproute2-orig/debian/manpages/tc-htb.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-htb.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,150 @@ ++.TH HTB 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++HTB \- Hierarchy Token Bucket ++.SH SYNOPSIS ++.B tc qdisc ...
dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] htb [ default ++minor-id ++.B ] ++ ++.B tc class ... dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] htb rate ++rate ++.B [ ceil ++rate ++.B ] burst ++bytes ++.B [ cburst ++bytes ++.B ] [ prio ++priority ++.B ] ++ ++.SH DESCRIPTION ++HTB is meant as a more understandable and intuitive replacement for ++the CBQ qdisc in Linux. Both CBQ and HTB help you to control the use ++of the outbound bandwidth on a given link. Both allow you to use one ++physical link to simulate several slower links and to send different ++kinds of traffic on different simulated links. In both cases, you have ++to specify how to divide the physical link into simulated links and ++how to decide which simulated link to use for a given packet to be sent. ++ ++Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm ++which does not depend on interface characteristics and so does not need to ++know the underlying bandwidth of the outgoing interface. ++ ++.SH SHAPING ALGORITHM ++Shaping works as documented in ++.B tc-tbf (8). ++ ++.SH CLASSIFICATION ++Within the one HTB instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, HTB starts at the root and uses various methods to ++determine which class should receive the data. ++ ++In the absence of uncommon configuration options, the process is rather easy. ++At each node we look for an instruction, and then go to the class the ++instruction refers us to. If the class found is a barren leaf-node (without ++children), we enqueue the packet there. If it is not yet a leaf node, we do ++the whole thing over again starting from that node. ++ ++The following actions are performed, in order at each node we visit, until one ++sends us to another node, or terminates the process. ++.TP ++(i) ++Consult filters attached to the class.
If sent to a leafnode, we are done. ++Otherwise, restart. ++.TP ++(ii) ++If none of the above returned with an instruction, enqueue at this node. ++.P ++This algorithm makes sure that a packet always ends up somewhere, even while ++you are busy building your configuration. ++ ++.SH LINK SHARING ALGORITHM ++FIXME ++ ++.SH QDISC ++The root of a HTB qdisc class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the HTB instance, either at the ++.B root ++of an interface or within an existing class. ++.TP ++handle major: ++Like all other qdiscs, the HTB can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional, but very useful if classes ++will be generated within this qdisc. ++.TP ++default minor-id ++Unclassified traffic gets sent to the class with this minor-id. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named. The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++prio priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++rate rate ++Maximum rate this class and all its children are guaranteed. Mandatory. ++ ++.TP ++ceil rate ++Maximum rate at which a class can send, if its parent has bandwidth to spare. ++Defaults to the configured rate, which implies no borrowing ++ ++.TP ++burst bytes ++Amount of bytes that can be burst at ++.B ceil ++speed, in excess of the configured ++.B rate. ++Should be at least as high as the highest burst of all children. 
++ ++.TP ++cburst bytes ++Amount of bytes that can be burst at 'infinite' speed, in other words, as fast ++as the interface can transmit them. For perfect evening out, should be equal to at most one average ++packet. Should be at least as high as the highest cburst of all children. ++ ++.SH NOTES ++Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel, ++there are 100 timer events per second, the maximum rate is that rate at which 'burst' bytes are sent each timer tick. ++From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte ++burst as 100*12kb*8 equals 10mbit. ++ ++.SH SEE ALSO ++.BR tc (8) ++.P ++HTB website: http://luxik.cdi.cz/~devik/qos/htb/ ++.SH AUTHOR ++Martin Devera . This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-pbfifo.8 iproute2/debian/manpages/tc-pbfifo.8 +--- iproute2-orig/debian/manpages/tc-pbfifo.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-pbfifo.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,72 @@ ++.TH PBFIFO 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++pfifo \- Packet limited First In, First Out queue ++.P ++bfifo \- Byte limited First In, First Out queue ++ ++.SH SYNOPSIS ++.B tc qdisc ... add pfifo ++.B [ limit ++packets ++.B ] ++.P ++.B tc qdisc ... add bfifo ++.B [ limit ++bytes ++.B ] ++ ++.SH DESCRIPTION ++The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the ++simplest queues possible and therefore have no overhead. ++.B pfifo ++constrains the queue size as measured in packets. ++.B bfifo ++does so as measured in bytes. ++ ++Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer ++pfifo or bfifo over the default. ++ ++.SH ALGORITHM ++A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of ++a list.
When a packet needs to be sent out to the network, it is taken from the head of the list. ++ ++If the list is too long, no further packets are allowed on. This is called 'tail drop'. ++ ++.SH PARAMETERS ++.TP ++limit ++Maximum queue size. Specified in bytes for bfifo, in packets for pfifo. For pfifo, defaults ++to the interface txqueuelen, as specified with ++.BR ifconfig (8) ++or ++.BR ip (8). ++ ++For bfifo, it defaults to the txqueuelen multiplied by the interface MTU. ++ ++.SH OUTPUT ++The output of ++.B tc -s qdisc ls ++contains the limit, either in packets or in bytes, and the number of bytes ++and packets actually sent. An unsent and dropped packet only appears between braces ++and is not counted as 'Sent'. ++ ++In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets. ++No packets were dropped, and as the pfifo queue does not slow down packets, there were also no ++overlimits: ++.P ++.nf ++# tc -s qdisc ls dev eth0 ++qdisc pfifo 8001: dev eth0 limit 100p ++ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0) ++.fi ++ ++If a backlog occurs, this is displayed as well. ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, ++ ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-pfifo_fast.8 iproute2/debian/manpages/tc-pfifo_fast.8 +--- iproute2-orig/debian/manpages/tc-pfifo_fast.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-pfifo_fast.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,59 @@ ++.TH PFIFO_FAST 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++pfifo_fast \- three-band first in, first out queue ++ ++.SH DESCRIPTION ++pfifo_fast is the default qdisc of each interface. ++ ++Whenever an interface is created, the pfifo_fast qdisc is automatically used ++as a queue. If another qdisc is attached, it preempts the default ++pfifo_fast, which automatically returns to function when an existing qdisc ++is detached. 
++ ++In this sense this qdisc is magic, and unlike other qdiscs. ++ ++.SH ALGORITHM ++The algorithm is very similar to that of the classful ++.BR tc-prio (8) ++qdisc. ++.B pfifo_fast ++is like three ++.BR tc-pfifo (8) ++queues side by side, where packets can be enqueued in any of the three bands ++based on their Type of Service bits or assigned priority. ++ ++Not all three bands are dequeued simultaneously - as long as lower bands ++have traffic, higher bands are never dequeued. This can be used to ++prioritize interactive traffic or penalize 'lowest cost' traffic. ++ ++Each band can be txqueuelen packets long, as configured with ++.BR ifconfig (8) ++or ++.BR ip (8). ++Additional packets coming in are not enqueued but are instead dropped. ++ ++See ++.BR tc-prio (8) ++for complete details on how TOS bits are translated into bands. ++.SH PARAMETERS ++.TP ++txqueuelen ++The length of the three bands depends on the interface txqueuelen, as ++specified with ++.BR ifconfig (8) ++or ++.BR ip (8). ++ ++.SH BUGS ++Does not maintain statistics and does not show up in tc qdisc ls. This is because ++it is the automatic default in the absence of a configured qdisc. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, ++ ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-prio.8 iproute2/debian/manpages/tc-prio.8 +--- iproute2-orig/debian/manpages/tc-prio.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-prio.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,187 @@ ++.TH PRIO 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++PRIO \- Priority qdisc ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] prio [ bands ++bands ++.B ] [ priomap ++band,band,band... 
++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++The PRIO qdisc is a simple classful queueing discipline that contains ++an arbitrary number of classes of differing priority. The classes are ++dequeued in numerical descending order of priority. PRIO is a scheduler ++and never delays packets - it is a work-conserving qdisc, though the qdiscs ++contained in the classes may not be. ++ ++Very useful for lowering latency when there is no need for slowing down ++traffic. ++ ++.SH ALGORITHM ++On creation with 'tc qdisc add', a fixed number of bands is created. Each ++band is a class, although it is not possible to add classes with 'tc qdisc ++add', the number of bands to be created must instead be specified on the ++commandline attaching PRIO to its root. ++ ++When dequeueing, band 0 is tried first and only if it did not deliver a ++packet does PRIO try band 1, and so onwards. Maximum reliability packets ++should therefore go to band 0, minimum delay to band 1 and the rest to band ++2. ++ ++As the PRIO qdisc itself will have minor number 0, band 0 is actually ++major:1, band 1 is major:2, etc. For major, substitute the major number ++assigned to the qdisc on 'tc qdisc add' with the ++.B handle ++parameter. ++ ++.SH CLASSIFICATION ++Three methods are available to PRIO to determine in which band a packet will ++be enqueued. ++.TP ++From userspace ++A process with sufficient privileges can encode the destination class ++directly with SO_PRIORITY, see ++.BR tc(7). ++.TP ++with a tc filter ++A tc filter attached to the root qdisc can point traffic directly to a class ++.TP ++with the priomap ++Based on the packet priority, which in turn is derived from the Type of ++Service assigned to the packet. ++.P ++Only the priomap is specific to this qdisc. ++.SH QDISC PARAMETERS ++.TP ++bands ++Number of bands. If changed from the default of 3, ++.B priomap ++must be updated as well. ++.TP ++priomap ++The priomap maps the priority of ++a packet to a class.
The priority can either be set directly from userspace, ++or be derived from the Type of Service of the packet. ++ ++Determines how packet priorities, as assigned by the kernel, map to ++bands. Mapping occurs based on the TOS octet of the packet, which looks like ++this: ++ ++.nf ++0 1 2 3 4 5 6 7 +++---+---+---+---+---+---+---+---+ ++| | | | ++|PRECEDENCE | TOS |MBZ| ++| | | | +++---+---+---+---+---+---+---+---+ ++.fi ++ ++The four TOS bits (the 'TOS field') are defined as: ++ ++.nf ++Binary Decimcal Meaning ++----------------------------------------- ++1000 8 Minimize delay (md) ++0100 4 Maximize throughput (mt) ++0010 2 Maximize reliability (mr) ++0001 1 Minimize monetary cost (mmc) ++0000 0 Normal Service ++.fi ++ ++As there is 1 bit to the right of these four bits, the actual value of the ++TOS field is double the value of the TOS bits. Tcpdump -v -v shows you the ++value of the entire TOS field, not just the four bits. It is the value you ++see in the first column of this table: ++ ++.nf ++TOS Bits Means Linux Priority Band ++------------------------------------------------------------ ++0x0 0 Normal Service 0 Best Effort 1 ++0x2 1 Minimize Monetary Cost 1 Filler 2 ++0x4 2 Maximize Reliability 0 Best Effort 1 ++0x6 3 mmc+mr 0 Best Effort 1 ++0x8 4 Maximize Throughput 2 Bulk 2 ++0xa 5 mmc+mt 2 Bulk 2 ++0xc 6 mr+mt 2 Bulk 2 ++0xe 7 mmc+mr+mt 2 Bulk 2 ++0x10 8 Minimize Delay 6 Interactive 0 ++0x12 9 mmc+md 6 Interactive 0 ++0x14 10 mr+md 6 Interactive 0 ++0x16 11 mmc+mr+md 6 Interactive 0 ++0x18 12 mt+md 4 Int. Bulk 1 ++0x1a 13 mmc+mt+md 4 Int. Bulk 1 ++0x1c 14 mr+mt+md 4 Int. Bulk 1 ++0x1e 15 mmc+mr+mt+md 4 Int. Bulk 1 ++.fi ++ ++The second column contains the value of the relevant ++four TOS bits, followed by their translated meaning. For example, 15 stands ++for a packet wanting Minimal Montetary Cost, Maximum Reliability, Maximum ++Throughput AND Minimum Delay. 
++ ++The fourth column lists the way the Linux kernel interprets the TOS bits, by ++showing to which Priority they are mapped. ++ ++The last column shows the result of the default priomap. On the commandline, ++the default priomap looks like this: ++ ++ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 ++ ++This means that priority 4, for example, gets mapped to band number 1. ++The priomap also allows you to list higher priorities (> 7) which do not ++correspond to TOS mappings, but which are set by other means. ++ ++This table from RFC 1349 (read it for more details) explains how ++applications might very well set their TOS bits: ++ ++.nf ++TELNET 1000 (minimize delay) ++FTP ++ Control 1000 (minimize delay) ++ Data 0100 (maximize throughput) ++ ++TFTP 1000 (minimize delay) ++ ++SMTP ++ Command phase 1000 (minimize delay) ++ DATA phase 0100 (maximize throughput) ++ ++Domain Name Service ++ UDP Query 1000 (minimize delay) ++ TCP Query 0000 ++ Zone Transfer 0100 (maximize throughput) ++ ++NNTP 0001 (minimize monetary cost) ++ ++ICMP ++ Errors 0000 ++ Requests 0000 (mostly) ++ Responses (mostly) ++.fi ++ ++ ++.SH CLASSES ++PRIO classes cannot be configured further - they are automatically created ++when the PRIO qdisc is attached. Each class however can contain yet a ++further qdisc. ++ ++.SH BUGS ++Large amounts of traffic in the lower bands can cause starvation of higher ++bands. Can be prevented by attaching a shaper (for example, ++.BR tc-tbf(8) ++to these bands to make sure they cannot dominate the link. ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, , J Hadi Salim ++. 
This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-red.8 iproute2/debian/manpages/tc-red.8 +--- iproute2-orig/debian/manpages/tc-red.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-red.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,131 @@ ++.TH RED 8 "13 December 2001" "iproute2" "Linux" ++.SH NAME ++red \- Random Early Detection ++.SH SYNOPSIS ++.B tc qdisc ... red ++.B limit ++bytes ++.B min ++bytes ++.B max ++bytes ++.B avpkt ++bytes ++.B burst ++packets ++.B [ ecn ] [ bandwidth ++rate ++.B ] probability ++chance ++ ++.SH DESCRIPTION ++Random Early Detection is a classless qdisc which manages its queue size ++smartly. Regular queues simply drop packets from the tail when they are ++full, which may not be the optimal behaviour. RED also performs tail drop, ++but does so in a more gradual way. ++ ++Once the queue hits a certain average length, packets enqueued have a ++configurable chance of being marked (which may mean dropped). This chance ++increases linearly up to a point called the ++.B max ++average queue length, although the queue might get bigger. ++ ++This has a host of benefits over simple taildrop, while not being processor ++intensive. It prevents synchronous retransmits after a burst in traffic, ++which cause further retransmits, etc. ++ ++The goal is to have a small queue size, which is good for interactivity ++while not disturbing TCP/IP traffic with too many sudden drops after a burst ++of traffic. ++ ++Depending on if ECN is configured, marking either means dropping or ++purely marking a packet as overlimit. ++.SH ALGORITHM ++The average queue size is used for determining the marking ++probability. This is calculated using an Exponential Weighted Moving ++Average, which can be more or less sensitive to bursts. ++ ++When the average queue size is below ++.B min ++bytes, no packet will ever be marked.
When it exceeds ++.B min, ++the probability of doing so climbs linearly up ++to ++.B probability, ++until the average queue size hits ++.B max ++bytes. Because ++.B probability ++is normally not set to 100%, the queue size might ++conceivably rise above ++.B max ++bytes, so the ++.B limit ++parameter is provided to set a hard maximum for the size of the queue. ++ ++.SH PARAMETERS ++.TP ++min ++Average queue size at which marking becomes a possibility. ++.TP ++max ++At this average queue size, the marking probability is maximal. Should be at ++least twice ++.B min ++to prevent synchronous retransmits, higher for low ++.B min. ++.TP ++probability ++Maximum probability for marking, specified as a floating point ++number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%, ++respectively). ++.TP ++limit ++Hard limit on the real (not average) queue size in bytes. Further packets ++are dropped. Should be set higher than max+burst. It is advised to set this ++a few times higher than ++.B max. ++.TP ++burst ++Used for determining how fast the average queue size is influenced by the ++real queue size. Larger values make the calculation more sluggish, allowing ++longer bursts of traffic before marking starts. Real life experiments ++support the following guideline: (min+min+max)/(3*avpkt). ++.TP ++avpkt ++Specified in bytes. Used with burst to determine the time constant for ++average queue size calculations. 1000 is a good value. ++.TP ++bandwidth ++This rate is used for calculating the average queue size after some ++idle time. Should be set to the bandwidth of your interface. Does not mean ++that RED will shape for you! Optional. ++.TP ++ecn ++As mentioned before, RED can either 'mark' or 'drop'. Explicit Congestion ++Notification allows RED to notify remote hosts that their rate exceeds the ++amount of bandwidth available. Non-ECN capable hosts can only be notified by ++dropping a packet. 
If this parameter is specified, packets which indicate ++that their hosts honor ECN will only be marked and not dropped, unless the ++queue size hits ++.B limit ++bytes. Needs a tc binary with RED support compiled in. Recommended. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH SOURCES ++.TP ++o ++Floyd, S., and Jacobson, V., Random Early Detection gateways for ++Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html ++.TP ++o ++Some changes to the algorithm by Alexey N. Kuznetsov. ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, , Alexey Makarenko ++, J Hadi Salim . ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-sfq.8 iproute2/debian/manpages/tc-sfq.8 +--- iproute2-orig/debian/manpages/tc-sfq.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-sfq.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,107 @@ ++.TH TC 8 "8 December 2001" "iproute2" "Linux" ++.SH NAME ++sfq \- Stochastic Fairness Queueing ++.SH SYNOPSIS ++.B tc qdisc ... perturb ++seconds ++.B quantum ++bytes ++ ++.SH DESCRIPTION ++ ++Stochastic Fairness Queueing is a classless queueing discipline available for ++traffic control with the ++.BR tc (8) ++command. ++ ++SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'. ++The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing ++any single flow from drowning out the rest. ++ ++This may in fact have some effect in mitigating a Denial of Service attempt. ++ ++SFQ is work-conserving and therefore always delivers a packet if it has one available. ++.SH ALGORITHM ++On enqueueing, each packet is assigned to a hash bucket, based on ++.TP ++(i) ++Source address ++.TP ++(ii) ++Destination address ++.TP ++(iii) ++Source port ++.P ++If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP. 
++Packets with other protocols are hashed based on the 32bits representation of their ++destination and the socket they belong to. A flow corresponds mostly to a TCP/IP ++connection. ++ ++Each of these buckets should represent a unique flow. Because multiple flows may ++get hashed to the same bucket, the hashing algorithm is perturbed at configurable ++intervals so that the unfairness lasts only for a short while. Perturbation may ++however cause some inadvertent packet reordering to occur. ++ ++When dequeuing, each hashbucket with data is queried in a round robin fashion. ++ ++The compile time maximum length of the SFQ is 128 packets, which can be spread over ++at most 128 buckets of 1024 available. In case of overflow, tail-drop is performed ++on the fullest bucket, thus maintaining fairness. ++ ++.SH PARAMETERS ++.TP ++perturb ++Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that ++no perturbation occurs. Do not set too low for each perturbation may cause some packet ++reordering. Advised value: 10 ++.TP ++quantum ++Amount of bytes a flow is allowed to dequeue during a round of the round robin process. ++Defaults to the MTU of the interface which is also the advised value and the minimum value. ++ ++.SH EXAMPLE & USAGE ++ ++To attach to device ppp0: ++.P ++# tc qdisc add dev ppp0 root sfq perturb 10 ++.P ++Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful ++if it owns the queue. ++This is the case when the link speed equals the actually available bandwidth. This holds ++for regular phone modems, ISDN connections and direct non-switched ethernet links. ++.P ++Most often, cable modems and DSL devices do not fall into this category. The same holds ++for when connected to a switch and trying to send data to a congested segment also ++connected to the switch. ++.P ++In this case, the effective queue does not reside within Linux and is therefore not ++available for scheduling. 
++.P ++Embed SFQ in a classful qdisc to make sure it owns the queue. ++ ++.SH SOURCE ++.TP ++o ++Paul E. McKenney "Stochastic Fairness Queuing", ++IEEE INFOCOMM'90 Proceedings, San Francisco, 1990. ++ ++.TP ++o ++Paul E. McKenney "Stochastic Fairness Queuing", ++"Interworking: Research and Experience", v.2, 1991, p.113-131. ++ ++.TP ++o ++See also: ++M. Shreedhar and George Varghese "Efficient Fair ++Queuing using Deficit Round Robin", Proc. SIGCOMM 95. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc-tbf.8 iproute2/debian/manpages/tc-tbf.8 +--- iproute2-orig/debian/manpages/tc-tbf.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc-tbf.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,138 @@ ++.TH TC 8 "13 December 2001" "iproute2" "Linux" ++.SH NAME ++tbf \- Token Bucket Filter ++.SH SYNOPSIS ++.B tc qdisc ... tbf rate ++rate ++.B burst ++bytes/cell ++.B ( latency ++ms ++.B | limit ++bytes ++.B ) [ mpu ++bytes ++.B [ peakrate ++rate ++.B mtu ++bytes/cell ++.B ] ] ++.P ++burst is also known as buffer and maxburst. mtu is also known as minburst. ++.SH DESCRIPTION ++ ++The Token Bucket Filter is a classless queueing discipline available for ++traffic control with the ++.BR tc (8) ++command. ++ ++TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle ++itself, although packets are available, to ensure that the configured rate is not exceeded. ++On all platforms except for Alpha, ++it is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness, ++sending out data exactly at the configured rates. ++ ++Much higher rates are possible but at the cost of losing the minimal burstiness. In that ++case, data is on average dequeued at the configured rate but may be sent much faster at millisecond ++timescales. 
Because of further queues living in network adaptors, this is often not a problem. ++ ++Kernels with a higher 'HZ' can achieve higher rates with perfect burstiness. On Alpha, HZ is ten ++times higher, leading to a 10mbit/s limit to perfection. These calculations hold for packets of on ++average 1000 bytes. ++ ++.SH ALGORITHM ++As the name implies, traffic is filtered based on the expenditure of ++.B tokens. ++Tokens roughly correspond to bytes, with the additional constraint that each packet consumes ++some tokens, no matter how small it is. This reflects the fact that even a zero-sized packet occupies ++the link for some time. ++ ++On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst ++in one go. Tokens arrive at a steady rate, until the bucket is full. ++ ++If no tokens are available, packets are queued, up to a configured limit. The TBF now ++calculates the token deficit, and throttles until the first packet in the queue can be sent. ++ ++If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured ++to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF ++with a very small bucket, so that it doesn't burst. ++ ++To achieve perfection, the second bucket may contain only a single packet, which leads to ++the earlier mentioned 1mbit/s limit. ++ ++This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends ++on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100 ++packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s. ++ ++.SH PARAMETERS ++See ++.BR tc (8) ++for how to specify the units of these values. ++.TP ++limit or latency ++Limit is the number of bytes that can be queued waiting for tokens to become ++available. 
You can also specify this the other way around by setting the ++latency parameter, which specifies the maximum amount of time a packet can ++sit in the TBF. The latter calculation takes into account the size of the ++bucket, the rate and possibly the peakrate (if set). These two parameters ++are mutually exclusive. ++.TP ++burst ++Also known as buffer or maxburst. ++Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously. ++In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer ++if you want to reach your configured rate! ++ ++If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket. ++The minimum buffer size can be calculated by dividing the rate by HZ. ++ ++Token usage calculations are performed using a table which by default has a resolution of 8 packets. ++This resolution can be changed by specifying the ++.B cell ++size with the burst. For example, to specify a 6000 byte buffer with a 16 ++byte cell size, set a burst of 6000/16. You will probably never have to set ++this. Must be an integral power of 2. ++.TP ++mpu ++A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit ++determines the minimal token usage (specified in bytes) for a packet. Defaults to zero. ++.TP ++rate ++The speed knob. See remarks above about limits! See ++.BR tc (8) ++for units. ++.PP ++Furthermore, if a peakrate is desired, the following parameters are available: ++ ++.TP ++peakrate ++Maximum depletion rate of the bucket. Limited to 1mbit/s on Intel, 10mbit/s on Alpha. The peakrate does ++not need to be set, it is only necessary if perfect millisecond timescale shaping is required. ++ ++.TP ++mtu/minburst ++Specifies the size of the peakrate bucket. For perfect accuracy, should be set to the MTU of the interface. 
++If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst ++allows around 3mbit/s of peakrate, given 1000 byte packets. ++ ++Like the regular burstsize you can also specify a ++.B cell ++size. ++.SH EXAMPLE & USAGE ++ ++To attach a TBF with a sustained maximum rate of 0.5mbit/s, a peakrate of 1.0mbit/s, ++a 5kilobyte buffer, with a pre-bucket queue size limit calculated so the TBF causes ++at most 70ms of latency, with perfect peakrate behaviour, issue: ++.P ++# tc qdisc add dev eth0 root tbf rate 0.5mbit \\ ++ burst 5kb latency 70ms peakrate 1mbit \\ ++ minburst 1540 ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/manpages/tc.8 iproute2/debian/manpages/tc.8 +--- iproute2-orig/debian/manpages/tc.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/manpages/tc.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,348 @@ ++.TH TC 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++tc \- show / manipulate traffic control settings ++.SH SYNOPSIS ++.B tc qdisc [ add | change | replace | link ] dev ++DEV ++.B ++[ parent ++qdisc-id ++.B | root ] ++.B [ handle ++qdisc-id ] qdisc ++[ qdisc specific parameters ] ++.P ++ ++.B tc class [ add | change | replace ] dev ++DEV ++.B parent ++qdisc-id ++.B [ classid ++class-id ] qdisc ++[ qdisc specific parameters ] ++.P ++ ++.B tc filter [ add | change | replace ] dev ++DEV ++.B [ parent ++qdisc-id ++.B | root ] protocol ++protocol ++.B prio ++priority filtertype ++[ filtertype specific parameters ] ++.B flowid ++flow-id ++ ++.B tc [-s | -d ] qdisc show [ dev ++DEV ++.B ] ++.P ++.B tc [-s | -d ] class show dev ++DEV ++.P ++.B tc filter show dev ++DEV ++ ++.SH DESCRIPTION ++.B Tc ++is used to configure Traffic Control in the Linux kernel. Traffic Control consists ++of the following: ++ ++.TP ++SHAPING ++When traffic is shaped, its rate of transmission is under control. 
Shaping may ++be more than lowering the available bandwidth - it is also used to smooth out ++bursts in traffic for better network behaviour. Shaping occurs on egress. ++ ++.TP ++SCHEDULING ++By scheduling the transmission of packets it is possible to improve interactivity ++for traffic that needs it while still guaranteeing bandwidth to bulk transfers. Reordering ++is also called prioritizing, and happens only on egress. ++ ++.TP ++POLICING ++Where shaping deals with transmission of traffic, policing pertains to traffic ++arriving. Policing thus occurs on ingress. ++ ++.TP ++DROPPING ++Traffic exceeding a set bandwidth may also be dropped forthwith, both on ++ingress and on egress. ++ ++.P ++Processing of traffic is controlled by three kinds of objects: qdiscs, ++classes and filters. ++ ++.SH QDISCS ++.B qdisc ++is short for 'queueing discipline' and it is elementary to ++understanding traffic control. Whenever the kernel needs to send a ++packet to an interface, it is ++.B enqueued ++to the qdisc configured for that interface. Immediately afterwards, the kernel ++tries to get as many packets as possible from the qdisc, for giving them ++to the network adaptor driver. ++ ++A simple QDISC is the 'pfifo' one, which does no processing at all and is a pure ++First In, First Out queue. It does however store traffic when the network interface ++can't handle it momentarily. ++ ++.SH CLASSES ++Some qdiscs can contain classes, which contain further qdiscs - traffic may ++then be enqueued in any of the inner qdiscs, which are within the ++.B classes. ++When the kernel tries to dequeue a packet from such a ++.B classful qdisc ++it can come from any of the classes. A qdisc may for example prioritize ++certain kinds of traffic by trying to dequeue from certain classes ++before others. ++ ++.SH FILTERS ++A ++.B filter ++is used by a classful qdisc to determine in which class a packet will ++be enqueued. 
Whenever traffic arrives at a class with subclasses, it needs ++to be classified. Various methods may be employed to do so, one of these ++are the filters. All filters attached to the class are called, until one of ++them returns with a verdict. If no verdict was made, other criteria may be ++available. This differs per qdisc. ++ ++It is important to notice that filters reside ++.B within ++qdiscs - they are not masters of what happens. ++ ++.SH CLASSLESS QDISCS ++The classless qdiscs are: ++.TP ++[p|b]fifo ++Simplest usable qdisc, pure First In, First Out behaviour. Limited in ++packets or in bytes. ++.TP ++pfifo_fast ++Standard qdisc for 'Advanced Router' enabled kernels. Consists of a three-band ++queue which honors Type of Service flags, as well as the priority that may be ++assigned to a packet. ++.TP ++red ++Random Early Detection simulates physical congestion by randomly dropping ++packets when nearing configured bandwidth allocation. Well suited to very ++large bandwidth applications. ++.TP ++sfq ++Stochastic Fairness Queueing reorders queued traffic so each 'session' ++gets to send a packet in turn. ++.TP ++tbf ++The Token Bucket Filter is suited for slowing traffic down to a precisely ++configured rate. Scales well to large bandwidths. ++.SH CONFIGURING CLASSLESS QDISCS ++In the absence of classful qdiscs, classless qdiscs can only be attached at ++the root of a device. Full syntax: ++.P ++.B tc qdisc add dev ++DEV ++.B root ++QDISC QDISC-PARAMETERS ++ ++To remove, issue ++.P ++.B tc qdisc del dev ++DEV ++.B root ++ ++The ++.B pfifo_fast ++qdisc is the automatic default in the absence of a configured qdisc. ++ ++.SH CLASSFUL QDISCS ++The classful qdiscs are: ++.TP ++CBQ ++Class Based Queueing implements a rich linksharing hierarchy of classes. ++It contains shaping elements as well as prioritizing capabilities. Shaping is ++performed using link idle time calculations based on average packet size and ++underlying link bandwidth. 
The latter may be ill-defined for some interfaces. ++.TP ++HTB ++The Hierarchy Token Bucket implements a rich linksharing hierarchy of ++classes with an emphasis on conforming to existing practices. HTB facilitates ++guaranteeing bandwidth to classes, while also allowing specification of upper ++limits to inter-class sharing. It contains shaping elements, based on TBF and ++can prioritize classes. ++.TP ++PRIO ++The PRIO qdisc is a non-shaping container for a configurable number of ++classes which are dequeued in order. This allows for easy prioritization ++of traffic, where lower classes are only able to send if higher ones have ++no packets available. To facilitate configuration, Type Of Service bits are ++honored by default. ++.SH THEORY OF OPERATION ++Classes form a tree, where each class has a single parent. ++A class may have multiple children. Some qdiscs allow for runtime addition ++of classes (CBQ, HTB) while others (PRIO) are created with a static number of ++children. ++ ++Qdiscs which allow dynamic addition of classes can have zero or more ++subclasses to which traffic may be enqueued. ++ ++Furthermore, each class contains a ++.B leaf qdisc ++which by default has ++.B pfifo ++behaviour though another qdisc can be attached in place. This qdisc may again ++contain classes, but each class can have only one leaf qdisc. ++ ++When a packet enters a classful qdisc it can be ++.B classified ++to one of the classes within. Three criteria are available, although not all ++qdiscs will use all three: ++.TP ++tc filters ++If tc filters are attached to a class, they are consulted first ++for relevant instructions. Filters can match on all fields of a packet header, ++as well as on the firewall mark applied by ipchains or iptables. See ++.BR tc-filters (8). ++.TP ++Type of Service ++Some qdiscs have built in rules for classifying packets based on the TOS field. 
++.TP ++skb->priority ++Userspace programs can encode a class-id in the 'skb->priority' field using ++the SO_PRIORITY option. ++.P ++Each node within the tree can have its own filters but higher level filters ++may also point directly to lower classes. ++ ++If classification did not succeed, packets are enqueued to the leaf qdisc ++attached to that class. Check qdisc specific manpages for details, however. ++ ++.SH NAMING ++All qdiscs, classes and filters have IDs, which can either be specified ++or be automatically assigned. ++ ++IDs consist of a major number and a minor number, separated by a colon. ++ ++.TP ++QDISCS ++A qdisc, which potentially can have children, ++gets assigned a major number, called a 'handle', leaving the minor ++number namespace available for classes. The handle is expressed as '10:'. ++It is customary to explicitly assign a handle to qdiscs expected to have ++children. ++ ++.TP ++CLASSES ++Classes residing under a qdisc share their qdisc major number, but each have ++a separate minor number called a 'classid' that has no relation to their ++parent classes, only to their parent qdisc. The same naming custom as for ++qdiscs applies. ++ ++.TP ++FILTERS ++Filters have a three part ID, which is only needed when using a hashed ++filter hierarchy, for which see ++.BR tc-filters (8). ++.SH UNITS ++All parameters accept a floating point number, possibly followed by a unit. ++.P ++Bandwidths or rates can be specified in: ++.TP ++kbps ++Kilobytes per second ++.TP ++mbps ++Megabytes per second ++.TP ++kbit ++Kilobits per second ++.TP ++mbit ++Megabits per second ++.TP ++bps or a bare number ++Bytes per second ++.P ++Amounts of data can be specified in: ++.TP ++kb or k ++Kilobytes ++.TP ++mb or m ++Megabytes ++.TP ++mbit ++Megabits ++.TP ++kbit ++Kilobits ++.TP ++b or a bare number ++Bytes. 
++.P ++Lengths of time can be specified in: ++.TP ++s, sec or secs ++Whole seconds ++.TP ++ms, msec or msecs ++Milliseconds ++.TP ++us, usec, usecs or a bare number ++Microseconds. ++ ++.SH TC COMMANDS ++The following commands are available for qdiscs, classes and filter: ++.TP ++add ++Add a qdisc, class or filter to a node. For all entities, a ++.B parent ++must be passed, either by passing its ID or by attaching directly to the root of a device. ++When creating a qdisc or a filter, it can be named with the ++.B handle ++parameter. A class is named with the ++.B classid ++parameter. ++ ++.TP ++remove ++A qdisc can be removed by specifying its handle, which may also be 'root'. All subclasses and their leaf qdiscs ++are automatically deleted, as well as any filters attached to them. ++ ++.TP ++change ++Some entities can be modified 'in place'. Shares the syntax of 'add', with the exception ++that the handle cannot be changed and neither can the parent. In other words, ++.B ++change ++cannot move a node. ++ ++.TP ++replace ++Performs a nearly atomic remove/add on an existing node id. If the node does not exist yet ++it is created. ++ ++.TP ++link ++Only available for qdiscs and performs a replace where the node ++must exist already. ++ ++ ++.SH HISTORY ++.B tc ++was written by Alexey N. Kuznetsov and added in Linux 2.2. ++.SH SEE ALSO ++.BR tc-cbq (8), ++.BR tc-htb (8), ++.BR tc-sfq (8), ++.BR tc-red (8), ++.BR tc-tbf (8), ++.BR tc-pfifo (8), ++.BR tc-bfifo (8), ++.BR tc-pfifo_fast (8), ++.BR tc-filters (8) ++ ++.SH AUTHOR ++Manpage maintained by bert hubert (ahu@ds9a.nl) ++ +diff -Naur iproute2-orig/debian/postinst iproute2/debian/postinst +--- iproute2-orig/debian/postinst 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/postinst 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,6 @@ ++#!/bin/sh -e ++ ++# FHS: ++if [ "$1" = "configure" -a -d /usr/doc -a ! 
-e /usr/doc/iproute ]; then ++ ln -sf ../share/doc/iproute /usr/doc/iproute ++fi +diff -Naur iproute2-orig/debian/postrm iproute2/debian/postrm +--- iproute2-orig/debian/postrm 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/postrm 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,6 @@ ++#!/bin/sh ++ ++if [ "$1" = "purge" ] ++then ++ rm -rf /etc/iproute2 ++fi +diff -Naur iproute2-orig/debian/prerm iproute2/debian/prerm +--- iproute2-orig/debian/prerm 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/prerm 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,5 @@ ++#!/bin/sh -e ++ ++if [ \( "$1" = "upgrade" -o "$1" = "remove" \) -a -L /usr/doc/iproute ]; then ++ rm -f /usr/doc/iproute ++fi +diff -Naur iproute2-orig/debian/rules iproute2/debian/rules +--- iproute2-orig/debian/rules 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/rules 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,85 @@ ++#!/usr/bin/make -f ++# ++# Copyright (C) 1999 Roberto Lumbreras ++# Copyright (C) 1999-2002 Juan Cespedes ++# Copying: GPL ++ ++SHELL = bash ++ ++PACKAGE := $(shell perl -e 'print <> =~ /^(.*) \(.*\)/' debian/changelog) ++PKG_VER := $(shell perl -e 'print <> =~ /\((.*)\)/' debian/changelog) ++PKG_UPVER:= $(shell perl -e 'print <> =~ /\((.*)-[^-]*\)/' debian/changelog) ++ ++BINS = ip/ip ++SBINS = ip/rtmon ip/rtacct tc/tc ++SHBINS = ip/routef ip/routel # ip/ifcfg ip/rtpr ++DOCS = README* doc/Plan debian/README.Debian ++MAN8 = debian/manpages/*.8 ++MANLINKS= rtmon rtacct routef routel ++TEXDOCS = ip-cref ip-tunnels api-ip6-flowlabels ++ ++build: stamp-build ++ ++stamp-build: ++ test -f include-glibc/netinet/in.h.orig || \ ++ mv include-glibc/netinet/in.h \ ++ include-glibc/netinet/in.h.orig ++ $(MAKE) KERNEL_INCLUDE=/usr/include ++ $(MAKE) -C doc ++ touch stamp-build ++ ++binary: binary-indep binary-arch ++ ++binary-indep: ++ ++binary-arch: checkroot stamp-build ++ $(RM) -r debian/tmp ++ install -d -m0755
debian/tmp/{DEBIAN,bin,sbin,usr/{bin,share/doc/$(PACKAGE),share/man/man{7,8}}} ++ install -s -m0755 $(BINS) debian/tmp/bin/ ++ install -s -m0755 $(SBINS) debian/tmp/sbin/ ++ ln -s /bin/ip debian/tmp/sbin/ip ++ install -m0755 $(SHBINS) debian/tmp/usr/bin/ ++ cp -p $(DOCS) debian/tmp/usr/share/doc/$(PACKAGE)/ ++ cp -rp examples debian/tmp/usr/share/doc/$(PACKAGE)/ ++ find debian/tmp/usr/share/doc/$(PACKAGE)/examples -type f -exec chmod -x {} \; ++ install -m0644 debian/changelog debian/tmp/usr/share/doc/$(PACKAGE)/changelog.Debian ++ cp -p RELNOTES debian/tmp/usr/share/doc/$(PACKAGE)/changelog ++ for i in $(TEXDOCS); do \ ++ install -m0644 doc/$$i.tex debian/tmp/usr/share/doc/$(PACKAGE)/; \ ++ install -m0644 doc/$$i.dvi debian/tmp/usr/share/doc/$(PACKAGE)/; \ ++ install -m0644 doc/$$i.ps debian/tmp/usr/share/doc/$(PACKAGE)/; \ ++ done ++ install -m0644 $(MAN8) debian/tmp/usr/share/man/man8/ ++ gzip -9fr debian/tmp/usr/share || true ++ ln -s tc-pbfifo.8.gz debian/tmp/usr/share/man/man8/tc-pfifo.8.gz ++ ln -s tc-pbfifo.8.gz debian/tmp/usr/share/man/man8/tc-bfifo.8.gz ++ for i in $(MANLINKS); do \ ++ ln -s ../man7/undocumented.7.gz debian/tmp/usr/share/man/man8/$$i.8.gz; \ ++ done ++ cp -p debian/copyright debian/tmp/usr/share/doc/$(PACKAGE)/ ++ cp -rp etc debian/tmp/ ++ install -m0644 debian/conffiles debian/tmp/DEBIAN/ ++ ++ dpkg-shlibdeps $(BINS) $(SBINS) ++ dpkg-gencontrol -isp ++ chown -R root:root debian/tmp ++ chmod -R u=rwX,go=rX debian/tmp ++ dpkg --build debian/tmp .. ++ ++checkdir: ++ @test -f debian/rules ++ ++checkroot: checkdir ++ @test 0 = `id -u` || { echo "Error: not super-user"; exit 1; } ++ ++clean: checkdir debian/control ++ $(RM) stamp-build debian/files debian/substvars ++ $(MAKE) clean ++ $(MAKE) -C doc clean ++ $(RM) `find .
-name "*~" -o -name core` ++ $(RM) -r debian/tmp ++ test -f include-glibc/netinet/in.h.orig && \ ++ mv include-glibc/netinet/in.h.orig \ ++ include-glibc/netinet/in.h || true ++ ++.PHONY: build binary binary-arch binary-indep checkdir checkroot clean +diff -Naur iproute2-orig/debian/tc-cbq.8 iproute2/debian/tc-cbq.8 +--- iproute2-orig/debian/tc-cbq.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-cbq.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,353 @@ ++.TH CBQ 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++CBQ \- Class Based Queueing ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] cbq [ allot ++bytes ++.B ] avpkt ++bytes ++.B bandwidth ++rate ++.B [ cell ++bytes ++.B ] [ ewma ++log ++.B ] [ mpu ++bytes ++.B ] ++ ++.B tc class ... dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] cbq allot ++bytes ++.B [ bandwidth ++rate ++.B ] [ rate ++rate ++.B ] prio ++priority ++.B [ weight ++weight ++.B ] [ minburst ++packets ++.B ] [ maxburst ++packets ++.B ] [ ewma ++log ++.B ] [ cell ++bytes ++.B ] avpkt ++bytes ++.B [ mpu ++bytes ++.B ] [ bounded isolated ] [ split ++handle ++.B & defmap ++defmap ++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++Class Based Queueing is a classful qdisc that implements a rich ++linksharing hierarchy of classes. It contains shaping elements as ++well as prioritizing capabilities. Shaping is performed using link ++idle time calculations based on the timing of dequeue events and ++underlying link bandwidth. ++ ++.SH SHAPING ALGORITHM ++When shaping a 10mbit/s connection to 1mbit/s, the link will ++be idle 90% of the time. If it isn't, it needs to be throttled so that it ++IS idle 90% of the time. ++ ++During operations, the effective idletime is measured using an ++exponential weighted moving average (EWMA), which considers recent ++packets to be exponentially more important than past ones.
The Unix ++loadaverage is calculated in the same way. ++ ++The calculated idle time is subtracted from the EWMA measured one, ++the resulting number is called 'avgidle'. A perfectly loaded link has ++an avgidle of zero: packets arrive exactly at the calculated ++interval. ++ ++An overloaded link has a negative avgidle and if it gets too negative, ++CBQ throttles and is then 'overlimit'. ++ ++Conversely, an idle link might amass a huge avgidle, which would then ++allow infinite bandwidths after a few hours of silence. To prevent ++this, avgidle is capped at ++.B maxidle. ++ ++If overlimit, in theory, the CBQ could throttle itself for exactly the ++amount of time that was calculated to pass between packets, and then ++pass one packet, and throttle again. Due to timer resolution constraints, ++this may not be feasible, see the ++.B minburst ++parameter below. ++ ++.SH CLASSIFICATION ++Within the one CBQ instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, CBQ starts at the root and uses various methods to ++determine which class should receive the data. ++ ++In the absence of uncommon configuration options, the process is rather easy. ++At each node we look for an instruction, and then go to the class the ++instruction refers us to. If the class found is a barren leaf-node (without ++children), we enqueue the packet there. If it is not yet a leaf node, we do ++the whole thing over again starting from that node. ++ ++The following actions are performed, in order at each node we visit, until one ++sends us to another node, or terminates the process. ++.TP ++(i) ++Consult filters attached to the class. If sent to a leafnode, we are done. ++Otherwise, restart. ++.TP ++(ii) ++Consult the defmap for the priority assigned to this packet, which depends ++on the TOS bits. Check if the referral is leafless, otherwise restart. 
++.TP ++(iii) ++Ask the defmap for instructions for the 'best effort' priority. Check the ++answer for leafness, otherwise restart. ++.TP ++(iv) ++If none of the above returned with an instruction, enqueue at this node. ++.P ++This algorithm makes sure that a packet always ends up somewhere, even while ++you are busy building your configuration. ++ ++For more details, see ++.BR tc-cbq-details(8). ++ ++.SH LINK SHARING ALGORITHM ++When dequeuing for sending to the network device, CBQ decides which of its ++classes will be allowed to send. It does so with a Weighted Round Robin process ++in which each class with packets gets a chance to send in turn. The WRR process ++starts by asking the highest priority classes (lowest numerically - ++highest semantically) for packets, and will continue to do so until they ++have no more data to offer, in which case the process repeats for lower ++priorities. ++ ++Classes by default borrow bandwidth from their siblings. A class can be ++prevented from doing so by declaring it 'bounded'. A class can also indicate ++its unwillingness to lend out bandwidth by being 'isolated'. ++ ++.SH QDISC ++The root of a CBQ qdisc class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the CBQ instance, either at the ++.B root ++of an interface or within an existing class. ++.TP ++handle major: ++Like all other qdiscs, the CBQ can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional, but very useful if classes ++will be generated within this qdisc. ++.TP ++allot bytes ++This allotment is the 'chunkiness' of link sharing and is used for determining packet ++transmission time tables. The qdisc allot differs slightly from the class allot discussed ++below. Optional. Defaults to a reasonable value, related to avpkt. 
++.TP ++avpkt bytes ++The average size of a packet is needed for calculating maxidle, and is also used ++for making sure 'allot' has a safe value. Mandatory. ++.TP ++bandwidth rate ++To determine the idle time, CBQ must know the bandwidth of your underlying ++physical interface, or parent qdisc. This is a vital parameter, more about it ++later. Mandatory. ++.TP ++cell ++The cell size determines the granularity of packet transmission time calculations. Has a sensible default. ++.TP ++mpu ++A zero sized packet may still take time to transmit. This value is the lower ++cap for packet transmission time calculations - packets smaller than this value ++are still deemed to have this size. Defaults to zero. ++.TP ++ewma log ++When CBQ needs to measure the average idle time, it does so using an ++Exponentially Weighted Moving Average which smoothes out measurements into ++a moving average. The EWMA LOG determines how much smoothing occurs. Lower ++values imply greater sensitivity. Must be between 0 and 31. Defaults ++to 5. ++.P ++A CBQ qdisc does not shape out of its own accord. It only needs to know certain ++parameters about the underlying link. Actual shaping is done in classes. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named. The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++weight weight ++When dequeuing to the interface, classes are tried for traffic in a ++round-robin fashion. Classes with a higher configured qdisc will generally ++have more traffic to offer during each round, so it makes sense to allow ++it to dequeue more traffic.
All weights under a class are normalized, so ++only the ratios matter. Defaults to the configured rate, unless the priority ++of this class is maximal, in which case it is set to 1. ++.TP ++allot bytes ++Allot specifies how many bytes a qdisc can dequeue ++during each round of the process. This parameter is weighted using the ++renormalized class weight described above. Silently capped at a minimum of ++3/2 avpkt. Mandatory. ++ ++.TP ++prio priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++avpkt ++See the QDISC section. ++ ++.TP ++rate rate ++Maximum rate this class and all its children combined can send at. Mandatory. ++ ++.TP ++bandwidth rate ++This is different from the bandwidth specified when creating a CBQ disc! Only ++used to determine maxidle and offtime, which are only calculated when ++specifying maxburst or minburst. Mandatory if specifying maxburst or minburst. ++ ++.TP ++maxburst ++This number of packets is used to calculate maxidle so that when ++avgidle is at maxidle, this number of average packets can be burst ++before avgidle drops to 0. Set it higher to be more tolerant of ++bursts. You can't set maxidle directly, only via this parameter. ++ ++.TP ++minburst ++As mentioned before, CBQ needs to throttle in case of ++overlimit. The ideal solution is to do so for exactly the calculated ++idle time, and pass 1 packet. However, Unix kernels generally have a ++hard time scheduling events shorter than 10ms, so it is better to ++throttle for a longer period, and then pass minburst packets in one ++go, and then sleep minburst times longer. ++ ++The time to wait is called the offtime. Higher values of minburst lead ++to more accurate shaping in the long term, but to bigger bursts at ++millisecond timescales. Optional. ++ ++.TP ++minidle ++If avgidle is below 0, we are overlimits and need to wait until ++avgidle will be big enough to send one packet. 
To prevent a sudden ++burst from shutting down the link for a prolonged period of time, ++avgidle is reset to minidle if it gets too low. ++ ++Minidle is specified in negative microseconds, so 10 means that ++avgidle is capped at -10us. Optional. ++ ++.TP ++bounded ++Signifies that this class will not borrow bandwidth from its siblings. ++.TP ++isolated ++Means that this class will not borrow bandwidth to its siblings ++ ++.TP ++split major:minor & defmap bitmap[/bitmap] ++If consulting filters attached to a class did not give a verdict, ++CBQ can also classify based on the packet's priority. There are 16 ++priorities available, numbered from 0 to 15. ++ ++The defmap specifies which priorities this class wants to receive, ++specified as a bitmap. The Least Significant Bit corresponds to priority ++zero. The ++.B split ++parameter tells CBQ at which class the decision must be made, which should ++be a (grand)parent of the class you are adding. ++ ++As an example, 'tc class add ... classid 10:1 cbq .. split 10:0 defmap c0' ++configures class 10:0 to send packets with priorities 6 and 7 to 10:1. ++ ++The complimentary configuration would then ++be: 'tc class add ... classid 10:2 cbq ... split 10:0 defmap 3f' ++Which would send all packets 0, 1, 2, 3, 4 and 5 to 10:1. ++.TP ++estimator interval timeconstant ++CBQ can measure how much bandwidth each class is using, which tc filters ++can use to classify packets with. In order to determine the bandwidth ++it uses a very simple estimator that measures once every ++.B interval ++microseconds how much traffic has passed. This again is a EWMA, for which ++the time constant can be specified, also in microseconds. The ++.B time constant ++corresponds to the sluggishness of the measurement or, conversely, to the ++sensitivity of the average to short bursts. Higher values mean less ++sensitivity. 
++ ++.SH BUGS ++The actual bandwidth of the underlying link may not be known, for example ++in the case of PPPoE or PPTP connections which in fact may send over a ++pipe, instead of over a physical device. CBQ is quite resilient to major ++errors in the configured bandwidth, probably at the cost of coarser shaping. ++ ++Default kernels rely on coarse timing information for making decisions. These ++may make shaping precise in the long term, but inaccurate on second long scales. ++ ++See ++.BR tc-cbq-details (8) ++for hints on how to improve this. ++ ++.SH SOURCES ++.TP ++o ++Sally Floyd and Van Jacobson, "Link-sharing and Resource ++Management Models for Packet Networks", ++IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995 ++ ++.TP ++o ++Sally Floyd, "Notes on Class-Based Queueing: Setting ++Parameters", 1996 ++ ++.TP ++o ++Sally Floyd and Michael Speer, "Experimental Results ++for Class-Based Queueing", 1998, not published. ++ ++ ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc-htb.8 iproute2/debian/tc-htb.8 +--- iproute2-orig/debian/tc-htb.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-htb.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,153 @@ ++.TH HTB 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++HTB \- Hierarchy Token Bucket ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] htb [ default ++minor-id ++.B ] ++ ++.B tc class ... dev ++dev ++.B parent ++major:[minor] ++.B [ classid ++major:minor ++.B ] htb rate ++rate ++.B [ ceil ++rate ++.B ] burst ++bytes ++.B [ cburst ++bytes ++.B ] [ prio ++priority ++.B ] ++ ++.SH DESCRIPTION ++HTB is meant as a more understandable and intuitive replacement for ++the CBQ qdisc in Linux.
Both CBQ and HTB help you to control the use ++of the outbound bandwidth on a given link. Both allow you to use one ++physical link to simulate several slower links and to send different ++kinds of traffic on different simulated links. In both cases, you have ++to specify how to divide the physical link into simulated links and ++how to decide which simulated link to use for a given packet to be sent. ++ ++Unlike CBQ, HTB shapes traffic based on the Token Bucket Filter algorithm ++which does not depend on interface characteristics and so does not need to ++know the underlying bandwidth of the outgoing interface. ++ ++.SH SHAPING ALGORITHM ++Shaping works as documented in ++.BR tc-tbf (8). ++ ++.SH CLASSIFICATION ++Within the one HTB instance many classes may exist. Each of these classes ++contains another qdisc, by default ++.BR tc-pfifo (8). ++ ++When enqueueing a packet, HTB starts at the root and uses various methods to ++determine which class should receive the data. ++ ++In the absence of uncommon configuration options, the process is rather easy. ++At each node we look for an instruction, and then go to the class the ++instruction refers us to. If the class found is a barren leaf-node (without ++children), we enqueue the packet there. If it is not yet a leaf node, we do ++the whole thing over again starting from that node. ++ ++The following actions are performed, in order at each node we visit, until one ++sends us to another node, or terminates the process. ++.TP ++(i) ++Consult filters attached to the class. If sent to a leafnode, we are done. ++Otherwise, restart. ++.TP ++(ii) ++If none of the above returned with an instruction, enqueue at this node. ++.P ++This algorithm makes sure that a packet always ends up somewhere, even while ++you are busy building your configuration.
++ ++.SH LINK SHARING ALGORITHM ++FIXME ++ ++.SH QDISC ++The root of an HTB qdisc class tree has the following parameters: ++ ++.TP ++parent major:minor | root ++This mandatory parameter determines the place of the HTB instance, either at the ++.B root ++of an interface or within an existing class. ++.TP ++handle major: ++Like all other qdiscs, the HTB can be assigned a handle. Should consist only ++of a major number, followed by a colon. Optional, but very useful if classes ++will be generated within this qdisc. ++.TP ++default minor-id ++Unclassified traffic gets sent to the class with this minor-id. ++ ++.SH CLASSES ++Classes have a host of parameters to configure their operation. ++ ++.TP ++parent major:minor ++Place of this class within the hierarchy. If attached directly to a qdisc ++and not to another class, minor can be omitted. Mandatory. ++.TP ++classid major:minor ++Like qdiscs, classes can be named. The major number must be equal to the ++major number of the qdisc to which it belongs. Optional, but needed if this ++class is going to have children. ++.TP ++prio priority ++In the round-robin process, classes with the lowest priority field are tried ++for packets first. Mandatory. ++ ++.TP ++rate rate ++Maximum rate this class and all its children are guaranteed. Mandatory. ++ ++.TP ++ceil rate ++Maximum rate at which a class can send, if its parent has bandwidth to spare. ++Defaults to the configured rate, which implies no borrowing. ++ ++.TP ++burst bytes ++Amount of bytes that can be burst at ++.B ceil ++speed, in excess of the configured ++.B rate. ++Should be at least as high as the highest burst of all children. ++ ++.TP ++cburst bytes ++Amount of bytes that can be burst at 'infinite' speed, in other words, as fast ++as the interface can transmit them. For perfect evening out, should be equal to at most one average ++packet. Should be at least as high as the highest cburst of all children.
++ ++.SH NOTES ++Due to Unix timing constraints, the maximum ceil rate is not infinite and may in fact be quite low. On Intel, ++there are 100 timer events per second, the maximum rate is that rate at which 'burst' bytes are sent each timer tick. ++From this, the minimum burst size for a specified rate can be calculated. For i386, a 10mbit rate requires a 12 kilobyte ++burst as 100*12kb*8 equals 10mbit. ++ ++.SH BUGS ++Not in the stock kernel yet. ++ ++.SH SEE ALSO ++.BR tc (8) ++.P ++HTB website: http://luxik.cdi.cz/~devik/qos/htb/ ++.SH AUTHOR ++Martin Devera . This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc-pbfifo.8 iproute2/debian/tc-pbfifo.8 +--- iproute2-orig/debian/tc-pbfifo.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-pbfifo.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,72 @@ ++.TH PBFIFO 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++pfifo \- Packet limited First In, First Out queue ++.P ++bfifo \- Byte limited First In, First Out queue ++ ++.SH SYNOPSIS ++.B tc qdisc ... add pfifo ++.B [ limit ++packets ++.B ] ++.P ++.B tc qdisc ... add bfifo ++.B [ limit ++bytes ++.B ] ++ ++.SH DESCRIPTION ++The pfifo and bfifo qdiscs are unadorned First In, First Out queues. They are the ++simplest queues possible and therefore have no overhead. ++.B pfifo ++constrains the queue size as measured in packets. ++.B bfifo ++does so as measured in bytes. ++ ++Like all non-default qdiscs, they maintain statistics. This might be a reason to prefer ++pfifo or bfifo over the default. ++ ++.SH ALGORITHM ++A list of packets is maintained, when a packet is enqueued it gets inserted at the tail of ++a list. When a packet needs to be sent out to the network, it is taken from the head of the list. ++ ++If the list is too long, no further packets are allowed on. This is called 'tail drop'. ++ ++.SH PARAMETERS ++.TP ++limit ++Maximum queue size. Specified in bytes for bfifo, in packets for pfifo.
For pfifo, defaults ++to the interface txqueuelen, as specified with ++.BR ifconfig (8) ++or ++.BR ip (8). ++ ++For bfifo, it defaults to the txqueuelen multiplied by the interface MTU. ++ ++.SH OUTPUT ++The output of ++.B tc -s qdisc ls ++contains the limit, either in packets or in bytes, and the number of bytes ++and packets actually sent. An unsent and dropped packet only appears between braces ++and is not counted as 'Sent'. ++ ++In this example, the queue length is 100 packets, 45894 bytes were sent over 681 packets. ++No packets were dropped, and as the pfifo queue does not slow down packets, there were also no ++overlimits: ++.P ++.nf ++# tc -s qdisc ls dev eth0 ++qdisc pfifo 8001: dev eth0 limit 100p ++ Sent 45894 bytes 681 pkts (dropped 0, overlimits 0) ++.fi ++ ++If a backlog occurs, this is displayed as well. ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, ++ ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc-pfifo_fast.8 iproute2/debian/tc-pfifo_fast.8 +--- iproute2-orig/debian/tc-pfifo_fast.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-pfifo_fast.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,59 @@ ++.TH PFIFO_FAST 8 "10 January 2002" "iproute2" "Linux" ++.SH NAME ++pfifo_fast \- three-band first in, first out queue ++ ++.SH DESCRIPTION ++pfifo_fast is the default qdisc of each interface. ++ ++Whenever an interface is created, the pfifo_fast qdisc is automatically used ++as a queue. If another qdisc is attached, it preempts the default ++pfifo_fast, which automatically returns to function when an existing qdisc ++is detached. ++ ++In this sense this qdisc is magic, and unlike other qdiscs. ++ ++.SH ALGORITHM ++The algorithm is very similar to that of the classful ++.BR tc-prio (8) ++qdisc. ++.B pfifo_fast ++is like three ++.BR tc-pfifo (8) ++queues side by side, where packets can be enqueued in any of the three bands ++based on their Type of Service bits or assigned priority. 
++ ++Not all three bands are dequeued simultaneously - as long as lower bands ++have traffic, higher bands are never dequeued. This can be used to ++prioritize interactive traffic or penalize 'lowest cost' traffic. ++ ++Each band can be txqueuelen packets long, as configured with ++.BR ifconfig (8) ++or ++.BR ip (8). ++Additional packets coming in are not enqueued but are instead dropped. ++ ++See ++.BR tc-prio (8) ++for complete details on how TOS bits are translated into bands. ++.SH PARAMETERS ++.TP ++txqueuelen ++The length of the three bands depends on the interface txqueuelen, as ++specified with ++.BR ifconfig (8) ++or ++.BR ip (8). ++ ++.SH BUGS ++Does not maintain statistics and does not show up in tc qdisc ls. This is because ++it is the automatic default in the absence of a configured qdisc. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, ++ ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc-prio.8 iproute2/debian/tc-prio.8 +--- iproute2-orig/debian/tc-prio.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-prio.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,187 @@ ++.TH PRIO 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++PRIO \- Priority qdisc ++.SH SYNOPSIS ++.B tc qdisc ... dev ++dev ++.B ( parent ++classid ++.B | root) [ handle ++major: ++.B ] prio [ bands ++bands ++.B ] [ priomap ++band,band,band... ++.B ] [ estimator ++interval timeconstant ++.B ] ++ ++.SH DESCRIPTION ++The PRIO qdisc is a simple classful queueing discipline that contains ++an arbitrary number of classes of differing priority. The classes are ++dequeued in numerical descending order of priority. PRIO is a scheduler ++and never delays packets - it is a work-conserving qdisc, though the qdiscs ++contained in the classes may not be. ++ ++Very useful for lowering latency when there is no need for slowing down ++traffic. ++ ++.SH ALGORITHM ++On creation with 'tc qdisc add', a fixed number of bands is created. 
Each ++band is a class, although it is not possible to add classes with 'tc qdisc ++add', the number of bands to be created must instead be specified on the ++commandline attaching PRIO to its root. ++ ++When dequeueing, band 0 is tried first and only if it did not deliver a ++packet does PRIO try band 1, and so onwards. Maximum reliability packets ++should therefore go to band 0, minimum delay to band 1 and the rest to band ++2. ++ ++As the PRIO qdisc itself will have minor number 0, band 0 is actually ++major:1, band 1 is major:2, etc. For major, substitute the major number ++assigned to the qdisc on 'tc qdisc add' with the ++.B handle ++parameter. ++ ++.SH CLASSIFICATION ++Three methods are available to PRIO to determine in which band a packet will ++be enqueued. ++.TP ++From userspace ++A process with sufficient privileges can encode the destination class ++directly with SO_PRIORITY, see ++.BR tc(7). ++.TP ++with a tc filter ++A tc filter attached to the root qdisc can point traffic directly to a class ++.TP ++with the priomap ++Based on the packet priority, which in turn is derived from the Type of ++Service assigned to the packet. ++.P ++Only the priomap is specific to this qdisc. ++.SH QDISC PARAMETERS ++.TP ++bands ++Number of bands. If changed from the default of 3, ++.B priomap ++must be updated as well. ++.TP ++priomap ++The priomap maps the priority of ++a packet to a class. The priority can either be set directly from userspace, ++or be derived from the Type of Service of the packet. ++ ++Determines how packet priorities, as assigned by the kernel, map to ++bands.
Mapping occurs based on the TOS octet of the packet, which looks like ++this: ++ ++.nf ++0 1 2 3 4 5 6 7 +++---+---+---+---+---+---+---+---+ ++| | | | ++|PRECEDENCE | TOS |MBZ| ++| | | | +++---+---+---+---+---+---+---+---+ ++.fi ++ ++The four TOS bits (the 'TOS field') are defined as: ++ ++.nf ++Binary Decimal Meaning ++----------------------------------------- ++1000 8 Minimize delay (md) ++0100 4 Maximize throughput (mt) ++0010 2 Maximize reliability (mr) ++0001 1 Minimize monetary cost (mmc) ++0000 0 Normal Service ++.fi ++ ++As there is 1 bit to the right of these four bits, the actual value of the ++TOS field is double the value of the TOS bits. Tcpdump -v -v shows you the ++value of the entire TOS field, not just the four bits. It is the value you ++see in the first column of this table: ++ ++.nf ++TOS Bits Means Linux Priority Band ++------------------------------------------------------------ ++0x0 0 Normal Service 0 Best Effort 1 ++0x2 1 Minimize Monetary Cost 1 Filler 2 ++0x4 2 Maximize Reliability 0 Best Effort 1 ++0x6 3 mmc+mr 0 Best Effort 1 ++0x8 4 Maximize Throughput 2 Bulk 2 ++0xa 5 mmc+mt 2 Bulk 2 ++0xc 6 mr+mt 2 Bulk 2 ++0xe 7 mmc+mr+mt 2 Bulk 2 ++0x10 8 Minimize Delay 6 Interactive 0 ++0x12 9 mmc+md 6 Interactive 0 ++0x14 10 mr+md 6 Interactive 0 ++0x16 11 mmc+mr+md 6 Interactive 0 ++0x18 12 mt+md 4 Int. Bulk 1 ++0x1a 13 mmc+mt+md 4 Int. Bulk 1 ++0x1c 14 mr+mt+md 4 Int. Bulk 1 ++0x1e 15 mmc+mr+mt+md 4 Int. Bulk 1 ++.fi ++ ++The second column contains the value of the relevant ++four TOS bits, followed by their translated meaning. For example, 15 stands ++for a packet wanting Minimal Monetary Cost, Maximum Reliability, Maximum ++Throughput AND Minimum Delay. ++ ++The fourth column lists the way the Linux kernel interprets the TOS bits, by ++showing to which Priority they are mapped. ++ ++The last column shows the result of the default priomap.
On the commandline, ++the default priomap looks like this: ++ ++ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 ++ ++This means that priority 4, for example, gets mapped to band number 1. ++The priomap also allows you to list higher priorities (> 7) which do not ++correspond to TOS mappings, but which are set by other means. ++ ++This table from RFC 1349 (read it for more details) explains how ++applications might very well set their TOS bits: ++ ++.nf ++TELNET 1000 (minimize delay) ++FTP ++ Control 1000 (minimize delay) ++ Data 0100 (maximize throughput) ++ ++TFTP 1000 (minimize delay) ++ ++SMTP ++ Command phase 1000 (minimize delay) ++ DATA phase 0100 (maximize throughput) ++ ++Domain Name Service ++ UDP Query 1000 (minimize delay) ++ TCP Query 0000 ++ Zone Transfer 0100 (maximize throughput) ++ ++NNTP 0001 (minimize monetary cost) ++ ++ICMP ++ Errors 0000 ++ Requests 0000 (mostly) ++ Responses <same as request> (mostly) ++.fi ++ ++ ++.SH CLASSES ++PRIO classes cannot be configured further - they are automatically created ++when the PRIO qdisc is attached. Each class however can contain yet a ++further qdisc. ++ ++.SH BUGS ++Large amounts of traffic in the lower bands can cause starvation of higher ++bands. Can be prevented by attaching a shaper (for example, ++.BR tc-tbf (8)) ++to these bands to make sure they cannot dominate the link. ++ ++.SH AUTHORS ++Alexey N. Kuznetsov, , J Hadi Salim ++. This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc-red.8 iproute2/debian/tc-red.8 +--- iproute2-orig/debian/tc-red.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-red.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,131 @@ ++.TH RED 8 "13 December 2001" "iproute2" "Linux" ++.SH NAME ++red \- Random Early Detection ++.SH SYNOPSIS ++.B tc qdisc ...
red ++.B limit ++bytes ++.B min ++bytes ++.B max ++bytes ++.B avpkt ++bytes ++.B burst ++packets ++.B [ ecn ] [ bandwidth ++rate ++.B ] probability ++chance ++ ++.SH DESCRIPTION ++Random Early Detection is a classless qdisc which manages its queue size ++smartly. Regular queues simply drop packets from the tail when they are ++full, which may not be the optimal behaviour. RED also performs tail drop, ++but does so in a more gradual way. ++ ++Once the queue hits a certain average length, packets enqueued have a ++configurable chance of being marked (which may mean dropped). This chance ++increases linearly up to a point called the ++.B max ++average queue length, although the queue might get bigger. ++ ++This has a host of benefits over simple taildrop, while not being processor ++intensive. It prevents synchronous retransmits after a burst in traffic, ++which cause further retransmits, etc. ++ ++The goal is to have a small queue size, which is good for interactivity ++while not disturbing TCP/IP traffic with too many sudden drops after a burst ++of traffic. ++ ++Depending on whether ECN is configured, marking either means dropping or ++purely marking a packet as overlimit. ++.SH ALGORITHM ++The average queue size is used for determining the marking ++probability. This is calculated using an Exponential Weighted Moving ++Average, which can be more or less sensitive to bursts. ++ ++When the average queue size is below ++.B min ++bytes, no packet will ever be marked. When it exceeds ++.B min, ++the probability of doing so climbs linearly up ++to ++.B probability, ++until the average queue size hits ++.B max ++bytes. Because ++.B probability ++is normally not set to 100%, the queue size might ++conceivably rise above ++.B max ++bytes, so the ++.B limit ++parameter is provided to set a hard maximum for the size of the queue. ++ ++.SH PARAMETERS ++.TP ++min ++Average queue size at which marking becomes a possibility.
++.TP ++max ++At this average queue size, the marking probability is maximal. Should be at ++least twice ++.B min ++to prevent synchronous retransmits, higher for low ++.B min. ++.TP ++probability ++Maximum probability for marking, specified as a floating point ++number from 0.0 to 1.0. Suggested values are 0.01 or 0.02 (1 or 2%, ++respectively). ++.TP ++limit ++Hard limit on the real (not average) queue size in bytes. Further packets ++are dropped. Should be set higher than max+burst. It is advised to set this ++a few times higher than ++.B max. ++.TP ++burst ++Used for determining how fast the average queue size is influenced by the ++real queue size. Larger values make the calculation more sluggish, allowing ++longer bursts of traffic before marking starts. Real life experiments ++support the following guideline: (min+min+max)/(3*avpkt). ++.TP ++avpkt ++Specified in bytes. Used with burst to determine the time constant for ++average queue size calculations. 1000 is a good value. ++.TP ++bandwidth ++This rate is used for calculating the average queue size after some ++idle time. Should be set to the bandwidth of your interface. Does not mean ++that RED will shape for you! Optional. ++.TP ++ecn ++As mentioned before, RED can either 'mark' or 'drop'. Explicit Congestion ++Notification allows RED to notify remote hosts that their rate exceeds the ++amount of bandwidth available. Non-ECN capable hosts can only be notified by ++dropping a packet. If this parameter is specified, packets which indicate ++that their hosts honor ECN will only be marked and not dropped, unless the ++queue size hits ++.B limit ++bytes. Needs a tc binary with RED support compiled in. Recommended. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH SOURCES ++.TP ++o ++Floyd, S., and Jacobson, V., Random Early Detection gateways for ++Congestion Avoidance. http://www.aciri.org/floyd/papers/red/red.html ++.TP ++o ++Some changes to the algorithm by Alexey N. Kuznetsov. ++ ++.SH AUTHORS ++Alexey N. 
Kuznetsov, , Alexey Makarenko ++, J Hadi Salim . ++This manpage maintained by bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc-sfq.8 iproute2/debian/tc-sfq.8 +--- iproute2-orig/debian/tc-sfq.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-sfq.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,107 @@ ++.TH TC 8 "8 December 2001" "iproute2" "Linux" ++.SH NAME ++sfq \- Stochastic Fairness Queueing ++.SH SYNOPSIS ++.B tc qdisc ... perturb ++seconds ++.B quantum ++bytes ++ ++.SH DESCRIPTION ++ ++Stochastic Fairness Queueing is a classless queueing discipline available for ++traffic control with the ++.BR tc (8) ++command. ++ ++SFQ does not shape traffic but only schedules the transmission of packets, based on 'flows'. ++The goal is to ensure fairness so that each flow is able to send data in turn, thus preventing ++any single flow from drowning out the rest. ++ ++This may in fact have some effect in mitigating a Denial of Service attempt. ++ ++SFQ is work-conserving and therefore always delivers a packet if it has one available. ++.SH ALGORITHM ++On enqueueing, each packet is assigned to a hash bucket, based on ++.TP ++(i) ++Source address ++.TP ++(ii) ++Destination address ++.TP ++(iii) ++Source port ++.P ++If these are available. SFQ knows about ipv4 and ipv6 and also UDP, TCP and ESP. ++Packets with other protocols are hashed based on the 32bits representation of their ++destination and the socket they belong to. A flow corresponds mostly to a TCP/IP ++connection. ++ ++Each of these buckets should represent a unique flow. Because multiple flows may ++get hashed to the same bucket, the hashing algorithm is perturbed at configurable ++intervals so that the unfairness lasts only for a short while. Perturbation may ++however cause some inadvertent packet reordering to occur. ++ ++When dequeuing, each hashbucket with data is queried in a round robin fashion. 
++ ++The compile time maximum length of the SFQ is 128 packets, which can be spread over ++at most 128 buckets of 1024 available. In case of overflow, tail-drop is performed ++on the fullest bucket, thus maintaining fairness. ++ ++.SH PARAMETERS ++.TP ++perturb ++Interval in seconds for queue algorithm perturbation. Defaults to 0, which means that ++no perturbation occurs. Do not set too low for each perturbation may cause some packet ++reordering. Advised value: 10 ++.TP ++quantum ++Amount of bytes a flow is allowed to dequeue during a round of the round robin process. ++Defaults to the MTU of the interface which is also the advised value and the minimum value. ++ ++.SH EXAMPLE & USAGE ++ ++To attach to device ppp0: ++.P ++# tc qdisc add dev ppp0 root sfq perturb 10 ++.P ++Please note that SFQ, like all non-shaping (work-conserving) qdiscs, is only useful ++if it owns the queue. ++This is the case when the link speed equals the actually available bandwidth. This holds ++for regular phone modems, ISDN connections and direct non-switched ethernet links. ++.P ++Most often, cable modems and DSL devices do not fall into this category. The same holds ++for when connected to a switch and trying to send data to a congested segment also ++connected to the switch. ++.P ++In this case, the effective queue does not reside within Linux and is therefore not ++available for scheduling. ++.P ++Embed SFQ in a classful qdisc to make sure it owns the queue. ++ ++.SH SOURCE ++.TP ++o ++Paul E. McKenney "Stochastic Fairness Queuing", ++IEEE INFOCOM'90 Proceedings, San Francisco, 1990. ++ ++.TP ++o ++Paul E. McKenney "Stochastic Fairness Queuing", ++"Internetworking: Research and Experience", v.2, 1991, p.113-131. ++ ++.TP ++o ++See also: ++M. Shreedhar and George Varghese "Efficient Fair ++Queuing using Deficit Round Robin", Proc. SIGCOMM 95. ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, .
This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc-tbf.8 iproute2/debian/tc-tbf.8 +--- iproute2-orig/debian/tc-tbf.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc-tbf.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,138 @@ ++.TH TC 8 "13 December 2001" "iproute2" "Linux" ++.SH NAME ++tbf \- Token Bucket Filter ++.SH SYNOPSIS ++.B tc qdisc ... tbf rate ++rate ++.B burst ++bytes/cell ++.B ( latency ++ms ++.B | limit ++bytes ++.B ) [ mpu ++bytes ++.B [ peakrate ++rate ++.B mtu ++bytes/cell ++.B ] ] ++.P ++burst is also known as buffer and maxburst. mtu is also known as minburst. ++.SH DESCRIPTION ++ ++The Token Bucket Filter is a classless queueing discipline available for ++traffic control with the ++.BR tc (8) ++command. ++ ++TBF is a pure shaper and never schedules traffic. It is non-work-conserving and may throttle ++itself, although packets are available, to ensure that the configured rate is not exceeded. ++On all platforms except for Alpha, ++it is able to shape up to 1mbit/s of normal traffic with ideal minimal burstiness, ++sending out data exactly at the configured rates. ++ ++Much higher rates are possible but at the cost of losing the minimal burstiness. In that ++case, data is on average dequeued at the configured rate but may be sent much faster at millisecond ++timescales. Because of further queues living in network adaptors, this is often not a problem. ++ ++Kernels with a higher 'HZ' can achieve higher rates with perfect burstiness. On Alpha, HZ is ten ++times higher, leading to a 10mbit/s limit to perfection. These calculations hold for packets of on ++average 1000 bytes. ++ ++.SH ALGORITHM ++As the name implies, traffic is filtered based on the expenditure of ++.B tokens. ++Tokens roughly correspond to bytes, with the additional constraint that each packet consumes ++some tokens, no matter how small it is. This reflects the fact that even a zero-sized packet occupies ++the link for some time. 
++ ++On creation, the TBF is stocked with tokens which correspond to the amount of traffic that can be burst ++in one go. Tokens arrive at a steady rate, until the bucket is full. ++ ++If no tokens are available, packets are queued, up to a configured limit. The TBF now ++calculates the token deficit, and throttles until the first packet in the queue can be sent. ++ ++If it is not acceptable to burst out packets at maximum speed, a peakrate can be configured ++to limit the speed at which the bucket empties. This peakrate is implemented as a second TBF ++with a very small bucket, so that it doesn't burst. ++ ++To achieve perfection, the second bucket may contain only a single packet, which leads to ++the earlier mentioned 1mbit/s limit. ++ ++This limit is caused by the fact that the kernel can only throttle for at minimum 1 'jiffy', which depends ++on HZ as 1/HZ. For perfect shaping, only a single packet can get sent per jiffy - for HZ=100, this means 100 ++packets of on average 1000 bytes each, which roughly corresponds to 1mbit/s. ++ ++.SH PARAMETERS ++See ++.BR tc (8) ++for how to specify the units of these values. ++.TP ++limit or latency ++Limit is the number of bytes that can be queued waiting for tokens to become ++available. You can also specify this the other way around by setting the ++latency parameter, which specifies the maximum amount of time a packet can ++sit in the TBF. The latter calculation takes into account the size of the ++bucket, the rate and possibly the peakrate (if set). These two parameters ++are mutually exclusive. ++.TP ++burst ++Also known as buffer or maxburst. ++Size of the bucket, in bytes. This is the maximum amount of bytes that tokens can be available for instantaneously. ++In general, larger shaping rates require a larger buffer. For 10mbit/s on Intel, you need at least 10kbyte buffer ++if you want to reach your configured rate! 
++ ++If your buffer is too small, packets may be dropped because more tokens arrive per timer tick than fit in your bucket. ++The minimum buffer size can be calculated by dividing the rate by HZ. ++ ++Token usage calculations are performed using a table which by default has a resolution of 8 packets. ++This resolution can be changed by specifying the ++.B cell ++size with the burst. For example, to specify a 6000 byte buffer with a 16 ++byte cell size, set a burst of 6000/16. You will probably never have to set ++this. Must be an integral power of 2. ++.TP ++mpu ++A zero-sized packet does not use zero bandwidth. For ethernet, no packet uses less than 64 bytes. The Minimum Packet Unit ++determines the minimal token usage (specified in bytes) for a packet. Defaults to zero. ++.TP ++rate ++The speed knob. See remarks above about limits! See ++.BR tc (8) ++for units. ++.PP ++Furthermore, if a peakrate is desired, the following parameters are available: ++ ++.TP ++peakrate ++Maximum depletion rate of the bucket. Limited to 1mbit/s on Intel, 10mbit/s on Alpha. The peakrate does ++not need to be set, it is only necessary if perfect millisecond timescale shaping is required. ++ ++.TP ++mtu/minburst ++Specifies the size of the peakrate bucket. For perfect accuracy, should be set to the MTU of the interface. ++If a peakrate is needed, but some burstiness is acceptable, this size can be raised. A 3000 byte minburst ++allows around 3mbit/s of peakrate, given 1000 byte packets. ++ ++Like the regular burstsize you can also specify a ++.B cell ++size. 
++.SH EXAMPLE & USAGE ++ ++To attach a TBF with a sustained maximum rate of 0.5mbit/s, a peakrate of 1.0mbit/s, ++a 5kilobyte buffer, with a pre-bucket queue size limit calculated so the TBF causes ++at most 70ms of latency, with perfect peakrate behaviour, issue: ++.P ++# tc qdisc add dev eth0 root tbf rate 0.5mbit \\ ++ burst 5kb latency 70ms peakrate 1mbit \\ ++ minburst 1540 ++ ++.SH SEE ALSO ++.BR tc (8) ++ ++.SH AUTHOR ++Alexey N. Kuznetsov, . This manpage maintained by ++bert hubert ++ ++ +diff -Naur iproute2-orig/debian/tc.8 iproute2/debian/tc.8 +--- iproute2-orig/debian/tc.8 1969-12-31 16:00:00.000000000 -0800 ++++ iproute2/debian/tc.8 2004-05-21 00:09:38.000000000 -0700 +@@ -0,0 +1,348 @@ ++.TH TC 8 "16 December 2001" "iproute2" "Linux" ++.SH NAME ++tc \- show / manipulate traffic control settings ++.SH SYNOPSIS ++.B tc qdisc [ add | change | replace | link ] dev ++DEV ++.B ++[ parent ++qdisc-id ++.B | root ] ++.B [ handle ++qdisc-id ] qdisc ++[ qdisc specific parameters ] ++.P ++ ++.B tc class [ add | change | replace ] dev ++DEV ++.B parent ++qdisc-id ++.B [ classid ++class-id ] qdisc ++[ qdisc specific parameters ] ++.P ++ ++.B tc filter [ add | change | replace ] dev ++DEV ++.B [ parent ++qdisc-id ++.B | root ] protocol ++protocol ++.B prio ++priority filtertype ++[ filtertype specific parameters ] ++.B flowid ++flow-id ++ ++.B tc [-s | -d ] qdisc show [ dev ++DEV ++.B ] ++.P ++.B tc [-s | -d ] class show dev ++DEV ++.P ++.B tc filter show dev ++DEV ++ ++.SH DESCRIPTION ++.B Tc ++is used to configure Traffic Control in the Linux kernel. Traffic Control consists ++of the following: ++ ++.TP ++SHAPING ++When traffic is shaped, its rate of transmission is under control. Shaping may ++be more than lowering the available bandwidth - it is also used to smooth out ++bursts in traffic for better network behaviour. Shaping occurs on egress. 
++ ++.TP ++SCHEDULING ++By scheduling the transmission of packets it is possible to improve interactivity ++for traffic that needs it while still guaranteeing bandwidth to bulk transfers. Reordering ++is also called prioritizing, and happens only on egress. ++ ++.TP ++POLICING ++Where shaping deals with transmission of traffic, policing pertains to traffic ++arriving. Policing thus occurs on ingress. ++ ++.TP ++DROPPING ++Traffic exceeding a set bandwidth may also be dropped forthwith, both on ++ingress and on egress. ++ ++.P ++Processing of traffic is controlled by three kinds of objects: qdiscs, ++classes and filters. ++ ++.SH QDISCS ++.B qdisc ++is short for 'queueing discipline' and it is elementary to ++understanding traffic control. Whenever the kernel needs to send a ++packet to an interface, it is ++.B enqueued ++to the qdisc configured for that interface. Immediately afterwards, the kernel ++tries to get as many packets as possible from the qdisc, for giving them ++to the network adaptor driver. ++ ++A simple QDISC is the 'pfifo' one, which does no processing at all and is a pure ++First In, First Out queue. It does however store traffic when the network interface ++can't handle it momentarily. ++ ++.SH CLASSES ++Some qdiscs can contain classes, which contain further qdiscs - traffic may ++then be enqueued in any of the inner qdiscs, which are within the ++.B classes. ++When the kernel tries to dequeue a packet from such a ++.B classful qdisc ++it can come from any of the classes. A qdisc may for example prioritize ++certain kinds of traffic by trying to dequeue from certain classes ++before others. ++ ++.SH FILTERS ++A ++.B filter ++is used by a classful qdisc to determine in which class a packet will ++be enqueued. Whenever traffic arrives at a class with subclasses, it needs ++to be classified. Various methods may be employed to do so, one of these ++are the filters. 
All filters attached to the class are called, until one of ++them returns with a verdict. If no verdict was made, other criteria may be ++available. This differs per qdisc. ++ ++It is important to notice that filters reside ++.B within ++qdiscs - they are not masters of what happens. ++ ++.SH CLASSLESS QDISCS ++The classless qdiscs are: ++.TP ++[p|b]fifo ++Simplest usable qdisc, pure First In, First Out behaviour. Limited in ++packets or in bytes. ++.TP ++pfifo_fast ++Standard qdisc for 'Advanced Router' enabled kernels. Consists of a three-band ++queue which honors Type of Service flags, as well as the priority that may be ++assigned to a packet. ++.TP ++red ++Random Early Detection simulates physical congestion by randomly dropping ++packets when nearing configured bandwidth allocation. Well suited to very ++large bandwidth applications. ++.TP ++sfq ++Stochastic Fairness Queueing reorders queued traffic so each 'session' ++gets to send a packet in turn. ++.TP ++tbf ++The Token Bucket Filter is suited for slowing traffic down to a precisely ++configured rate. Scales well to large bandwidths. ++.SH CONFIGURING CLASSLESS QDISCS ++In the absence of classful qdiscs, classless qdiscs can only be attached at ++the root of a device. Full syntax: ++.P ++.B tc qdisc add dev ++DEV ++.B root ++QDISC QDISC-PARAMETERS ++ ++To remove, issue ++.P ++.B tc qdisc del dev ++DEV ++.B root ++ ++The ++.B pfifo_fast ++qdisc is the automatic default in the absence of a configured qdisc. ++ ++.SH CLASSFUL QDISCS ++The classful qdiscs are: ++.TP ++CBQ ++Class Based Queueing implements a rich linksharing hierarchy of classes. ++It contains shaping elements as well as prioritizing capabilities. Shaping is ++performed using link idle time calculations based on average packet size and ++underlying link bandwidth. The latter may be ill-defined for some interfaces. 
++.TP ++HTB ++The Hierarchy Token Bucket implements a rich linksharing hierarchy of ++classes with an emphasis on conforming to existing practices. HTB facilitates ++guaranteeing bandwidth to classes, while also allowing specification of upper ++limits to inter-class sharing. It contains shaping elements, based on TBF and ++can prioritize classes. ++.TP ++PRIO ++The PRIO qdisc is a non-shaping container for a configurable number of ++classes which are dequeued in order. This allows for easy prioritization ++of traffic, where lower classes are only able to send if higher ones have ++no packets available. To facilitate configuration, Type Of Service bits are ++honored by default. ++.SH THEORY OF OPERATION ++Classes form a tree, where each class has a single parent. ++A class may have multiple children. Some qdiscs allow for runtime addition ++of classes (CBQ, HTB) while others (PRIO) are created with a static number of ++children. ++ ++Qdiscs which allow dynamic addition of classes can have zero or more ++subclasses to which traffic may be enqueued. ++ ++Furthermore, each class contains a ++.B leaf qdisc ++which by default has ++.B pfifo ++behaviour though another qdisc can be attached in place. This qdisc may again ++contain classes, but each class can have only one leaf qdisc. ++ ++When a packet enters a classful qdisc it can be ++.B classified ++to one of the classes within. Three criteria are available, although not all ++qdiscs will use all three: ++.TP ++tc filters ++If tc filters are attached to a class, they are consulted first ++for relevant instructions. Filters can match on all fields of a packet header, ++as well as on the firewall mark applied by ipchains or iptables. See ++.BR tc-filters (8). ++.TP ++Type of Service ++Some qdiscs have built in rules for classifying packets based on the TOS field. ++.TP ++skb->priority ++Userspace programs can encode a class-id in the 'skb->priority' field using ++the SO_PRIORITY option. 
++.P ++Each node within the tree can have its own filters but higher level filters ++may also point directly to lower classes. ++ ++If classification did not succeed, packets are enqueued to the leaf qdisc ++attached to that class. Check qdisc specific manpages for details, however. ++ ++.SH NAMING ++All qdiscs, classes and filters have IDs, which can either be specified ++or be automatically assigned. ++ ++IDs consist of a major number and a minor number, separated by a colon. ++ ++.TP ++QDISCS ++A qdisc, which potentially can have children, ++gets assigned a major number, called a 'handle', leaving the minor ++number namespace available for classes. The handle is expressed as '10:'. ++It is customary to explicitly assign a handle to qdiscs expected to have ++children. ++ ++.TP ++CLASSES ++Classes residing under a qdisc share their qdisc major number, but each have ++a separate minor number called a 'classid' that has no relation to their ++parent classes, only to their parent qdisc. The same naming custom as for ++qdiscs applies. ++ ++.TP ++FILTERS ++Filters have a three part ID, which is only needed when using a hashed ++filter hierarchy, for which see ++.BR tc-filters (8). ++.SH UNITS ++All parameters accept a floating point number, possibly followed by a unit. ++.P ++Bandwidths or rates can be specified in: ++.TP ++kbps ++Kilobytes per second ++.TP ++mbps ++Megabytes per second ++.TP ++kbit ++Kilobits per second ++.TP ++mbit ++Megabits per second ++.TP ++bps or a bare number ++Bits per second ++.P ++Amounts of data can be specified in: ++.TP ++kb or k ++Kilobytes ++.TP ++mb or m ++Megabytes ++.TP ++mbit ++Megabits ++.TP ++kbit ++Kilobits ++.TP ++b or a bare number ++Bytes. ++.P ++Lengths of time can be specified in: ++.TP ++s, sec or secs ++Whole seconds ++.TP ++ms, msec or msecs ++Milliseconds ++.TP ++us, usec, usecs or a bare number ++Microseconds. 
++ ++.SH TC COMMANDS ++The following commands are available for qdiscs, classes and filters: ++.TP ++add ++Add a qdisc, class or filter to a node. For all entities, a ++.B parent ++must be passed, either by passing its ID or by attaching directly to the root of a device. ++When creating a qdisc or a filter, it can be named with the ++.B handle ++parameter. A class is named with the ++.B classid ++parameter. ++ ++.TP ++delete ++A qdisc can be removed by specifying its handle, which may also be 'root'. All subclasses and their leaf qdiscs ++are automatically deleted, as well as any filters attached to them. ++ ++.TP ++change ++Some entities can be modified 'in place'. Shares the syntax of 'add', with the exception ++that the handle cannot be changed and neither can the parent. In other words, ++.B ++change ++cannot move a node. ++ ++.TP ++replace ++Performs a nearly atomic remove/add on an existing node id. If the node does not exist yet ++it is created. ++ ++.TP ++link ++Only available for qdiscs and performs a replace where the node ++must exist already. ++ ++ ++.SH HISTORY ++.B tc ++was written by Alexey N. Kuznetsov and added in Linux 2.2. 
++.SH SEE ALSO ++.BR tc-cbq (8), ++.BR tc-htb (8), ++.BR tc-sfq (8), ++.BR tc-red (8), ++.BR tc-tbf (8), ++.BR tc-pfifo (8), ++.BR tc-bfifo (8), ++.BR tc-pfifo_fast (8), ++.BR tc-filters (8) ++ ++.SH AUTHOR ++Manpage maintained by bert hubert (ahu@ds9a.nl) ++ +diff -Naur iproute2-orig/include/rt_names.h iproute2/include/rt_names.h +--- iproute2-orig/include/rt_names.h 2000-04-16 10:42:50.000000000 -0700 ++++ iproute2/include/rt_names.h 2004-05-21 00:16:36.000000000 -0700 +@@ -1,6 +1,8 @@ + #ifndef RT_NAMES_H_ + #define RT_NAMES_H_ 1 + ++#include ++ + const char* rtnl_rtprot_n2a(int id, char *buf, int len); + const char* rtnl_rtscope_n2a(int id, char *buf, int len); + const char* rtnl_rttable_n2a(int id, char *buf, int len); +diff -Naur iproute2-orig/lib/rt_names.c iproute2/lib/rt_names.c +--- iproute2-orig/lib/rt_names.c 2000-04-16 10:42:52.000000000 -0700 ++++ iproute2/lib/rt_names.c 2004-05-21 00:16:36.000000000 -0700 +@@ -16,6 +16,7 @@ + #include + #include + #include ++#include + + static void rtnl_tab_initialize(char *file, char **tab, int size) + { +diff -Naur iproute2-orig/misc/arpd.c iproute2/misc/arpd.c +--- iproute2-orig/misc/arpd.c 2002-01-09 20:02:26.000000000 -0800 ++++ iproute2/misc/arpd.c 2004-05-21 00:16:36.000000000 -0700 +@@ -16,7 +16,7 @@ + #include + #include + #include +-#include ++#include + #include + #include + #include +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include + #include + #include + #include diff --git a/abs/core-testing/iproute2/libdir.patch b/abs/core-testing/iproute2/libdir.patch new file mode 100644 index 0000000..4e5671a --- /dev/null +++ b/abs/core-testing/iproute2/libdir.patch @@ -0,0 +1,129 @@ +Submitted By: Joe Ciccone +Date: 2008-09-31 +Upstream Status: Unknown +Origin: Joe Ciccone +Description: This patch gives the ability to change the libdir. 
+ +--- iproute2-2.6.26.orig/Config 1969-12-31 19:00:00.000000000 -0500 ++++ iproute2-2.6.26/Config 2008-09-13 15:07:31.000000000 -0400 +@@ -0,0 +1 @@ ++# Generated config based on /usr/src/clfs-tarballs/conglomeration/iproute2-2.6.26/include +diff -Naur iproute2-2.6.26.orig/include/iptables.h iproute2-2.6.26/include/iptables.h +--- iproute2-2.6.26.orig/include/iptables.h 2008-07-25 16:46:07.000000000 -0400 ++++ iproute2-2.6.26/include/iptables.h 2008-09-13 15:43:33.000000000 -0400 +@@ -4,8 +4,12 @@ + #include "iptables_common.h" + #include "libiptc/libiptc.h" + ++#ifndef LIBDIR ++#define LIBDIR "/usr/lib" ++#endif ++ + #ifndef IPT_LIB_DIR +-#define IPT_LIB_DIR "/usr/local/lib/iptables" ++#define IPT_LIB_DIR LIBDIR "/iptables" + #endif + + #ifndef IPPROTO_SCTP +diff -Naur iproute2-2.6.26.orig/ip/iplink.c iproute2-2.6.26/ip/iplink.c +--- iproute2-2.6.26.orig/ip/iplink.c 2008-07-25 16:46:07.000000000 -0400 ++++ iproute2-2.6.26/ip/iplink.c 2008-09-13 15:14:41.000000000 -0400 +@@ -34,7 +34,11 @@ + + #define IPLINK_IOCTL_COMPAT 1 + #ifndef DESTDIR +-#define DESTDIR "/usr/" ++#define DESTDIR "/usr" ++#endif ++ ++#ifndef LIBDIR ++#define LIBDIR "/usr/lib" + #endif + + static void usage(void) __attribute__((noreturn)); +@@ -81,7 +85,7 @@ + if (strcmp(l->id, id) == 0) + return l; + +- snprintf(buf, sizeof(buf), DESTDIR "/lib/ip/link_%s.so", id); ++ snprintf(buf, sizeof(buf), DESTDIR LIBDIR "/ip/link_%s.so", id); + dlh = dlopen(buf, RTLD_LAZY); + if (dlh == NULL) { + /* look in current binary, only open once */ +diff -Naur iproute2-2.6.26.orig/Makefile iproute2-2.6.26/Makefile +--- iproute2-2.6.26.orig/Makefile 2008-07-25 16:46:07.000000000 -0400 ++++ iproute2-2.6.26/Makefile 2008-09-13 15:15:24.000000000 -0400 +@@ -1,13 +1,15 @@ +-DESTDIR=/usr/ ++DESTDIR=/usr + SBINDIR=/sbin + CONFDIR=/etc/iproute2 + DOCDIR=/share/doc/iproute2 + MANDIR=/share/man + ++LIBDIR=/usr/lib ++ + # Path to db_185.h include + DBM_INCLUDE:=/usr/include + +-DEFINES= -DRESOLVE_HOSTNAMES 
-DDESTDIR=\"$(DESTDIR)\" ++DEFINES= -DRESOLVE_HOSTNAMES -DDESTDIR=\"$(DESTDIR)\" -DLIBDIR=\"$(LIBDIR)\" + + #options if you have a bind>=4.9.4 libresolv (or, maybe, glibc) + LDLIBS=-lresolv +diff -Naur iproute2-2.6.26.orig/netem/Makefile iproute2-2.6.26/netem/Makefile +--- iproute2-2.6.26.orig/netem/Makefile 2008-07-25 16:46:07.000000000 -0400 ++++ iproute2-2.6.26/netem/Makefile 2008-09-13 14:27:07.000000000 -0400 +@@ -20,9 +20,9 @@ + $(HOSTCC) $(CCOPTS) -I../include -o $@ $@.c -lm + + install: all +- mkdir -p $(DESTDIR)/lib/tc ++ mkdir -p $(DESTDIR)$(LIBDIR)/tc + for i in $(DISTDATA); \ +- do install -m 755 $$i $(DESTDIR)/lib/tc; \ ++ do install -m 755 $$i $(DESTDIR)$(LIBDIR)/tc; \ + done + + clean: +diff -Naur iproute2-2.6.26.orig/tc/Makefile iproute2-2.6.26/tc/Makefile +--- iproute2-2.6.26.orig/tc/Makefile 2008-07-25 16:46:07.000000000 -0400 ++++ iproute2-2.6.26/tc/Makefile 2008-09-13 14:30:37.000000000 -0400 +@@ -72,10 +72,10 @@ + $(AR) rcs $@ $(TCLIB) + + install: all +- mkdir -p $(DESTDIR)/lib/tc ++ mkdir -p $(DESTDIR)$(LIBDIR)/tc + install -m 0755 tc $(DESTDIR)$(SBINDIR) + for i in $(TCSO); \ +- do install -m 755 $$i $(DESTDIR)/lib/tc; \ ++ do install -m 755 $$i $(DESTDIR)$(LIBDIR)/tc; \ + done + + clean: +diff -Naur iproute2-2.6.26.orig/tc/tc_util.c iproute2-2.6.26/tc/tc_util.c +--- iproute2-2.6.26.orig/tc/tc_util.c 2008-07-25 16:46:07.000000000 -0400 ++++ iproute2-2.6.26/tc/tc_util.c 2008-09-13 15:15:16.000000000 -0400 +@@ -25,7 +25,11 @@ + #include "tc_util.h" + + #ifndef DESTDIR +-#define DESTDIR "/usr/" ++#define DESTDIR "/usr" ++#endif ++ ++#ifndef LIBDIR ++#define LIBDIR "/usr/lib" + #endif + + const char *get_tc_lib(void) +@@ -34,7 +38,7 @@ + + lib_dir = getenv("TC_LIB_DIR"); + if (!lib_dir) +- lib_dir = DESTDIR "/lib/tc"; ++ lib_dir = DESTDIR LIBDIR "/tc"; + + return lib_dir; + -- cgit v0.12