1 diff -urN linux-2.6.22.1.orig/include/linux/pkt_sched.h linux-2.6.22.1/include/linux/pkt_sched.h
2 --- linux-2.6.22.1.orig/include/linux/pkt_sched.h 2007-07-09 01:32:17.000000000 +0200
3 +++ linux-2.6.22.1/include/linux/pkt_sched.h 2007-07-17 14:45:53.000000000 +0200
8 +#include <linux/if_ether.h>
10 /* Generic queue statistics, available for all the elements.
11 Particular schedulers may have also their private records.
15 #define NETEM_DIST_SCALE 8192
21 + * A sub weight of a class
22 + * All numbers are represented as parts of (2^64-1).
24 +struct tc_wrr_class_weight {
25 + __u64 val; // Current value (0 is not valid)
26 + __u64 decr; // Value per byte (2^64-1 is not valid)
27 + __u64 incr; // Value per second (2^64-1 is not valid)
28 + __u64 min; // Minimal value (0 is not valid)
29 + __u64 max; // Maximal value (0 is not valid)
30 + time_t tim; // The time at which the above information was correct
33 +/* Packet sent when modifying a class */
34 +struct tc_wrr_class_modf {
35 + /* Fields holding a not-valid value are ignored (the current setting is kept) */
36 + struct tc_wrr_class_weight weight1;
37 + struct tc_wrr_class_weight weight2;
40 +/* Packet returned when querying a class */
41 +struct tc_wrr_class_stats {
42 + char used; /* If this is false the information below is invalid */
43 + struct tc_wrr_class_modf class_modf;
44 + unsigned char addr[ETH_ALEN];
45 + char usemac; /* True if addr is a MAC address, else it is an IP address
46 + (this value is only for convenience, it is always the same
47 + value as in the qdisc) */
48 + int heappos; /* Current heap position or 0 if not in heap */
49 + __u64 penal_ls; /* Penalty value in heap (ls = least significant 64 bits) */
50 + __u64 penal_ms; /* Penalty value in heap (ms = most significant 64 bits) */
53 +/* Qdisc-wide penalty information (boolean values - 2 not valid) */
54 +struct tc_wrr_qdisc_weight {
55 + signed char weight_mode; /* 0=No automatic change to weight
57 + 2=Also multiply with number of machines
58 + 3=Instead multiply with priority divided
59 + with priority of the other.
63 +/* Packet sent when modifying a qdisc */
64 +struct tc_wrr_qdisc_modf {
65 + /* Not-valid values are ignored */
66 + struct tc_wrr_qdisc_weight weight1;
67 + struct tc_wrr_qdisc_weight weight2;
70 +/* Packet sent when creating a qdisc */
71 +struct tc_wrr_qdisc_crt {
72 + struct tc_wrr_qdisc_modf qdisc_modf;
73 + char srcaddr; /* 1=lookup source, 0=lookup destination */
74 + char usemac; /* 1=Classify on MAC addresses, 0=classify on IP */
75 + char usemasq; /* 1=Classify based on masquerading - only valid
76 + if usemac is zero */
77 + int bands_max; /* Maximal number of bands (i.e.: classes) */
78 + int proxy_maxconn; /* If different from 0 then we support proxy remapping
79 + of packets, and this is the maximal number of
80 + concurrent proxy connections. */
83 +/* Packet returned when querying a qdisc */
84 +struct tc_wrr_qdisc_stats {
85 + struct tc_wrr_qdisc_crt qdisc_crt;
87 + int nodes_in_heap; /* Current number of bands wanting to send something */
88 + int bands_cur; /* Current number of bands used (i.e.: MAC/IP addresses seen) */
89 + int bands_reused; /* Number of times a band has been reused for a new address. */
90 + int packets_requed; /* Number of times packets have been requeued. */
91 + __u64 priosum; /* Sum of priorities in heap where 1 is 2^32 */
94 +struct tc_wrr_qdisc_modf_std {
95 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structures this is. 0=This struct */
96 + char change_class; /* Should we also change a class? */
97 + struct tc_wrr_qdisc_modf qdisc_modf; /* Only valid if change_class is false */
98 + unsigned char addr[ETH_ALEN]; /* Class to change (non-used bytes should be 0). Valid only if change_class is true */
99 + struct tc_wrr_class_modf class_modf; /* The change */
102 +/* Used for proxy remapping */
103 +struct tc_wrr_qdisc_modf_proxy {
104 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structures this is. 1=This struct */
105 + char reset; /* This is 1 if the proxyremap information should be reset */
106 + int changec; /* changec is the number of elements in changes. */
107 + long changes[0]; /* Variable-length tail: an array of changec elements of type ProxyRemapBlock */
111 diff -urN linux-2.6.22.1.orig/include/linux/wrr.h linux-2.6.22.1/include/linux/wrr.h
112 --- linux-2.6.22.1.orig/include/linux/wrr.h 1970-01-01 01:00:00.000000000 +0100
113 +++ linux-2.6.22.1/include/linux/wrr.h 2007-07-17 14:45:53.000000000 +0200
119 + * This describes the information that is written in proxyremap.log and which
120 + * is used in the communication between proxyremapserver and proxyremapclient.
121 + * Everything is in network byte order.
124 +/* First this header is sent */
125 +#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
128 + * Then this block is sent every time a connection is opened or closed.
129 + * Note how it is aligned to keep its size small - arrays of this
130 + * structure are saved in many places.
133 + /* Server endpoint of connection */
135 + unsigned short sport;
137 + /* IP protocol for this connection (typically udp or tcp) */
138 + unsigned char proto;
140 + /* Is the connection opened or closed? */
141 + unsigned char open;
143 + /* Client the packets should be accounted to */
145 + unsigned char macaddr[6]; /* Might be 0. */
147 + /* An informal two-character code from the proxyserver. Used for debugging. */
153 + * This is common code for handling the tables containing information about
154 + * which proxyserver connections are associated with which machines.
157 +/* Returns the number of bytes that should be available in the area
158 + * maintained by this module given the maximal number of concurrent
160 +int proxyGetMemSize(int max_connections);
162 +/* Initializes a memory area to use. There must be as many bytes
163 + available as returned by proxyGetMemSize for the same max_connections. */
164 +void proxyInitMem(void *data, int max_connections);
167 +int proxyGetCurConn(void *data); /* Returns current number of connections */
168 +int proxyMaxCurConn(void *data); /* Returns maximal number of connections */
170 +/* This is called to open and close connections. Returns -1 if
171 + a protocol error occurs (i.e.: if one is discovered) */
172 +int proxyConsumeBlock(void *data, ProxyRemapBlock *);
174 +/* Returns the RemapBlock associated with this connection or 0: */
175 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
178 +/* Return the maximum number of connections */
179 +int proxyGetMaxConn(void *data);
182 diff -urN linux-2.6.22.1.orig/MAINTAINERS linux-2.6.22.1/MAINTAINERS
183 --- linux-2.6.22.1.orig/MAINTAINERS 2007-07-09 01:32:17.000000000 +0200
184 +++ linux-2.6.22.1/MAINTAINERS 2007-07-17 14:45:53.000000000 +0200
185 @@ -4005,6 +4005,12 @@
186 W: http://oops.ghostprotocols.net:81/blog
189 +WRR NETWORK SCHEDULER
190 +P: Rasmus Bøg Hansen
192 +W: http://www.zz9.dk/wrr
198 diff -urN linux-2.6.22.1.orig/net/sched/Kconfig linux-2.6.22.1/net/sched/Kconfig
199 --- linux-2.6.22.1.orig/net/sched/Kconfig 2007-07-09 01:32:17.000000000 +0200
200 +++ linux-2.6.22.1/net/sched/Kconfig 2007-07-17 14:54:23.000000000 +0200
202 To compile this code as a module, choose M here: the
203 module will be called sch_prio.
206 + tristate "WRR packet scheduler"
207 + depends on NET_SCHED && ( NF_CONNTRACK || !NF_CONNTRACK )
209 + The weighted round-robin (WRR) scheduler is a queueing discipline
210 + with a built-in classifier: every MAC or IP address (source or
211 + destination, as configured) is mapped to its own class, called a
212 + band. Each band has a weight and the bandwidth a band gets is
213 + proportional to that weight. Weights can be changed dynamically
214 + in order to give penalty to machines transferring much data.
216 + If you want masquerading (the "masq" option to the tc userspace
217 + program) you need to enable connection tracking (NF_CONNTRACK)
218 + in the netfilter options.
220 + If you want to compile it in kernel, say Y. If you want to compile
221 + it as a module, say M here and read Documentation/modules.txt. The
222 + module will be called sch_wrr. If unsure, say N.
225 tristate "Random Early Detection (RED)"
227 diff -urN linux-2.6.22.1.orig/net/sched/Makefile linux-2.6.22.1/net/sched/Makefile
228 --- linux-2.6.22.1.orig/net/sched/Makefile 2007-07-09 01:32:17.000000000 +0200
229 +++ linux-2.6.22.1/net/sched/Makefile 2007-07-17 14:45:53.000000000 +0200
233 obj-y := sch_generic.o
234 +sch_wrr-objs = wrr.o wrr_proxydict.o
236 obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
237 obj-$(CONFIG_NET_CLS) += cls_api.o
239 obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
240 obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
241 obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
242 +obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
243 obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
244 obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
245 obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
246 diff -urN linux-2.6.22.1.orig/net/sched/wrr.c linux-2.6.22.1/net/sched/wrr.c
247 --- linux-2.6.22.1.orig/net/sched/wrr.c 1970-01-01 01:00:00.000000000 +0100
248 +++ linux-2.6.22.1/net/sched/wrr.c 2007-07-17 14:50:11.000000000 +0200
250 +/*-----------------------------------------------------------------------------
251 +Weighted Round Robin scheduler.
253 +Written by Christian Worm Mortensen, cworm@it-c.dk.
257 +This module implements a weighted round robin queue with built-in classifier.
258 +The classifier currently maps each MAC or IP address (configurable either MAC
259 +or IP and either source or destination) to different classes. Each such class
260 +is called a band. When using MAC addresses only bridged packets can be
261 +classified; other packets go to a default MAC address.
263 +Each band has a weight value, where 0<weight<=1. The bandwidth each band
264 +gets is proportional to the weight as can be deduced from the next section.
268 +Each band has a penalty value. Bands having something to send are kept in
269 +a heap according to this value. The band with the lowest penalty value
270 +is in the root of the heap. The penalty value is a 128 bit number. Initially
271 +no bands are in the heap.
273 +Two global 64 bit values counter_low_penal and counter_high_penal are initialized
274 +to 0 and to 2^63-1 respectively.
277 + The packet is inserted in the queue for the band it belongs to. If the band
278 + is not in the heap it is inserted into it. In this case, the upper 64 bits
279 + of its penalty value are set to the same as for the root-band of the heap.
280 + If the heap is empty 0 is used. The lower 64 bits are set to counter_low_penal
281 + and counter_low_penal is incremented by 1.
284 + If the heap is empty we have nothing to send.
286 + If the root band has a non-empty queue a packet is dequeued from that.
287 + The upper 64 bits of the penalty value of the band are incremented by the
288 + packet size divided by the weight of the band. The lower 64 bits are set to
289 + counter_high_penal and counter_high_penal is incremented by 1.
291 + If the root element for some reason has an empty queue it is removed from
292 + the heap and we try to dequeue again.
294 +The effect of the heap and the upper 64 bit of the penalty values is to
295 +implement a weighted round robin queue. The effect of counter_low_penal,
296 +counter_high_penal and the lower 64 bit of the penalty value is primarily to
297 +stabilize the queue and to give better quality of service to machines only
298 +sending a packet now and then. For example machines which have a single
299 +interactive connection such as telnet or simple text chatting.
303 +The weight value can be changed dynamically by the queue itself. The weight
304 +value and how it is changed are described by the two members weight1 and
305 +weight2 which have type tc_wrr_class_weight and which are in each class. And
306 +by the two integer value members of the qdisc called penalfact1 and penalfact2.
307 +The structure is defined as:
309 + struct tc_wrr_class_weight {
310 + // All are represented as parts of (2^64-1).
311 + __u64 val; // Current value (0 is not valid)
312 + __u64 decr; // Value per byte (2^64-1 is not valid)
313 + __u64 incr; // Value per second (2^64-1 is not valid)
314 + __u64 min; // Minimal value (0 is not valid)
315 + __u64 max; // Maximal value (0 is not valid)
317 + // The time where the above information was correct:
321 +The weight value used by the dequeue operations is calculated as
322 +weight1.val*weight2.val. weight1 and weight2 are handled independently and in the
323 +same way as will be described now.
325 +Every second, the val parameter is incremented by incr.
327 +Every time a packet is transmitted the value is decremented by decr times
328 +the packet size. Depending on the value of the weight_mode parameter it
329 +is also multiplied with other numbers. This makes it possible to give
330 +penalty to machines transferring much data.
332 +-----------------------------------------------------------------------------*/
334 +#include <linux/autoconf.h>
335 +#include <linux/module.h>
336 +#include <asm/uaccess.h>
337 +#include <asm/system.h>
338 +#include <linux/bitops.h>
339 +#include <linux/types.h>
340 +#include <linux/kernel.h>
341 +#include <linux/vmalloc.h>
342 +#include <linux/sched.h>
343 +#include <linux/string.h>
344 +#include <linux/mm.h>
345 +#include <linux/socket.h>
346 +#include <linux/sockios.h>
347 +#include <linux/in.h>
348 +#include <linux/errno.h>
349 +#include <linux/interrupt.h>
350 +#include <linux/if_ether.h>
351 +#include <linux/inet.h>
352 +#include <linux/netdevice.h>
353 +#include <linux/etherdevice.h>
354 +#include <linux/notifier.h>
356 +#include <net/route.h>
357 +#include <linux/skbuff.h>
358 +#include <net/sock.h>
359 +#include <net/pkt_sched.h>
360 +#include <linux/if_arp.h>
361 +#include <linux/version.h>
362 +#include <linux/wrr.h>
364 +#define WRR_VER "051111"
366 +#define my_malloc(size) kmalloc(size,GFP_KERNEL)
367 +#define my_free(ptr) kfree(ptr)
369 +#define LOCK_START sch_tree_lock(sch);
370 +#define LOCK_END sch_tree_unlock(sch);
371 +#define ENQUEUE_SUCCESS 0
372 +#define ENQUEUE_FAIL NET_XMIT_DROP
374 +#if defined CONFIG_NF_CONNTRACK || defined CONFIG_NF_CONNTRACK_MODULE
375 +#include <linux/netfilter/nf_conntrack_common.h>
376 +#include <linux/netfilter/nf_conntrack_tuple_common.h>
377 +#include <net/netfilter/nf_conntrack.h>
378 +#define MASQ_SUPPORT
381 +/* The penalty (priority) type: two u64 halves, ms is compared before ls */
382 +typedef u64 penalty_base_t;
383 +#define penalty_base_t_max ((penalty_base_t)-1)
384 +typedef struct penalty_t {
388 +#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls)) /* a <= b; NOTE(review): arguments are not parenthesized and are evaluated several times */
389 +#define penalty_le(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls)) /* a < b; same caveat as penalty_leq */
390 +static penalty_t penalty_max = { penalty_base_t_max, penalty_base_t_max }; /* Largest representable penalty */
397 +struct heap_element;
399 +/* Initializes an empty heap:
400 + * he: A pointer to an uninitialized heap structure identifying the heap
401 + * size: Maximal number of elements the heap can contain
402 + * poll: An array of size "size" used by the heap.
404 +static void heap_init(struct heap *he, int size, struct heap_element *poll);
406 +/* Each element in the heap is identified by a user-assigned id which
407 + * should be a non negative integer less than the size argument
408 + * given to heap_init.
410 +static void heap_insert(struct heap *, int id, penalty_t);
411 +static void heap_remove(struct heap *, int id);
412 +static void heap_set_penalty(struct heap *, int id, penalty_t);
414 +/* Retrieving information */
415 +static char heap_empty(struct heap *); /* Heap empty? */
416 +static char heap_contains(struct heap *, int id); /* Does heap contain
418 +static int heap_root(struct heap *); /* Returns the id of the root */
419 +static penalty_t heap_get_penalty(struct heap *, int id); /* Returns penalty
423 + * Heap implementation
426 +struct heap_element {
428 + int id; /* The user-assigned id of the element stored in this heap slot */
429 + int id2idx; /* For the id whose entry this is (root_1[id + 1]): its index in root_1, or 0 if not in the heap */
433 + struct heap_element *root_1;
437 +/* Heap implementation */
438 +static void heap_init(struct heap *h, int size, struct heap_element *poll)
443 + h->root_1 = poll - 1; /* 1-based view of poll: root_1[1] is poll[0]. NOTE(review): poll - 1 points before the array */
445 + for (i = 0; i < size; i++)
446 + poll[i].id2idx = 0; /* Index 0 marks the id as not being in the heap */
449 +static char heap_empty(struct heap *h)
451 + return h->elements == 0; /* Non-zero when h->elements is 0 */
454 +static char heap_contains(struct heap *h, int id)
456 + return h->root_1[id + 1].id2idx != 0; /* The entry for id is root_1[id + 1]; index 0 means not in the heap */
459 +static int heap_root(struct heap *h)
461 + return h->root_1[1].id; /* Slot 1 is the root; the visible callers check heap_empty() first */
464 +static penalty_t heap_get_penalty(struct heap *h, int id)
466 + return h->root_1[h->root_1[id + 1].id2idx].penalty; /* id -> heap slot -> penalty stored in that slot */
469 +static void heap_penalty_changed_internal(struct heap *h, int idx);
471 +static void heap_set_penalty(struct heap *h, int id, penalty_t p)
473 + int idx = h->root_1[id + 1].id2idx; /* Heap slot currently holding id */
474 + h->root_1[idx].penalty = p;
475 + heap_penalty_changed_internal(h, idx); /* Move the element to where the new penalty belongs */
478 +static void heap_insert(struct heap *h, int id, penalty_t p)
480 + /* Store the new element in slot h->elements, the end of the heap */
482 + h->root_1[h->elements].id = id;
483 + h->root_1[h->elements].penalty = p;
484 + h->root_1[id + 1].id2idx = h->elements; /* Remember which slot holds id */
486 + /* Then move it to the position its penalty calls for */
487 + heap_penalty_changed_internal(h, h->elements);
490 +static void heap_remove(struct heap *h, int id)
492 + int idx = h->root_1[id + 1].id2idx; /* Heap slot currently holding id */
494 + h->root_1[id + 1].id2idx = 0; /* Mark id as no longer in the heap */
496 + if (h->elements == idx) {
501 + mvid = h->root_1[h->elements].id; /* Id of the element in slot h->elements, which is moved into slot idx */
502 + h->root_1[idx].id = mvid;
503 + h->root_1[idx].penalty = h->root_1[h->elements].penalty;
504 + h->root_1[mvid + 1].id2idx = idx; /* Keep the id -> slot mapping of the moved element correct */
507 + heap_penalty_changed_internal(h, idx); /* Restore heap order starting at the refilled slot */
510 +static void heap_swap(struct heap *h, int idx0, int idx1)
516 + /* Exchange penalty and id of the two heap slots */
517 + tmp_p = h->root_1[idx0].penalty;
518 + tmp_id = h->root_1[idx0].id;
519 + h->root_1[idx0].penalty = h->root_1[idx1].penalty;
520 + h->root_1[idx0].id = h->root_1[idx1].id;
521 + h->root_1[idx1].penalty = tmp_p;
522 + h->root_1[idx1].id = tmp_id;
524 + /* Update the id -> slot mapping (id2idx) of both ids */
525 + id0 = h->root_1[idx0].id;
526 + id1 = h->root_1[idx1].id;
527 + h->root_1[id0 + 1].id2idx = idx0;
528 + h->root_1[id1 + 1].id2idx = idx1;
531 +static void heap_penalty_changed_internal(struct heap *h, int cur)
534 + || penalty_leq(h->root_1[cur >> 1].penalty,
535 + h->root_1[cur].penalty)) {
536 + /* We are in heap order upwards - so we should move the element down */
538 + int nxt0 = cur << 1; /* Index of child 0 */
539 + int nxt1 = nxt0 + 1; /* Index of child 1 */
540 + penalty_t pen_c = h->root_1[cur].penalty;
543 + h->elements ? h->root_1[nxt0].penalty : penalty_max;
546 + h->elements ? h->root_1[nxt1].penalty : penalty_max;
548 + if (penalty_le(pen_0, pen_c)
549 + && penalty_leq(pen_0, pen_1)) {
550 + /* Swap with child 0 */
551 + heap_swap(h, cur, nxt0);
553 + } else if (penalty_le(pen_1, pen_c)) {
554 + /* Swap with child 1 */
555 + heap_swap(h, cur, nxt1);
558 + /* Heap in heap order */
563 + /* We are not in heap order upwards (and thus we must be in order downwards).
565 + while (cur != 1) { /* While not root */
566 + int nxt = cur >> 1; /* Index of the parent */
568 + (h->root_1[nxt].penalty, h->root_1[cur].penalty))
570 + heap_swap(h, cur, nxt);
577 + * Classification based on MAC or IP addresses. Note that for historical reasons
578 + * these are prefixed with mac_ since originally only MAC based classification
581 + * This code should be in a separate filter module - but it isn't.
589 +/* Initializes/destroys the structure we maintain.
590 + Returns -1 on error */
591 +static int mac_init(struct mac_head *, int max_macs, char srcaddr,
592 + char usemac, char usemasq, void *proxyremap);
593 +static void mac_done(struct mac_head *);
594 +static void mac_reset(struct mac_head *);
596 +/* Classify a packet. Returns a number n where 0<=n<max_macs. Or -1 if
597 + the packet should be dropped. */
598 +static int mac_classify(struct mac_head *, struct sk_buff *skb);
605 + unsigned char addr[ETH_ALEN]; /* Address of this band (the last two bytes are 0 for IP) */
606 + unsigned long lastused; /* Value of the incr_time counter when a packet for this address was last seen */
607 + int class; /* Classid of this band (0<=classid<max_macs) */
610 +static int mac_compare(const void *a, const void *b)
612 + return memcmp(a, b, ETH_ALEN); /* Byte-wise ordering of the first ETH_ALEN bytes of a and b */
616 + int mac_max; /* Maximal number of MAC addresses/classes allowed */
617 + int mac_cur; /* Current number of MAC addresses/classes */
618 + int mac_reused; /* Number of times we have reused a class with a new address. */
620 + char srcaddr; /* True if we classify on the source address of packets,
621 + else we use destination address. */
622 + char usemac; /* If true we use mac, else we use IP */
623 + char usemasq; /* If true we try to demasquerade */
624 + struct mac_addr *macs; /* Allocated mac_max elements, used mac_cur; searched with bsearch by address */
625 + char *cls2mac; /* Mapping from class numbers to addresses -
626 + there are 6 bytes in each entry */
628 + void *proxyremap; /* Information on proxy remapping of data or 0 */
631 +/* This works like the standard C library function with the same name (implemented recursively): */
632 +static const void *bsearch(const void *key, const void *base, int nmemb,
634 + int (*compare) (const void *, const void *))
643 + m_idx = nmemb >> 1; /* Index of the middle element */
644 + m_ptr = ((const char *)base) + m_idx * size;
646 + i = compare(key, m_ptr);
647 + if (i < 0) /* key is less */
648 + return bsearch(key, base, m_idx, size, compare); /* Continue in the elements before the middle */
650 + return bsearch(key, ((const char *)m_ptr) + size,
651 + nmemb - m_idx - 1, size, compare); /* Continue in the elements after the middle */
656 +static int mac_init(struct mac_head *h, int max_macs, char srcaddr,
657 + char usemac, char usemasq, void *proxyremap)
662 + h->srcaddr = srcaddr;
663 + h->usemac = usemac;
664 + h->usemasq = usemasq;
665 + h->mac_max = max_macs;
666 + h->proxyremap = proxyremap;
668 + h->macs = (struct mac_addr *)
669 + my_malloc(sizeof(struct mac_addr) * max_macs); /* One table entry per class */
670 + h->cls2mac = (char *)my_malloc(6 * max_macs); /* 6 bytes of address per class */
671 + if (!h->macs || !h->cls2mac) { /* At least one allocation failed */
675 + my_free(h->cls2mac);
681 +static void mac_done(struct mac_head *h)
684 + my_free(h->cls2mac);
687 +static void mac_reset(struct mac_head *h)
694 +static int lookup_mac(struct mac_head *h, unsigned char *addr)
699 + /* First try to find the address in the table */
700 + struct mac_addr *m = (struct mac_addr *)
701 + bsearch(addr, h->macs, h->mac_cur, sizeof(struct mac_addr),
705 + m->lastused = h->incr_time++; /* Refresh the last-used stamp of the entry */
708 + /* Okay - the MAC address was not in the table */
709 + if (h->mac_cur == h->mac_max) {
710 + /* And the table is full - delete the oldest entry */
712 + /* Find the oldest entry (the one with the smallest lastused) */
715 + for (i = 1; i < h->mac_cur; i++)
716 + if (h->macs[i].lastused < h->macs[lowidx].lastused)
719 + class = h->macs[lowidx].class; /* The new address takes over the class of the deleted entry */
721 + /* And delete it */
722 + memmove(&h->macs[lowidx], &h->macs[lowidx + 1],
723 + (h->mac_cur - lowidx - 1) * sizeof(struct mac_addr));
727 + class = h->mac_cur;
730 + /* The table is not full - find the position we should put the address in */
731 + for (i = 0; i < h->mac_cur; i++)
732 + if (mac_compare(addr, &h->macs[i]) < 0)
735 + /* We should insert at position i */
736 + memmove(&h->macs[i + 1], &h->macs[i],
737 + (h->mac_cur - i) * sizeof(struct mac_addr));
739 + memcpy(m->addr, addr, ETH_ALEN);
740 + m->lastused = h->incr_time++;
744 + /* Finally update the cls2mac variable */
745 + memcpy(h->cls2mac + ETH_ALEN * class, addr, ETH_ALEN);
750 +int valid_ip_checksum(struct iphdr *ip, int size) /* NOTE(review): not static although the visible caller is in this file */
752 + __u16 header_len = ip->ihl << 2; /* ihl * 4 */
754 + __u16 *ipu = (u16 *) ip;
757 + /* We require 4 bytes in the packet since we access the port numbers. NOTE(review): the 4 bytes are counted from sizeof(struct iphdr), not from header_len */
758 + if ((size < header_len) || size < sizeof(struct iphdr) + 4)
761 + for (a = 0; a < (header_len >> 1); a++, ipu++) { /* Walk the header as 16 bit words */
763 + /* If not the checksum field */
771 + return ip->check == (__u16) ~ c; /* Valid if the stored checksum is the bitwise complement of c */
774 +static int mac_classify(struct mac_head *head, struct sk_buff *skb)
776 + /* We set this to the address we map to. In case we map to an IP
777 + address the last two entries are set to 0. */
778 + unsigned char addr[ETH_ALEN];
780 + /* Used later for mac classification */
781 + struct ethhdr *hdr;
783 + /* This is the size of the network part of the packet */
784 + int size = ((char *)skb->data + skb->len) - ((char *)ip_hdr(skb));
786 + /* Set a default value for the address */
787 + memset(addr, 0, ETH_ALEN);
789 + /* Map IPv4 traffic to its correct address.
790 + Everything else (non-IPv4) goes to a default address */
791 + if (ntohs(skb->protocol) == ETH_P_IP) {
793 + struct iphdr *iph = ip_hdr(skb); /* This is the IP header */
795 + const __u16 *portp = (__u16 *) & (((char *)iph)[iph->ihl * 4]); /* Port numbers; NOTE(review): read below before valid_ip_checksum() has checked the packet size */
796 + __u16 sport = portp[0];
797 + __u16 dport = portp[1];
799 + unsigned ipaddr; /* IP classification */
800 + ProxyRemapBlock *prm; /* Proxy remapping */
802 + /* IP packets must have a valid checksum */
803 + if (!valid_ip_checksum(ip_hdr(skb), size))
808 + ipaddr = iph->saddr;
810 + ipaddr = iph->daddr;
813 + /* Update ipaddr if packet is masqueraded */
814 + if (head->usemasq) {
815 + /* Thanks to Rusty Russell for help with the following code */
816 + enum ip_conntrack_info ctinfo;
817 + struct nf_conn *ct;
818 + ct = nf_ct_get(skb, &ctinfo);
822 + ct->tuplehash[CTINFO2DIR(ctinfo)].
826 + ct->tuplehash[CTINFO2DIR(ctinfo)].
832 + /* Set prm (proxyremap) based on ipaddr */
834 + if (head->proxyremap) {
835 + if (head->srcaddr) {
837 + proxyLookup(head->proxyremap, ipaddr, sport,
838 + ip_hdr(skb)->protocol);
841 + proxyLookup(head->proxyremap, ipaddr, dport,
842 + ip_hdr(skb)->protocol);
845 + /* And finally set the correct address */
847 + /* This packet should be remapped */
849 + memcpy(addr, prm->macaddr, ETH_ALEN);
851 + memcpy(addr, &prm->caddr, sizeof(unsigned));
853 + /* This packet should not be remapped */
854 + if (head->usemac) {
855 + /* Make sure we have a valid mac address */
857 + skb->dev->type == ARPHRD_ETHER &&
858 + skb->len >= ETH_HLEN) {
859 + hdr = (struct ethhdr*)skb->data;
861 + memcpy(addr,hdr->h_source, ETH_ALEN);
863 + memcpy(addr,hdr->h_dest, ETH_ALEN);
866 + memcpy(addr, &ipaddr, 4); /* Classify on the IPv4 address; the last two bytes of addr keep their 0 default */
871 + return lookup_mac(head, addr);
878 +/* Per-class information */
879 +struct wrrc_sched_data {
880 + struct Qdisc *que; /* The queue for this class */
881 + struct tc_wrr_class_modf class_modf; /* Information about the class */
883 + /* For classes in the heap this is the priority value that priosum
884 + was updated with for this class */
888 +/* Per-qdisc information */
889 +struct wrr_sched_data {
890 + struct heap h; /* A heap containing all the bands that will send something */
891 + struct heap_element *poll; /* bandc elements */
893 + /* The sum of the priorities of the elements in the heap where
894 + a priority of 1 is saved as 2^32 */
897 + /* A class for each band */
898 + struct wrrc_sched_data *bands; /* bandc elements */
900 + /* Information maintained by the proxydict module or 0 if we
901 + have no proxy remapping */
904 + /* Always incrementing counters, we always have that any value of
905 + counter_low_penal < any value of counter_high_penal. */
906 + penalty_base_t counter_low_penal;
907 + penalty_base_t counter_high_penal;
909 + struct tc_wrr_qdisc_modf qdisc_modf; /* Penalty updating */
911 + int packets_requed; /* Statistics */
913 + struct mac_head filter; /* The filter */
914 + int bandc; /* Number of bands */
917 +/* Priority handling.
918 + * weight is in interval [0..2^32]
919 + * priosum has whole numbers in the upper and fragments in the lower 32 bits.
921 +static void weight_transmit(struct tc_wrr_class_weight *p,
922 + struct tc_wrr_qdisc_weight q,
924 + u64 priosum, u64 weight, unsigned size)
927 + unsigned long now = jiffies / HZ;
929 + /* Penalty for transmitting */
934 + switch (q.weight_mode) {
936 + change = p->decr * size;
939 + change = p->decr * size * heapsize; /* Additionally scaled by heapsize */
941 + case 3: /* Note: 64 bit division is not always available */
942 + divisor = (u32) (weight >> 16);
945 + change = p->decr * size * (((u32) (priosum >> 16)) / divisor); /* Scaled by priosum / weight using a 32 bit division */
950 + if (p->val > old || p->val < p->min) /* val wrapped around or fell below min */
953 + /* Credit for the time that has passed */
954 + change = (now - p->tim) * p->incr;
958 + if (p->val < old || p->val > p->max) /* val wrapped around or exceeded max */
962 +static void weight_setdefault(struct tc_wrr_class_weight *p)
964 + p->val = (u64) - 1; /* 2^64-1, the largest value */
967 + p->min = (u64) - 1;
968 + p->max = (u64) - 1;
969 + p->tim = jiffies / HZ; /* Current time in whole seconds */
972 +static void weight_setvalue(struct tc_wrr_class_weight *dst,
973 + struct tc_wrr_class_weight *src)
975 + if (src->val != 0) { /* 0 is the not-valid marker for val: dst->val is then left alone */
976 + dst->val = src->val;
977 + dst->tim = jiffies / HZ; /* The new value is correct as of now */
980 + dst->min = src->min;
982 + dst->max = src->max;
983 + if (src->decr != ((u64) - 1)) /* 2^64-1 is the not-valid marker for decr */
984 + dst->decr = src->decr;
985 + if (src->incr != ((u64) - 1)) /* 2^64-1 is the not-valid marker for incr */
986 + dst->incr = src->incr;
987 + if (dst->val < dst->min) /* Finally clamp val into [min, max] */
988 + dst->val = dst->min;
989 + if (dst->val > dst->max)
990 + dst->val = dst->max;
993 +static void wrr_destroy(struct Qdisc *sch)
995 + struct wrr_sched_data *q = qdisc_priv(sch);
998 + /* Destroy our filter */
999 + mac_done(&q->filter);
1001 + /* Destroy all our child queues */
1002 + for (i = 0; i < q->bandc; i++)
1003 + qdisc_destroy(q->bands[i].que);
1005 + /* And free memory */
1006 + my_free(q->bands);
1009 + my_free(q->proxydict);
1012 +static int wrr_init(struct Qdisc *sch, struct rtattr *opt)
1014 + struct wrr_sched_data *q = qdisc_priv(sch);
1015 + int i, maciniterr;
1017 + struct tc_wrr_qdisc_crt *qopt;
1019 + /* Parse options */
1021 + return -EINVAL; /* Options must be specified */
1022 + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) /* Attribute too short to hold a struct tc_wrr_qdisc_crt */
1024 + qopt = RTA_DATA(opt);
1026 + if (qopt->bands_max > 8192 || qopt->bands_max < 2) {
1027 + /* More than 8192 queues or less than 2? That cannot be true - it must be
1032 + if (qopt->proxy_maxconn < 0 || qopt->proxy_maxconn > 20000) {
1033 + /* More than this number of maximal concurrent connections is unrealistic */
1036 +#ifndef MASQ_SUPPORT
1037 + if (qopt->usemasq) {
1041 + q->bandc = qopt->bands_max;
1042 + q->qdisc_modf = qopt->qdisc_modf;
1044 + /* Create structures */
1045 + q->poll = (struct heap_element *)
1046 + my_malloc(sizeof(struct heap_element) * q->bandc);
1047 + q->bands = (struct wrrc_sched_data *)
1048 + my_malloc(sizeof(struct wrrc_sched_data) * q->bandc);
1050 + if (qopt->proxy_maxconn > 0) {
1051 + q->proxydict = my_malloc(proxyGetMemSize(qopt->proxy_maxconn));
1056 + /* Initialize the classifier (mac module) */
1057 + maciniterr = mac_init(&q->filter, qopt->bands_max, qopt->srcaddr,
1058 + qopt->usemac, qopt->usemasq, q->proxydict);
1060 + /* See if we got the memory we wanted */
1061 + if (!q->poll || !q->bands ||
1062 + (qopt->proxy_maxconn > 0 && !q->proxydict) || maciniterr < 0) {
1066 + my_free(q->bands);
1068 + my_free(q->proxydict);
1069 + if (maciniterr >= 0)
1070 + mac_done(&q->filter);
1073 + /* Initialize proxy */
1075 + proxyInitMem(q->proxydict, qopt->proxy_maxconn);
1076 + /* Initialize values */
1077 + q->counter_low_penal = 0;
1078 + q->counter_high_penal = penalty_base_t_max >> 1; /* 2^63-1, so low counter values start far below high counter values */
1079 + q->packets_requed = 0;
1081 + /* Initialize empty heap */
1082 + heap_init(&q->h, q->bandc, q->poll);
1085 + /* Initialize each band */
1087 + for (i = 0; i < q->bandc; i++) {
1088 + weight_setdefault(&q->bands[i].class_modf.weight1);
1089 + weight_setdefault(&q->bands[i].class_modf.weight2);
1091 + struct Qdisc *child =
1092 + qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
1094 + q->bands[i].que = child;
1096 + /* Queue couldn't be created :-( */
1101 + q->bands[i].que = &noop_qdisc;
1105 + /* Destroy again */
1110 + printk(KERN_DEBUG "sch_wrr: Initialized version " WRR_VER "\n");
1115 +static void wrr_reset(struct Qdisc *sch)
1117 + struct wrr_sched_data *q = qdisc_priv(sch);
1120 + /* Reset our own counters to the values wrr_init sets */
1121 + q->counter_low_penal = 0;
1122 + q->counter_high_penal = penalty_base_t_max >> 1;
1123 + q->packets_requed = 0;
1125 + /* Reset filter */
1126 + mac_reset(&q->filter);
1128 + /* Reinitialize heap */
1129 + heap_init(&q->h, q->bandc, q->poll);
1132 + /* Reset all bands: default weights and a reset child qdisc */
1133 + for (i = 0; i < q->bandc; i++) {
1134 + weight_setdefault(&q->bands[i].class_modf.weight1);
1135 + weight_setdefault(&q->bands[i].class_modf.weight2);
1136 + qdisc_reset(q->bands[i].que);
1139 + /* Reset proxy remapping information, keeping the current maximum */
1141 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1144 +static int wrr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1146 + struct wrr_sched_data *q = qdisc_priv(sch);
1147 + int retvalue = ENQUEUE_FAIL;
1149 + /* Classify the packet in skb; mac_classify returns -1 if it should be dropped */
1150 + int band = mac_classify(&q->filter, skb);
1153 + /* Enqueue packet for this band */
1154 + struct Qdisc *qdisc = q->bands[band].que;
1156 + if ((retvalue = qdisc->enqueue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1158 + sch->bstats.bytes += skb->len;
1159 + sch->bstats.packets++;
1162 + /* Insert band into heap if not already there */
1163 + if (!heap_contains(&q->h, band)) {
1165 + if (!heap_empty(&q->h))
1167 + heap_get_penalty(&q->h,
1168 + heap_root(&q->h)).
1172 + p.ls = q->counter_low_penal++;
1173 + heap_insert(&q->h, band, p);
1174 + q->bands[band].priosum_val =
1175 + ((q->bands[band].class_modf.weight1.
1178 + ((q->bands[band].class_modf.weight2.
1180 + q->priosum += q->bands[band].priosum_val;
1184 + /* If we decide not to enqueue it seems like we also need to free the packet */
1188 + if (retvalue != ENQUEUE_SUCCESS) {
1189 + /* Packet not enqueued */
1190 + sch->qstats.drops++;
1196 +static struct sk_buff *wrr_dequeue(struct Qdisc *sch) /* Dequeue from the band at the heap root and update that band's penalty */
1198 + struct wrr_sched_data *q = qdisc_priv(sch);
1199 + struct sk_buff *skb;
1201 + u64 weight, priosum;
1202 + struct wrrc_sched_data *b;
1204 + /* Return if heap is empty */
1205 + if (heap_empty(&q->h))
1208 + /* Find root element */
1209 + band = heap_root(&q->h);
1211 + /* Find priority of this element in interval [1;2^32] */
1212 + b = &q->bands[band];
1214 + /* weight is in interval [1;2^32]: product of two factors, each (top 16 bits of a 64-bit weight) + 1 */
1215 + weight = ((b->class_modf.weight1.val >> 48) + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1216 + priosum = q->priosum; /* keep the sum as it was before this band's share is removed */
1217 + q->priosum -= q->bands[band].priosum_val;
1219 + /* Dequeue the packet from the root */
1220 + skb = q->bands[band].que->dequeue(q->bands[band].que);
1223 + /* There was a packet in this queue */
1227 + /* Find length of packet adjusted by priority */
1228 + adjlen = (u32) (weight >> (32 - 16));
1231 + adjlen = (skb->len << 16) / adjlen;
1233 + /* Update penalty information for this class */
1234 + weight_transmit(&b->class_modf.weight1, q->qdisc_modf.weight1,
1235 + q->h.elements, priosum, weight, skb->len);
1236 + weight_transmit(&b->class_modf.weight2, q->qdisc_modf.weight2,
1237 + q->h.elements, priosum, weight, skb->len);
1238 + q->bands[band].priosum_val =
1239 + ((b->class_modf.weight1.val >> 48) +
1240 + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1241 + q->priosum += q->bands[band].priosum_val;
1243 + /* And update the class in the heap */
1244 + p = heap_get_penalty(&q->h, band);
1246 + p.ls = q->counter_high_penal++;
1247 + heap_set_penalty(&q->h, band, p);
1249 + /* Return packet */
1253 + /* No packet - so the band should be removed from the heap */
1254 + heap_remove(&q->h, band);
1259 +static int wrr_requeue(struct sk_buff *skb, struct Qdisc *sch) /* Put skb back on the child qdisc of the band it classifies to */
1261 + struct wrr_sched_data *q = qdisc_priv(sch);
1262 + struct Qdisc *qdisc;
1265 + /* Find band we took it from */
1266 + int band = mac_classify(&q->filter, skb);
1268 + /* Who should now free the packet? */
1270 + "sch_wrr: Oops - packet requeued could never have been queued.\n");
1271 + sch->qstats.drops++;
1272 + return ENQUEUE_FAIL;
1275 + q->packets_requed++;
1277 + /* Try to requeue it on that band's child qdisc */
1278 + qdisc = q->bands[band].que;
1280 + if ((ret = qdisc->ops->requeue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1283 + sch->qstats.requeues++;
1285 + /* We should restore priority information - but we don't
1287 + * p=heap_get_penalty(&q->h,band);
1289 + * heap_set_penalty(&q->h,band,p);
1292 + return ENQUEUE_SUCCESS;
1294 + sch->qstats.drops++;
1299 +static unsigned int wrr_drop(struct Qdisc *sch) /* Ask child qdiscs to drop a packet, scanning heap slots from last to first */
1301 + struct wrr_sched_data *q = qdisc_priv(sch);
1303 + /* Ugly... Drop bottom-up in the heap */
1306 + for (i = q->h.elements; i >= 1; i--) {
1307 + int band = q->h.root_1[i].id;
1308 + if (q->bands[band].que->ops->drop(q->bands[band].que)) {
1311 + sch->qstats.drops++;
1319 +static int wrr_dump(struct Qdisc *sch, struct sk_buff *skb) /* Fill a tc_wrr_qdisc_stats from the current state and append it as TCA_OPTIONS */
1321 + struct wrr_sched_data *q = qdisc_priv(sch);
1322 + unsigned char *b = skb->tail; /* remember the tail so a failed put can be trimmed away */
1323 + struct tc_wrr_qdisc_stats opt;
1325 + opt.qdisc_crt.qdisc_modf = q->qdisc_modf;
1326 + opt.qdisc_crt.srcaddr = q->filter.srcaddr;
1327 + opt.qdisc_crt.usemac = q->filter.usemac;
1328 + opt.qdisc_crt.usemasq = q->filter.usemasq;
1329 + opt.qdisc_crt.bands_max = q->filter.mac_max;
1330 + opt.nodes_in_heap = q->h.elements;
1331 + opt.bands_cur = q->filter.mac_cur;
1332 + opt.bands_reused = q->filter.mac_reused;
1333 + opt.packets_requed = q->packets_requed;
1334 + opt.priosum = q->priosum;
1336 + if (q->proxydict) { /* proxy figures are reported as 0 when no proxy table exists */
1337 + opt.qdisc_crt.proxy_maxconn = proxyGetMaxConn(q->proxydict);
1338 + opt.proxy_curconn = proxyGetCurConn(q->proxydict);
1340 + opt.qdisc_crt.proxy_maxconn = 0;
1341 + opt.proxy_curconn = 0;
1344 + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1347 + rtattr_failure: /* RTA_PUT jumps to this label on failure */
1348 + skb_trim(skb, b - skb->data);
1352 +static int wrr_tune_std(struct Qdisc *sch, struct rtattr *opt) /* Apply a tc_wrr_qdisc_modf_std: optional per-class weights plus qdisc weight modes */
1354 + struct wrr_sched_data *q = qdisc_priv(sch);
1355 + struct tc_wrr_qdisc_modf_std *qopt = RTA_DATA(opt);
1357 + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) /* attribute payload is shorter than *qopt */
1362 + if (qopt->change_class) { /* class weights are only touched when change_class is set */
1363 + int idx = lookup_mac(&q->filter, qopt->addr);
1365 + (&q->bands[idx].class_modf.weight1,
1366 + &qopt->class_modf.weight1);
1367 + weight_setvalue(&q->bands[idx].class_modf.weight2,
1368 + &qopt->class_modf.weight2);
1370 + if (qopt->qdisc_modf.weight1.weight_mode != -1) /* -1 leaves the current weight mode untouched */
1371 + q->qdisc_modf.weight1.weight_mode =
1372 + qopt->qdisc_modf.weight1.weight_mode;
1373 + if (qopt->qdisc_modf.weight2.weight_mode != -1) /* same convention for the second weight */
1374 + q->qdisc_modf.weight2.weight_mode =
1375 + qopt->qdisc_modf.weight2.weight_mode;
1382 +static int wrr_tune_proxy(struct Qdisc *sch, struct rtattr *opt) /* Apply a tc_wrr_qdisc_modf_proxy: optional reset followed by changec remap blocks */
1384 + struct wrr_sched_data *q = qdisc_priv(sch);
1385 + struct tc_wrr_qdisc_modf_proxy *qopt = RTA_DATA(opt);
1388 + /* Return if we are not configured with proxy support */
1389 + if (!q->proxydict)
1392 + /* Return if not enough data given (fixed part, then fixed part plus changec remap blocks) */
1393 + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)) ||
1395 + RTA_LENGTH(sizeof(*qopt) + sizeof(ProxyRemapBlock) * qopt->changec))
1400 + if (qopt->reset) { /* wipe the table, keeping its configured maximum */
1401 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1403 + /* Apply the changec remap blocks in order */
1404 + for (i = 0; i < qopt->changec; i++) {
1405 + proxyConsumeBlock(q->proxydict,
1406 + &((ProxyRemapBlock *) & qopt->changes)[i]);
1414 +static int wrr_tune(struct Qdisc *sch, struct rtattr *opt) /* Dispatch to the proxy or the standard tune handler */
1416 + if (((struct tc_wrr_qdisc_modf_std *)RTA_DATA(opt))->proxy) { /* the proxy flag is read through the std layout */
1417 + return wrr_tune_proxy(sch, opt);
1419 + return wrr_tune_std(sch, opt);
1425 + * External and internal IDs are equal. They are the band number plus 1.
1428 +/* Replace the child qdisc of a class with another; the previous one is handed back through *old */
1429 +static int wrr_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1430 + struct Qdisc **old)
1432 + struct wrr_sched_data *q = qdisc_priv(sch);
1433 + if (arg > q->bandc || arg == 0) /* only class ids 1..bandc pass this check */
1438 + new = &noop_qdisc;
1440 + LOCK_START * old = q->bands[arg].que;
1441 + q->bands[arg].que = new;
1442 + qdisc_reset(*old); /* the replaced qdisc is reset before being returned */
1443 + LOCK_END return 0;
1446 +/* Returns the child qdisc attached to a class */
1447 +static struct Qdisc *wrr_leaf(struct Qdisc *sch, unsigned long arg)
1449 + struct wrr_sched_data *q = qdisc_priv(sch);
1450 + if (arg > q->bandc || arg == 0) /* only class ids 1..bandc pass this check */
1453 + return q->bands[arg].que;
1456 +static unsigned long wrr_get(struct Qdisc *sch, u32 classid) /* Derive the class id from the minor part of classid */
1458 + struct wrr_sched_data *q = qdisc_priv(sch);
1459 + unsigned long band = TC_H_MIN(classid);
1460 + if (band > q->bandc || band == 0) /* ids outside 1..bandc are presumably rejected - TODO confirm */
1465 +static void wrr_put(struct Qdisc *q, unsigned long cl) /* Counterpart of wrr_get; also installed as .unbind_tcf */
1470 +static int wrr_delete(struct Qdisc *sch, unsigned long cl) /* Class delete hook; validates cl against 1..bandc */
1472 + struct wrr_sched_data *q = qdisc_priv(sch);
1473 + if (cl == 0 || cl > q->bandc) /* ids outside 1..bandc are presumably rejected - TODO confirm */
1479 +static int wrr_dump_class(struct Qdisc *sch, unsigned long cl,
1480 + struct sk_buff *skb, struct tcmsg *tcm) /* Fill tcm and a tc_wrr_class_stats for class cl and append it as TCA_OPTIONS */
1482 + struct wrr_sched_data *q = qdisc_priv(sch);
1483 + unsigned char *b = skb->tail; /* remember the tail so a failed put can be trimmed away */
1484 + struct tc_wrr_class_stats opt;
1486 + /* Handle of this class */
1487 + tcm->tcm_handle = sch->handle | cl;
1489 + if (cl == 0 || cl > q->bandc) /* only class ids 1..bandc are dumped */
1490 + goto rtattr_failure;
1493 + if ((cl < q->bandc) && q->bands[cl].que)
1494 + tcm->tcm_info = q->bands[cl].que->handle;
1496 + if (cl >= q->filter.mac_cur) {
1497 + /* Band is unused: report zeroed stats */
1498 + memset(&opt, 0, sizeof(opt));
1502 + opt.class_modf.weight1 = q->bands[cl].class_modf.weight1;
1503 + opt.class_modf.weight2 = q->bands[cl].class_modf.weight2;
1504 + weight_transmit(&opt.class_modf.weight1, q->qdisc_modf.weight1,
1506 + weight_transmit(&opt.class_modf.weight2, q->qdisc_modf.weight2,
1508 + memcpy(opt.addr, q->filter.cls2mac + cl * ETH_ALEN, ETH_ALEN);
1509 + opt.usemac = q->filter.usemac;
1510 + opt.heappos = q->h.root_1[cl + 1].id2idx; /* 0 means the class is not in the heap */
1511 + if (opt.heappos != 0) {
1513 + opt.penal_ls = heap_get_penalty(&q->h, cl).ls;
1514 + opt.penal_ms = heap_get_penalty(&q->h, cl).ms;
1521 + /* Put queueing information */
1522 + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1526 + skb_trim(skb, b - skb->data);
1530 +static int wrr_change(struct Qdisc *sch, u32 handle, u32 parent,
1531 + struct rtattr **tca, unsigned long *arg) /* Set both weights of class *arg from the tc_wrr_class_modf in TCA_OPTIONS */
1533 + unsigned long cl = *arg;
1534 + struct wrr_sched_data *q = qdisc_priv(sch);
1535 + struct rtattr *opt = tca[TCA_OPTIONS - 1];
1536 + struct tc_wrr_class_modf *copt = RTA_DATA(opt);
1538 + if (cl == 0 || cl > q->bandc) /* only class ids 1..bandc pass this check */
1542 + if (opt->rta_len < RTA_LENGTH(sizeof(*copt))) /* attribute payload is shorter than *copt */
1547 + weight_setvalue(&q->bands[cl].class_modf.weight1, &copt->weight1);
1548 + weight_setvalue(&q->bands[cl].class_modf.weight2, &copt->weight2);
1555 +static void wrr_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Visit class ids 1..bandc through arg->fn */
1557 + struct wrr_sched_data *q = qdisc_priv(sch);
1563 + for (prio = 1; prio <= q->bandc; prio++) {
1564 + if (arg->count < arg->skip) { /* presumably skips entries the walker has already seen - TODO confirm */
1568 + if (arg->fn(sch, prio, arg) < 0) { /* a negative result from the callback is handled specially */
1576 +static struct tcf_proto **wrr_find_tcf(struct Qdisc *sch, unsigned long cl) /* Installed as .tcf_chain; NOTE(review): confirm which filter chain, if any, is returned */
1581 +static unsigned long wrr_bind(struct Qdisc *sch,
1582 + unsigned long parent, u32 classid)
1584 + return wrr_get(sch, classid); /* binding a filter is the same lookup as wrr_get; parent is unused here */
1591 +static struct Qdisc_class_ops wrr_class_ops = { /* per-class operations of the WRR qdisc */
1592 + .graft = wrr_graft,
1596 + .change = wrr_change,
1597 + .delete = wrr_delete,
1599 + .tcf_chain = wrr_find_tcf,
1600 + .bind_tcf = wrr_bind, /* performs the same lookup as wrr_get */
1601 + .unbind_tcf = wrr_put, /* unbinding reuses the put handler */
1602 + .dump = wrr_dump_class,
1605 +static struct Qdisc_ops wrr_qdisc_ops = { /* qdisc-level operations registered by the module init function */
1607 + .cl_ops = &wrr_class_ops,
1609 + .priv_size = sizeof(struct wrr_sched_data), /* private area that the handlers obtain via qdisc_priv() */
1610 + .enqueue = wrr_enqueue,
1611 + .dequeue = wrr_dequeue,
1612 + .requeue = wrr_requeue,
1615 + .reset = wrr_reset,
1616 + .destroy = wrr_destroy,
1617 + .change = wrr_tune, /* dispatches to the std or proxy tune handler */
1619 + .owner = THIS_MODULE,
1622 +static int __init wrr_module_init(void) /* Module entry point: register the WRR qdisc */
1624 + return register_qdisc(&wrr_qdisc_ops); /* the registration result is returned unchanged */
1627 +static void __exit wrr_module_exit(void) /* Module exit point: unregister the WRR qdisc */
1629 + unregister_qdisc(&wrr_qdisc_ops);
1632 +module_init(wrr_module_init) /* hook the init/exit functions into module load and unload */
1633 +module_exit(wrr_module_exit)
1635 +MODULE_LICENSE("GPL");
1636 +MODULE_AUTHOR("Christian Worm Mortensen");
1637 diff -urN linux-2.6.22.1.orig/net/sched/wrr_proxydict.c linux-2.6.22.1/net/sched/wrr_proxydict.c
1638 --- linux-2.6.22.1.orig/net/sched/wrr_proxydict.c 1970-01-01 01:00:00.000000000 +0100
1639 +++ linux-2.6.22.1/net/sched/wrr_proxydict.c 2007-07-17 14:45:53.000000000 +0200
1642 +#include <string.h>
1643 +#include <netinet/in.h>
1646 +#include <linux/wrr.h>
1648 +/* Proxy remapping part of WRR */
1650 +/* Hash function: maps (server, port, proto) to a bucket index in [0, m->hash_size); m must point to a proxy_memory with a non-zero hash_size */
1651 +#define hash_fnc(m,server,port,proto) \
1652 + (((proto)*7+(server)*13+(port)*5)%(m)->hash_size)
1654 +/* Size of the hash table for a given maximal number of connections (twice max_con) */
1655 +#define hash_size_max_con(max_con) (2*(max_con))
1657 +/* The memory area we maintain
1659 + Given a connection we map it by hash_fnc into hash_table. This gives an
1660 + index in next which contains a -1 terminated linked list of connections
1661 + mapping to that hash value.
1663 + The entries in next that are not allocated are also kept in a linked list, where
1664 + the first free index is free_first.
1676 +#define Memory(m) ((proxy_memory*)m) /* view the raw block as its proxy_memory header */
1677 +#define Hash_table(m) ((int*)(((char*)m)+sizeof(proxy_memory))) /* int array stored directly after the header */
1678 +#define Next(m) ((int*)(((char*)m)+sizeof(proxy_memory)+ \
1679 + sizeof(int)*((proxy_memory*)m)->hash_size)) /* int array stored after hash_size hash-table entries */
1680 +#define Info(m) ((ProxyRemapBlock*)(((char*)m)+ \
1681 + sizeof(proxy_memory)+ \
1682 + sizeof(int)*((proxy_memory*)m)->hash_size+\
1683 + sizeof(int)*((proxy_memory*)m)->max_con \
1686 +int proxyGetMemSize(int max_con) /* Bytes needed for the header, the hash table, next[] and info[] for max_con connections */
1688 + return sizeof(proxy_memory) +
1689 + sizeof(int) * hash_size_max_con(max_con) +
1690 + sizeof(int) * max_con + sizeof(ProxyRemapBlock) * max_con;
1693 +void proxyInitMem(void *data, int max_con) /* Set up an empty proxy table for max_con connections inside the block at data */
1695 + proxy_memory *m = Memory(data);
1696 + m->max_con = max_con;
1698 + m->hash_size = hash_size_max_con(max_con); /* must be set before Next() is used, since that macro reads hash_size */
1701 + /* Locate the hash table and next[] arrays inside the block */
1702 + int *hash_table = Hash_table(data);
1703 + int *next = Next(data);
1706 + /* Init the hash table: every bucket starts empty (-1) */
1707 + for (i = 0; i < m->hash_size; i++)
1708 + hash_table[i] = -1;
1710 + /* Init the free-list, which starts at entry 0 */
1711 + for (i = 0; i < m->max_con; i++)
1713 + m->free_first = 0;
1717 +int proxyGetCurConn(void *data) /* Read the cur_con field of the table header */
1719 + return Memory(data)->cur_con;
1722 +int proxyGetMaxConn(void *data) /* Read the max_con field of the table header */
1724 + return Memory(data)->max_con;
1727 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port, /* Search the table for the entry matching (ipaddr, port, proto) */
1730 + proxy_memory *m = Memory(data);
1731 + int *hash_table = Hash_table(m);
1732 + int *next = Next(m);
1733 + ProxyRemapBlock *info = Info(m);
1736 + for (i = hash_table[hash_fnc(m, ipaddr, port, proto)]; i != -1; /* walk the -1 terminated chain of this bucket */
1738 + if (info[i].proto == proto && info[i].sport == port
1739 + && info[i].saddr == ipaddr)
1746 +int proxyConsumeBlock(void *data, ProxyRemapBlock * blk)
1748 + proxy_memory *m = Memory(data);
1749 + int *hash_table = Hash_table(m);
1750 + int *next = Next(m);
1751 + ProxyRemapBlock *info = Info(m);
1752 + int hash = hash_fnc(m, blk->saddr, blk->sport, blk->proto);
1756 + if (m->cur_con == m->max_con)
1759 + /* Insert the block at a free entry */
1760 + info[m->free_first] = *blk;
1763 + foo = next[m->free_first];
1765 + /* And insert it in the hash tabel */
1766 + next[m->free_first] = hash_table[hash];
1767 + hash_table[hash] = m->free_first;
1768 + m->free_first = foo;
1772 + /* Find the block */
1773 + for (toupdate = &hash_table[hash];
1774 + *toupdate != -1; toupdate = &next[*toupdate]) {
1775 + if (info[*toupdate].proto == blk->proto &&
1776 + info[*toupdate].sport == blk->sport &&
1777 + info[*toupdate].saddr == blk->saddr)
1780 + if (*toupdate == -1)
1785 + /* Delete it from the hashing list */
1786 + *toupdate = next[*toupdate];
1788 + /* And put it on the free list */
1789 + next[foo] = m->free_first;
1790 + m->free_first = foo;