1 diff -urN linux-2.6.26.2.org/include/linux/pkt_sched.h linux-2.6.26.2/include/linux/pkt_sched.h
2 --- linux-2.6.26.2.org/include/linux/pkt_sched.h 2008-07-13 23:51:29.000000000 +0200
3 +++ linux-2.6.26.2/include/linux/pkt_sched.h 2008-08-19 11:20:27.000000000 +0200
8 +#include <linux/if_ether.h>
10 /* Generic queue statistics, available for all the elements.
11 Particular schedulers may have also their private records.
15 #define NETEM_DIST_SCALE 8192
21 + * A sub weight and of a class
22 + * All numbers are represented as parts of (2^64-1).
24 +struct tc_wrr_class_weight {
25 + __u64 val; // Current value (0 is not valid)
26 + __u64 decr; // Value pr bytes (2^64-1 is not valid)
27 + __u64 incr; // Value pr seconds (2^64-1 is not valid)
28 + __u64 min; // Minimal value (0 is not valid)
29 + __u64 max; // Minimal value (0 is not valid)
30 + time_t tim; // The time where the above information was correct
33 +/* Packet send when modifying a class */
34 +struct tc_wrr_class_modf {
35 + /* Not-valid values are ignored */
36 + struct tc_wrr_class_weight weight1;
37 + struct tc_wrr_class_weight weight2;
40 +/* Packet returned when quering a class */
41 +struct tc_wrr_class_stats {
42 + char used; /* If this is false the information below is invalid */
43 + struct tc_wrr_class_modf class_modf;
44 + unsigned char addr[ETH_ALEN];
45 + char usemac; /* True if addr is a MAC address, else it is an IP address
46 + (this value is only for convience, it is always the same
47 + value as in the qdisc) */
48 + int heappos; /* Current heap position or 0 if not in heap */
49 + __u64 penal_ls; /* Penalty value in heap (ls) */
50 + __u64 penal_ms; /* Penalty value in heap (ms) */
53 +/* Qdisc-wide penalty information (boolean values - 2 not valid) */
54 +struct tc_wrr_qdisc_weight {
55 + signed char weight_mode; /* 0=No automatic change to weight
57 + 2=Also multiply with number of machines
58 + 3=Instead multiply with priority divided
59 + with priority of the other.
63 +/* Packet send when modifing a qdisc */
64 +struct tc_wrr_qdisc_modf {
65 + /* Not-valid values are ignored */
66 + struct tc_wrr_qdisc_weight weight1;
67 + struct tc_wrr_qdisc_weight weight2;
70 +/* Packet send when creating a qdisc */
71 +struct tc_wrr_qdisc_crt {
72 + struct tc_wrr_qdisc_modf qdisc_modf;
73 + char srcaddr; /* 1=lookup source, 0=lookup destination */
74 + char usemac; /* 1=Classify on MAC addresses, 0=classify on IP */
75 + char usemasq; /* 1=Classify based on masqgrading - only valid
76 + if usemac is zero */
77 + int bands_max; /* Maximal number of bands (i.e.: classes) */
78 + int proxy_maxconn; /* If differnt from 0 then we support proxy remapping
79 + of packets. And this is the number of maximal
80 + concurrent proxy connections. */
83 +/* Packet returned when quering a qdisc */
84 +struct tc_wrr_qdisc_stats {
85 + struct tc_wrr_qdisc_crt qdisc_crt;
87 + int nodes_in_heap; /* Current number of bands wanting to send something */
88 + int bands_cur; /* Current number of bands used (i.e.: MAC/IP addresses seen) */
89 + int bands_reused; /* Number of times this band has been reused. */
90 + int packets_requed; /* Number of times packets have been requeued. */
91 + __u64 priosum; /* Sum of priorities in heap where 1 is 2^32 */
94 +struct tc_wrr_qdisc_modf_std {
95 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structers this is. 0=This struct */
96 + char change_class; /* Should we also change a class? */
97 + struct tc_wrr_qdisc_modf qdisc_modf; /* Only valid if change_class is false */
98 + unsigned char addr[ETH_ALEN]; /* Class to change (non-used bytes should be 0). Valid only of change_class is true */
99 + struct tc_wrr_class_modf class_modf; /* The change */
102 +/* Used for proxyremapping */
103 +struct tc_wrr_qdisc_modf_proxy {
104 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structers this is. 1=This struct */
105 + char reset; /* This is 1 if the proxyremap information should be reset */
106 + int changec; /* changec is the number of elements in changes. */
107 + long changes[0]; /* This is an array of type ProxyRemapBlock */
111 diff -urN linux-2.6.26.2.org/include/linux/wrr.h linux-2.6.26.2/include/linux/wrr.h
112 --- linux-2.6.26.2.org/include/linux/wrr.h 1970-01-01 01:00:00.000000000 +0100
113 +++ linux-2.6.26.2/include/linux/wrr.h 2008-08-19 11:20:27.000000000 +0200
119 + * This describes the information that is written in proxyremap.log and which
120 + * are used in the communication between proxyremapserver and proxyremapclient.
121 + * Everything is in network order.
124 +/* First this header is send */
125 +#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
128 + * Then this block is send every time a connection is opened or closed.
129 + * Note how it is alligned to use small space usage - arrays of this
130 + * structure are saved in many places.
133 + /* Server endpoint of connection */
135 + unsigned short sport;
137 + /* IP protocol for this connection (typically udp or tcp) */
138 + unsigned char proto;
140 + /* Is the connection opened or closed? */
141 + unsigned char open;
143 + /* Client the packets should be accounted to */
145 + unsigned char macaddr[6]; /* Might be 0. */
147 + /* An informal two-charecter code from the proxyserver. Used for debugging. */
153 + * This is common code for for handling the tables containing information about
154 + * which proxyserver connections are associated with which machines..
157 +/* Returns the number of bytes that should be available in the area
158 + * maintained by this module given the maximal number of concurrent
160 +int proxyGetMemSize(int max_connections);
162 +/* Initializes a memory area to use. There must be as many bytes
163 + available as returned by getMemSize. */
164 +void proxyInitMem(void *data, int max_connections);
167 +int proxyGetCurConn(void *data); /* Returns current number of connections */
168 +int proxyMaxCurConn(void *data); /* Returns maximal number of connections */
170 +/* This is called to open and close conenctions. Returns -1 if
171 + a protocol error occores (i.e.: If it is discovered) */
172 +int proxyConsumeBlock(void *data, ProxyRemapBlock *);
174 +/* Returns the RemapBlock associated with this connection or 0: */
175 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
178 +/* Return the maximum number of connections */
179 +int proxyGetMaxConn(void *data);
182 diff -urN linux-2.6.26.2.org/MAINTAINERS linux-2.6.26.2/MAINTAINERS
183 --- linux-2.6.26.2.org/MAINTAINERS 2008-07-13 23:51:29.000000000 +0200
184 +++ linux-2.6.26.2/MAINTAINERS 2008-08-19 11:20:27.000000000 +0200
185 @@ -4500,6 +4500,12 @@
186 W: http://opensource.wolfsonmicro.com/node/7
189 +WRR NETWORK SCHEDULER
190 +P: Rasmus Bøg Hansen
192 +W: http://www.zz9.dk/wrr
198 diff -urN linux-2.6.26.2.org/net/sched/Kconfig linux-2.6.26.2/net/sched/Kconfig
199 --- linux-2.6.26.2.org/net/sched/Kconfig 2008-07-13 23:51:29.000000000 +0200
200 +++ linux-2.6.26.2/net/sched/Kconfig 2008-08-19 11:20:27.000000000 +0200
202 To compile this code as a module, choose M here: the
203 module will be called sch_prio.
206 + tristate "WRR packet scheduler"
207 + depends on NET_SCHED && ( NF_CONNTRACK || !NF_CONNTRACK )
209 + The weighted robin-robin scheduling algorithm directs network
210 + connections to different real servers based on server weights
211 + in a round-robin manner. Servers with higher weights receive
212 + new connections first than those with less weights, and servers
213 + with higher weights get more connections than those with less
214 + weights and servers with equal weights get equal connections.
216 + If you want masquerading (the "masq" option to the tc userspace
217 + program) you need to enable connection tracking (IP_NF_CONNTRACK)
218 + in the netfilter options.
220 + If you want to compile it in kernel, say Y. If you want to compile
221 + it as a module, say M here and read Documentation/modules.txt. The
222 + module will be called sch_wrr. If unsure, say N.
225 tristate "Random Early Detection (RED)"
227 diff -urN linux-2.6.26.2.org/net/sched/Makefile linux-2.6.26.2/net/sched/Makefile
228 --- linux-2.6.26.2.org/net/sched/Makefile 2008-07-13 23:51:29.000000000 +0200
229 +++ linux-2.6.26.2/net/sched/Makefile 2008-08-19 11:20:27.000000000 +0200
233 obj-y := sch_generic.o
234 +sch_wrr-objs = wrr.o wrr_proxydict.o
236 obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
237 obj-$(CONFIG_NET_CLS) += cls_api.o
239 obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
240 obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
241 obj-$(CONFIG_NET_SCH_ESFQ) += sch_esfq.o
242 +obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
243 obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
244 obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
245 obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
246 diff -urN linux-2.6.26.2.org/net/sched/wrr.c linux-2.6.26.2/net/sched/wrr.c
247 --- linux-2.6.26.2.org/net/sched/wrr.c 1970-01-01 01:00:00.000000000 +0100
248 +++ linux-2.6.26.2/net/sched/wrr.c 2008-08-20 16:40:09.000000000 +0200
250 +/*-----------------------------------------------------------------------------
251 +Weighted Round Robin scheduler.
253 +Written by Christian Worm Mortensen, cworm@it-c.dk.
257 +This module implements a weighted round robin queue with build-in classifier.
258 +The classifier currently map each MAC or IP address (configurable either MAC
259 +or IP and either source or destination) to different classes. Each such class
260 +is called a band. Whan using MAC addresses only bridged packets can be
261 +classified other packets go to a default MAC address.
263 +Each band has a weight value, where 0<weight<=1. The bandwidth each band
264 +get is proportional to the weight as can be deduced from the next section.
268 +Each band has a penalty value. Bands having something to sent are kept in
269 +a heap according to this value. The band with the lowest penalty value
270 +is in the root of the heap. The penalty value is a 128 bit number. Initially
271 +no bands are in the heap.
273 +Two global 64 bit values counter_low_penal and couter_high_penal are initialized
274 +to 0 and to 2^63 respectively.
277 + The packet is inserted in the queue for the band it belongs to. If the band
278 + is not in the heap it is inserted into it. In this case, the upper 64 bits
279 + of its penalty value is set to the same as for the root-band of the heap.
280 + If the heap is empty 0 is used. The lower 64 bit is set to couter_low_penal
281 + and couter_low_penal is incremented by 1.
284 + If the heap is empty we have nothing to send.
286 + If the root band has a non-empty queue a packet is dequeued from that.
287 + The upper 64 bit of the penalty value of the band is incremented by the
288 + packet size divided with the weight of the band. The lower 64 bit is set to
289 + couter_high_penal and couter_high_penal is incremented by 1.
291 + If the root element for some reason has an empty queue it is removed from
292 + the heap and we try to dequeue again.
294 +The effect of the heap and the upper 64 bit of the penalty values is to
295 +implement a weighted round robin queue. The effect of counter_low_penal,
296 +counter_high_penal and the lower 64 bit of the penalty value is primarily to
297 +stabilize the queue and to give better quality of service to machines only
298 +sending a packet now and then. For example machines which have a single
299 +interactive connection such as telnet or simple text chatting.
303 +The weight value can be changed dynamically by the queue itself. The weight
304 +value and how it is changed is described by the two members weight1 and
305 +weight2 which has type tc_wrr_class_weight and which are in each class. And
306 +by the two integer value members of the qdisc called penalfact1 and penalfact2.
307 +The structure is defined as:
309 + struct tc_wrr_class_weight {
310 + // All are represented as parts of (2^64-1).
311 + __u64 val; // Current value (0 is not valid)
312 + __u64 decr; // Value pr bytes (2^64-1 is not valid)
313 + __u64 incr; // Value pr seconds (2^64-1 is not valid)
314 + __u64 min; // Minimal value (0 is not valid)
315 + __u64 max; // Minimal value (0 is not valid)
317 + // The time where the above information was correct:
321 +The weight value used by the dequeue operations is calculated as
322 +weight1.val*weight2.val. weight1 and weight2 and handled independently and in the
323 +same way as will be described now.
325 +Every second, the val parameter is incremented by incr.
327 +Every time a packet is transmitted the value is increment by decr times
328 +the packet size. Depending on the value of the weight_mode parameter it
329 +is also mulitplied with other numbers. This makes it possible to give
330 +penalty to machines transferring much data.
332 +-----------------------------------------------------------------------------*/
334 +#include <linux/autoconf.h>
335 +#include <linux/module.h>
336 +#include <asm/uaccess.h>
337 +#include <asm/system.h>
338 +#include <linux/bitops.h>
339 +#include <linux/types.h>
340 +#include <linux/kernel.h>
341 +#include <linux/vmalloc.h>
342 +#include <linux/sched.h>
343 +#include <linux/string.h>
344 +#include <linux/mm.h>
345 +#include <linux/socket.h>
346 +#include <linux/sockios.h>
347 +#include <linux/in.h>
348 +#include <linux/errno.h>
349 +#include <linux/interrupt.h>
350 +#include <linux/if_ether.h>
351 +#include <linux/inet.h>
352 +#include <linux/netdevice.h>
353 +#include <linux/etherdevice.h>
354 +#include <linux/notifier.h>
356 +#include <net/route.h>
357 +#include <linux/skbuff.h>
358 +#include <net/sock.h>
359 +#include <net/pkt_sched.h>
360 +#include <linux/if_arp.h>
361 +#include <linux/version.h>
362 +#include <linux/wrr.h>
364 +#define WRR_VER "080820"
366 +#define my_malloc(size) kmalloc(size,GFP_KERNEL)
367 +#define my_free(ptr) kfree(ptr)
369 +#define LOCK_START sch_tree_lock(sch);
370 +#define LOCK_END sch_tree_unlock(sch);
371 +#define ENQUEUE_SUCCESS 0
372 +#define ENQUEUE_FAIL NET_XMIT_DROP
374 +#if defined CONFIG_IP_NF_CONNTRACK || defined CONFIG_IP_NF_CONNTRACK_MODULE || defined CONFIG_NF_CONNTRACK || defined CONFIG_NF_CONNTRACK_MODULE
375 +#include <net/netfilter/nf_conntrack.h>
376 +#define MASQ_SUPPORT
379 +/* The penalty (priority) type */
380 +typedef u64 penalty_base_t;
381 +#define penalty_base_t_max ((penalty_base_t)-1)
382 +typedef struct penalty_t {
386 +#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls))
387 +#define penalty_le(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls))
388 +static penalty_t penalty_max = { penalty_base_t_max, penalty_base_t_max };
395 +struct heap_element;
397 +/* Initializes an empty heap:
398 + * he: A pointer to an unintialized heap structure identifying the heap
399 + * size: Maximal number of elements the heap can contain
400 + * poll: An array of size "size" used by the heap.
402 +static void heap_init(struct heap *he, int size, struct heap_element *poll);
404 +/* Each element in the heap is identified by a user-assigned id which
405 + * should be a non negative integer less than the size argument
406 + * given to heap_init.
408 +static void heap_insert(struct heap *, int id, penalty_t);
409 +static void heap_remove(struct heap *, int id);
410 +static void heap_set_penalty(struct heap *, int id, penalty_t);
412 +/* Retreviewing information */
413 +static char heap_empty(struct heap *); /* Heap empty? */
414 +static char heap_contains(struct heap *, int id); /* Does heap contain
416 +static int heap_root(struct heap *); /* Returns the id of the root */
417 +static penalty_t heap_get_penalty(struct heap *, int id); /* Returns penalty
421 + * Heap implementation
424 +struct heap_element {
426 + int id; /* The user-assigned id of this element */
427 + int id2idx; /* Maps from user-assigned ids to indices in root_1 */
431 + struct heap_element *root_1;
435 +/* Heap implementation */
436 +static void heap_init(struct heap *h, int size, struct heap_element *poll)
441 + h->root_1 = poll - 1;
443 + for (i = 0; i < size; i++)
444 + poll[i].id2idx = 0;
447 +static char heap_empty(struct heap *h)
449 + return h->elements == 0;
452 +static char heap_contains(struct heap *h, int id)
454 + return h->root_1[id + 1].id2idx != 0;
457 +static int heap_root(struct heap *h)
459 + return h->root_1[1].id;
462 +static penalty_t heap_get_penalty(struct heap *h, int id)
464 + return h->root_1[h->root_1[id + 1].id2idx].penalty;
467 +static void heap_penalty_changed_internal(struct heap *h, int idx);
469 +static void heap_set_penalty(struct heap *h, int id, penalty_t p)
471 + int idx = h->root_1[id + 1].id2idx;
472 + h->root_1[idx].penalty = p;
473 + heap_penalty_changed_internal(h, idx);
476 +static void heap_insert(struct heap *h, int id, penalty_t p)
478 + /* Insert at the end of the heap */
480 + h->root_1[h->elements].id = id;
481 + h->root_1[h->elements].penalty = p;
482 + h->root_1[id + 1].id2idx = h->elements;
484 + /* And put it in the right position */
485 + heap_penalty_changed_internal(h, h->elements);
488 +static void heap_remove(struct heap *h, int id)
490 + int idx = h->root_1[id + 1].id2idx;
492 + h->root_1[id + 1].id2idx = 0;
494 + if (h->elements == idx) {
499 + mvid = h->root_1[h->elements].id;
500 + h->root_1[idx].id = mvid;
501 + h->root_1[idx].penalty = h->root_1[h->elements].penalty;
502 + h->root_1[mvid + 1].id2idx = idx;
505 + heap_penalty_changed_internal(h, idx);
508 +static void heap_swap(struct heap *h, int idx0, int idx1)
514 + /* Simple content */
515 + tmp_p = h->root_1[idx0].penalty;
516 + tmp_id = h->root_1[idx0].id;
517 + h->root_1[idx0].penalty = h->root_1[idx1].penalty;
518 + h->root_1[idx0].id = h->root_1[idx1].id;
519 + h->root_1[idx1].penalty = tmp_p;
520 + h->root_1[idx1].id = tmp_id;
522 + /* Update reverse pointers */
523 + id0 = h->root_1[idx0].id;
524 + id1 = h->root_1[idx1].id;
525 + h->root_1[id0 + 1].id2idx = idx0;
526 + h->root_1[id1 + 1].id2idx = idx1;
529 +static void heap_penalty_changed_internal(struct heap *h, int cur)
532 + || penalty_leq(h->root_1[cur >> 1].penalty,
533 + h->root_1[cur].penalty)) {
534 + /* We are in heap order upwards - so we should move the element down */
536 + int nxt0 = cur << 1;
537 + int nxt1 = nxt0 + 1;
538 + penalty_t pen_c = h->root_1[cur].penalty;
541 + h->elements ? h->root_1[nxt0].penalty : penalty_max;
544 + h->elements ? h->root_1[nxt1].penalty : penalty_max;
546 + if (penalty_le(pen_0, pen_c)
547 + && penalty_leq(pen_0, pen_1)) {
548 + /* Swap with child 0 */
549 + heap_swap(h, cur, nxt0);
551 + } else if (penalty_le(pen_1, pen_c)) {
552 + /* Swap with child 1 */
553 + heap_swap(h, cur, nxt1);
556 + /* Heap in heap order */
561 + /* We are not in heap order upwards (and thus we must be it downwards).
563 + while (cur != 1) { /* While not root */
564 + int nxt = cur >> 1;
566 + (h->root_1[nxt].penalty, h->root_1[cur].penalty))
568 + heap_swap(h, cur, nxt);
575 + * Classification based on MAC or IP adresses. Note that of historical reason
576 + * these are prefixed with mac_ since originally only MAC bases classification
579 + * This code should be in a separate filter module - but it isn't.
587 +/* Initialices/destroys the structure we maintain.
588 + Returns -1 on error */
589 +static int mac_init(struct mac_head *, int max_macs, char srcaddr,
590 + char usemac, char usemasq, void *proxyremap);
591 +static void mac_done(struct mac_head *);
592 +static void mac_reset(struct mac_head *);
594 +/* Classify a packet. Returns a number n where 0<=n<max_macs. Or -1 if
595 + the packet should be dropped. */
596 +static int mac_classify(struct mac_head *, struct sk_buff *skb);
603 + unsigned char addr[ETH_ALEN]; /* Address of this band (last two are 0 on IP) */
604 + unsigned long lastused; /* Last time a packet was encountered */
605 + int class; /* Classid of this band (0<=classid<max_macs) */
608 +static int mac_compare(const void *a, const void *b)
610 + return memcmp(a, b, ETH_ALEN);
614 + int mac_max; /* Maximal number of MAC addresses/classes allowed */
615 + int mac_cur; /* Current number of MAC addresses/classes */
616 + int mac_reused; /* Number of times we have reused a class with a new address. */
618 + char srcaddr; /* True if we classify on the source address of packets,
619 + else we use destination address. */
620 + char usemac; /* If true we use mac, else we use IP */
621 + char usemasq; /* If true we try to demasqgrade */
622 + struct mac_addr *macs; /* Allocated mac_max elements, used max_cur */
623 + char *cls2mac; /* Mapping from classnumbers to addresses -
624 + there is 6 bytes in each entry */
626 + void *proxyremap; /* Information on proxy remapping of data or 0 */
629 +/* This is as the standard C library function with the same name: */
630 +static const void *bsearch(const void *key, const void *base, int nmemb,
632 + int (*compare) (const void *, const void *))
641 + m_idx = nmemb >> 1;
642 + m_ptr = ((const char *)base) + m_idx * size;
644 + i = compare(key, m_ptr);
645 + if (i < 0) /* key is less */
646 + return bsearch(key, base, m_idx, size, compare);
648 + return bsearch(key, ((const char *)m_ptr) + size,
649 + nmemb - m_idx - 1, size, compare);
654 +static int mac_init(struct mac_head *h, int max_macs, char srcaddr,
655 + char usemac, char usemasq, void *proxyremap)
660 + h->srcaddr = srcaddr;
661 + h->usemac = usemac;
662 + h->usemasq = usemasq;
663 + h->mac_max = max_macs;
664 + h->proxyremap = proxyremap;
666 + h->macs = (struct mac_addr *)
667 + my_malloc(sizeof(struct mac_addr) * max_macs);
668 + h->cls2mac = (char *)my_malloc(6 * max_macs);
669 + if (!h->macs || !h->cls2mac) {
673 + my_free(h->cls2mac);
679 +static void mac_done(struct mac_head *h)
682 + my_free(h->cls2mac);
685 +static void mac_reset(struct mac_head *h)
692 +static int lookup_mac(struct mac_head *h, unsigned char *addr)
697 + /* First try to find the address in the table */
698 + struct mac_addr *m = (struct mac_addr *)
699 + bsearch(addr, h->macs, h->mac_cur, sizeof(struct mac_addr),
703 + m->lastused = h->incr_time++;
706 + /* Okay - the MAC adress was not in table */
707 + if (h->mac_cur == h->mac_max) {
708 + /* And the table is full - delete the oldest entry */
710 + /* Find the oldest entry */
713 + for (i = 1; i < h->mac_cur; i++)
714 + if (h->macs[i].lastused < h->macs[lowidx].lastused)
717 + class = h->macs[lowidx].class;
719 + /* And delete it */
720 + memmove(&h->macs[lowidx], &h->macs[lowidx + 1],
721 + (h->mac_cur - lowidx - 1) * sizeof(struct mac_addr));
725 + class = h->mac_cur;
728 + /* The table is not full - find the position we should put the address in */
729 + for (i = 0; i < h->mac_cur; i++)
730 + if (mac_compare(addr, &h->macs[i]) < 0)
733 + /* We should insert at position i */
734 + memmove(&h->macs[i + 1], &h->macs[i],
735 + (h->mac_cur - i) * sizeof(struct mac_addr));
737 + memcpy(m->addr, addr, ETH_ALEN);
738 + m->lastused = h->incr_time++;
742 + /* Finally update the cls2mac variabel */
743 + memcpy(h->cls2mac + ETH_ALEN * class, addr, ETH_ALEN);
748 +int valid_ip_checksum(struct iphdr *ip, int size)
750 + __u16 header_len = ip->ihl << 2;
752 + __u16 *ipu = (u16 *) ip;
755 + /* We require 4 bytes in the packet since we access the port numbers */
756 + if ((size < header_len) || size < sizeof(struct iphdr) + 4)
759 + for (a = 0; a < (header_len >> 1); a++, ipu++) {
761 + /* If not the checksum field */
769 + return ip->check == (__u16) ~ c;
772 +static int mac_classify(struct mac_head *head, struct sk_buff *skb)
774 + /* We set this to the address we map to. In case we map to an IP
775 + address the last two entries are set to 0. */
776 + unsigned char addr[ETH_ALEN];
778 + /* Used later for mac classification */
779 + struct ethhdr *hdr;
781 + /* This is the size of the network part of the packet */
782 + int size = ((char *)skb->data + skb->len) - ((char *)ip_hdr(skb));
784 + /* Set a default value for the address */
785 + memset(addr, 0, ETH_ALEN);
787 + /* Map IPv4 traffic to their correct addresses.
788 + Everything (non-IPv4) else goes to a default address */
789 + if (ntohs(skb->protocol) == ETH_P_IP) {
791 + struct iphdr *iph = ip_hdr(skb); /* This is the IP header */
793 + const __u16 *portp = (__u16 *) & (((char *)iph)[iph->ihl * 4]); /* Port numbers*/
794 + __u16 sport = portp[0];
795 + __u16 dport = portp[1];
797 + unsigned ipaddr; /* IP classification */
798 + ProxyRemapBlock *prm; /* Proxyremapping */
800 + /* IP packets must have valid checsum */
801 + if (!valid_ip_checksum(ip_hdr(skb), size))
806 + ipaddr = iph->saddr;
808 + ipaddr = iph->daddr;
811 + /* Update ipaddr if packet is masqgraded */
812 + if (head->usemasq) {
813 + /* Thanks to Rusty Russell for help with the following code */
814 + enum ip_conntrack_info ctinfo;
815 + struct nf_conn *ct;
816 + ct = nf_ct_get(skb, &ctinfo);
820 + ct->tuplehash[CTINFO2DIR(ctinfo)].
824 + ct->tuplehash[CTINFO2DIR(ctinfo)].
830 + /* Set prm (proxyremap) based on ipaddr */
832 + if (head->proxyremap) {
833 + if (head->srcaddr) {
835 + proxyLookup(head->proxyremap, ipaddr, sport,
836 + ip_hdr(skb)->protocol);
839 + proxyLookup(head->proxyremap, ipaddr, dport,
840 + ip_hdr(skb)->protocol);
843 + /* And finally set the correct address */
845 + /* This packet should be remapped */
847 + memcpy(addr, prm->macaddr, ETH_ALEN);
849 + memcpy(addr, &prm->caddr, sizeof(unsigned));
851 + /* This packet should not be remapped */
852 + if (head->usemac) {
853 + /* Make sure we have a valid mac address */
855 + skb->dev->type == ARPHRD_ETHER &&
856 + skb->len >= ETH_HLEN) {
857 + hdr = (struct ethhdr*)skb->data;
859 + memcpy(addr,hdr->h_source, ETH_ALEN);
861 + memcpy(addr,hdr->h_dest, ETH_ALEN);
864 + memcpy(addr, &ipaddr, 4);
869 + return lookup_mac(head, addr);
876 +/* Pr-class information */
877 +struct wrrc_sched_data {
878 + struct Qdisc *que; /* The queue for this class */
879 + struct tc_wrr_class_modf class_modf; /* Information about the class */
881 + /* For classes in the heap this is the priority value priosum
882 + was updated with for this class */
886 +/* Pr-qdisc information */
887 +struct wrr_sched_data {
888 + struct heap h; /* A heap containing all the bands that will send something */
889 + struct heap_element *poll; /* bandc elements */
891 + /* The sum of the prioities of the elements in the heap where
892 + a priority of 1 is saved as 2^32 */
895 + /* A class for each band */
896 + struct wrrc_sched_data *bands; /* bandc elements */
898 + /* Information maintained by the proxydict module of 0 if we
899 + have no proxy remapping */
902 + /* Always incrementning counters, we always have that any value of
903 + counter_low_penal < any value of counter_high_penal. */
904 + penalty_base_t counter_low_penal;
905 + penalty_base_t counter_high_penal;
907 + struct tc_wrr_qdisc_modf qdisc_modf; /* Penalty updating */
909 + int packets_requed; /* Statistics */
911 + struct mac_head filter; /* The filter */
912 + int bandc; /* Number of bands */
915 +/* Priority handling.
916 + * weight is in interval [0..2^32]
917 + * priosum has whole numbers in the upper and fragments in the lower 32 bits.
919 +static void weight_transmit(struct tc_wrr_class_weight *p,
920 + struct tc_wrr_qdisc_weight q,
922 + u64 priosum, u64 weight, unsigned size)
925 + unsigned long now = jiffies / HZ;
927 + /* Penalty for transmitting */
932 + switch (q.weight_mode) {
934 + change = p->decr * size;
937 + change = p->decr * size * heapsize;
939 + case 3: /* Note: 64 bit division is not always available */
940 + divisor = (u32) (weight >> 16);
943 + change = p->decr * size * (((u32) (priosum >> 16)) / divisor);
948 + if (p->val > old || p->val < p->min)
951 + /* Credit for time went */
952 + change = (now - p->tim) * p->incr;
956 + if (p->val < old || p->val > p->max)
960 +static void weight_setdefault(struct tc_wrr_class_weight *p)
962 + p->val = (u64) - 1;
965 + p->min = (u64) - 1;
966 + p->max = (u64) - 1;
967 + p->tim = jiffies / HZ;
970 +static void weight_setvalue(struct tc_wrr_class_weight *dst,
971 + struct tc_wrr_class_weight *src)
973 + if (src->val != 0) {
974 + dst->val = src->val;
975 + dst->tim = jiffies / HZ;
978 + dst->min = src->min;
980 + dst->max = src->max;
981 + if (src->decr != ((u64) - 1))
982 + dst->decr = src->decr;
983 + if (src->incr != ((u64) - 1))
984 + dst->incr = src->incr;
985 + if (dst->val < dst->min)
986 + dst->val = dst->min;
987 + if (dst->val > dst->max)
988 + dst->val = dst->max;
991 +static void wrr_destroy(struct Qdisc *sch)
993 + struct wrr_sched_data *q = qdisc_priv(sch);
996 + /* Destroy our filter */
997 + mac_done(&q->filter);
999 + /* Destroy all our childre ques */
1000 + for (i = 0; i < q->bandc; i++)
1001 + qdisc_destroy(q->bands[i].que);
1003 + /* And free memory */
1004 + my_free(q->bands);
1007 + my_free(q->proxydict);
1010 +static int wrr_init(struct Qdisc *sch, struct nlattr *opt)
1012 + struct wrr_sched_data *q = qdisc_priv(sch);
1013 + int i, maciniterr;
1015 + struct tc_wrr_qdisc_crt *qopt;
1017 + /* Parse options */
1019 + return -EINVAL; /* Options must be specified */
1020 + if (nla_len(opt) < sizeof(*qopt))
1022 + qopt = nla_data(opt);
1024 + if (qopt->bands_max > 8192 || qopt->bands_max < 2) {
1025 + /* More than 8192 queues or less than 2? That cannot be true - it must be
1030 + if (qopt->proxy_maxconn < 0 || qopt->proxy_maxconn > 20000) {
1031 + /* More than this number of maximal concurrent connections is unrealistic */
1034 +#ifndef MASQ_SUPPORT
1035 + if (qopt->usemasq) {
1039 + q->bandc = qopt->bands_max;
1040 + q->qdisc_modf = qopt->qdisc_modf;
1042 + /* Create structures */
1043 + q->poll = (struct heap_element *)
1044 + my_malloc(sizeof(struct heap_element) * q->bandc);
1045 + q->bands = (struct wrrc_sched_data *)
1046 + my_malloc(sizeof(struct wrrc_sched_data) * q->bandc);
1048 + if (qopt->proxy_maxconn > 0) {
1049 + q->proxydict = my_malloc(proxyGetMemSize(qopt->proxy_maxconn));
1054 + /* Init mac module */
1055 + maciniterr = mac_init(&q->filter, qopt->bands_max, qopt->srcaddr,
1056 + qopt->usemac, qopt->usemasq, q->proxydict);
1058 + /* See if we got the memory we wanted */
1059 + if (!q->poll || !q->bands ||
1060 + (qopt->proxy_maxconn > 0 && !q->proxydict) || maciniterr < 0) {
1064 + my_free(q->bands);
1066 + my_free(q->proxydict);
1067 + if (maciniterr >= 0)
1068 + mac_done(&q->filter);
1071 + /* Initialize proxy */
1073 + proxyInitMem(q->proxydict, qopt->proxy_maxconn);
1074 + /* Initialize values */
1075 + q->counter_low_penal = 0;
1076 + q->counter_high_penal = penalty_base_t_max >> 1;
1077 + q->packets_requed = 0;
1079 + /* Initialize empty heap */
1080 + heap_init(&q->h, q->bandc, q->poll);
1083 + /* Initialize each band */
1085 + for (i = 0; i < q->bandc; i++) {
1086 + weight_setdefault(&q->bands[i].class_modf.weight1);
1087 + weight_setdefault(&q->bands[i].class_modf.weight2);
1089 + struct Qdisc *child =
1090 + qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
1092 + q->bands[i].que = child;
1094 + /* Queue couldn't be created :-( */
1099 + q->bands[i].que = &noop_qdisc;
1103 + /* Destroy again */
1108 + printk(KERN_DEBUG "sch_wrr: Initialized version " WRR_VER "\n");
1113 +static void wrr_reset(struct Qdisc *sch)
1115 + struct wrr_sched_data *q = qdisc_priv(sch);
1118 + /* Reset own values */
1119 + q->counter_low_penal = 0;
1120 + q->counter_high_penal = penalty_base_t_max >> 1;
1121 + q->packets_requed = 0;
1123 + /* Reset filter */
1124 + mac_reset(&q->filter);
1126 + /* Reinitialize heap */
1127 + heap_init(&q->h, q->bandc, q->poll);
1130 + /* Reset all bands */
1131 + for (i = 0; i < q->bandc; i++) {
1132 + weight_setdefault(&q->bands[i].class_modf.weight1);
1133 + weight_setdefault(&q->bands[i].class_modf.weight2);
1134 + qdisc_reset(q->bands[i].que);
1137 + /* Reset proxy remapping information */
1139 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1142 +static int wrr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1144 + struct wrr_sched_data *q = qdisc_priv(sch);
1145 + int retvalue = ENQUEUE_FAIL;
1147 + /* The packet is in skb */
1148 + int band = mac_classify(&q->filter, skb);
1151 + /* Enque packet for this band */
1152 + struct Qdisc *qdisc = q->bands[band].que;
1154 + if ((retvalue = qdisc->enqueue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1156 + sch->bstats.bytes += skb->len;
1157 + sch->bstats.packets++;
1160 + /* Insert band into heap if not already there */
1161 + if (!heap_contains(&q->h, band)) {
1163 + if (!heap_empty(&q->h))
1165 + heap_get_penalty(&q->h,
1166 + heap_root(&q->h)).
1170 + p.ls = q->counter_low_penal++;
1171 + heap_insert(&q->h, band, p);
1172 + q->bands[band].priosum_val =
1173 + ((q->bands[band].class_modf.weight1.
1176 + ((q->bands[band].class_modf.weight2.
1178 + q->priosum += q->bands[band].priosum_val;
1182 + /* If we decide not to enque it seems like we also need to free the packet */
1186 + if (retvalue != ENQUEUE_SUCCESS) {
1187 + /* Packet not enqued */
1188 + sch->qstats.drops++;
1194 +static struct sk_buff *wrr_dequeue(struct Qdisc *sch)
1196 + struct wrr_sched_data *q = qdisc_priv(sch);
1197 + struct sk_buff *skb;
1199 + u64 weight, priosum;
1200 + struct wrrc_sched_data *b;
1202 + /* Return if heap is empty */
1203 + if (heap_empty(&q->h))
1206 + /* Find root element */
1207 + band = heap_root(&q->h);
1209 + /* Find priority of this element in interval [1;2^32] */
1210 + b = &q->bands[band];
1212 + /* weight is in interval [1;2^32] */
1213 + weight = ((b->class_modf.weight1.val >> 48) + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1214 + priosum = q->priosum;
1215 + q->priosum -= q->bands[band].priosum_val;
1217 + /* Dequeue the packet from the root */
1218 + skb = q->bands[band].que->dequeue(q->bands[band].que);
1221 + /* There was a packet in this queue */
1225 + /* Find length of packet adjusted with priority */
1226 + adjlen = (u32) (weight >> (32 - 16));
1229 + adjlen = (skb->len << 16) / adjlen;
1231 + /* Update penalty information for this class */
1232 + weight_transmit(&b->class_modf.weight1, q->qdisc_modf.weight1,
1233 + q->h.elements, priosum, weight, skb->len);
1234 + weight_transmit(&b->class_modf.weight2, q->qdisc_modf.weight2,
1235 + q->h.elements, priosum, weight, skb->len);
1236 + q->bands[band].priosum_val =
1237 + ((b->class_modf.weight1.val >> 48) +
1238 + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1239 + q->priosum += q->bands[band].priosum_val;
1241 + /* And update the class in the heap */
1242 + p = heap_get_penalty(&q->h, band);
1244 + p.ls = q->counter_high_penal++;
1245 + heap_set_penalty(&q->h, band, p);
1247 + /* Return packet */
1251 + /* No packet - so machine should be removed from heap */
1252 + heap_remove(&q->h, band);
1257 +static int wrr_requeue(struct sk_buff *skb, struct Qdisc *sch)
1259 + struct wrr_sched_data *q = qdisc_priv(sch);
1260 + struct Qdisc *qdisc;
1263 + /* Find band we took it from */
1264 + int band = mac_classify(&q->filter, skb);
1266 + /* Who should now free the pakcet? */
1268 + "sch_wrr: Oops - packet requeued could never have been queued.\n");
1269 + sch->qstats.drops++;
1270 + return ENQUEUE_FAIL;
1273 + q->packets_requed++;
1275 + /* Try to requeue it on that machine */
1276 + qdisc = q->bands[band].que;
1278 + if ((ret = qdisc->ops->requeue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1281 + sch->qstats.requeues++;
1283 + /* We should restore priority information - but we don't
1285 + * p=heap_get_penalty(&q->h,band);
1287 + * heap_set_penalty(&q->h,band,p);
1290 + return ENQUEUE_SUCCESS;
1292 + sch->qstats.drops++;
1297 +static unsigned int wrr_drop(struct Qdisc *sch)
1299 + struct wrr_sched_data *q = qdisc_priv(sch);
1301 + /* Ugly... Drop button up in heap */
1304 + for (i = q->h.elements; i >= 1; i--) {
1305 + int band = q->h.root_1[i].id;
1306 + if (q->bands[band].que->ops->drop(q->bands[band].que)) {
1309 + sch->qstats.drops++;
1317 +static int wrr_dump(struct Qdisc *sch, struct sk_buff *skb)
1319 + struct wrr_sched_data *q = qdisc_priv(sch);
1320 + sk_buff_data_t b = skb->tail;
1321 + struct tc_wrr_qdisc_stats opt;
1323 + opt.qdisc_crt.qdisc_modf = q->qdisc_modf;
1324 + opt.qdisc_crt.srcaddr = q->filter.srcaddr;
1325 + opt.qdisc_crt.usemac = q->filter.usemac;
1326 + opt.qdisc_crt.usemasq = q->filter.usemasq;
1327 + opt.qdisc_crt.bands_max = q->filter.mac_max;
1328 + opt.nodes_in_heap = q->h.elements;
1329 + opt.bands_cur = q->filter.mac_cur;
1330 + opt.bands_reused = q->filter.mac_reused;
1331 + opt.packets_requed = q->packets_requed;
1332 + opt.priosum = q->priosum;
1334 + if (q->proxydict) {
1335 + opt.qdisc_crt.proxy_maxconn = proxyGetMaxConn(q->proxydict);
1336 + opt.proxy_curconn = proxyGetCurConn(q->proxydict);
1338 + opt.qdisc_crt.proxy_maxconn = 0;
1339 + opt.proxy_curconn = 0;
1342 + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1345 + nla_put_failure: /* NLA_PUT may jump to this label */
1346 + skb_trim(skb, (unsigned char *) &b - skb->data);
1350 +static int wrr_tune_std(struct Qdisc *sch, struct nlattr *opt)
1352 + struct wrr_sched_data *q = qdisc_priv(sch);
1353 + struct tc_wrr_qdisc_modf_std *qopt = nla_data(opt);
1355 + if (nla_len(opt) < sizeof(*qopt))
1360 + if (qopt->change_class) {
1361 + int idx = lookup_mac(&q->filter, qopt->addr);
1363 + (&q->bands[idx].class_modf.weight1,
1364 + &qopt->class_modf.weight1);
1365 + weight_setvalue(&q->bands[idx].class_modf.weight2,
1366 + &qopt->class_modf.weight2);
1368 + if (qopt->qdisc_modf.weight1.weight_mode != -1)
1369 + q->qdisc_modf.weight1.weight_mode =
1370 + qopt->qdisc_modf.weight1.weight_mode;
1371 + if (qopt->qdisc_modf.weight2.weight_mode != -1)
1372 + q->qdisc_modf.weight2.weight_mode =
1373 + qopt->qdisc_modf.weight2.weight_mode;
1380 +static int wrr_tune_proxy(struct Qdisc *sch, struct nlattr *opt)
1382 + struct wrr_sched_data *q = qdisc_priv(sch);
1383 + struct tc_wrr_qdisc_modf_proxy *qopt = nla_data(opt);
1386 + /* Return if we are not configured with proxy support */
1387 + if (!q->proxydict)
1390 + /* Return if not enough data given */
1391 + if (nla_len(opt) < sizeof(*qopt))
1393 + if (nla_len(opt) < sizeof(*qopt) ||
1394 + nla_len(opt) < sizeof(*qopt) + sizeof(ProxyRemapBlock) * qopt->changec)
1399 + if (qopt->reset) {
1400 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1402 + /* Do all the changes */
1403 + for (i = 0; i < qopt->changec; i++) {
1404 + proxyConsumeBlock(q->proxydict,
1405 + &((ProxyRemapBlock *) & qopt->changes)[i]);
1413 +static int wrr_tune(struct Qdisc *sch, struct nlattr *opt)
1415 + if (((struct tc_wrr_qdisc_modf_std *)nla_data(opt))->proxy) {
1416 + return wrr_tune_proxy(sch, opt);
1418 + return wrr_tune_std(sch, opt);
1424 + * External and internal IDs are equal. They are the band number plus 1.
1427 +/* Replace a class with another */
1428 +static int wrr_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1429 + struct Qdisc **old)
1431 + struct wrr_sched_data *q = qdisc_priv(sch);
1432 + if (arg > q->bandc || arg == 0)
1437 + new = &noop_qdisc;
1439 + LOCK_START * old = q->bands[arg].que;
1440 + q->bands[arg].que = new;
1441 + qdisc_reset(*old);
1442 + LOCK_END return 0;
1445 +/* Returns the qdisc for a class */
1446 +static struct Qdisc *wrr_leaf(struct Qdisc *sch, unsigned long arg)
1448 + struct wrr_sched_data *q = qdisc_priv(sch);
1449 + if (arg > q->bandc || arg == 0)
1452 + return q->bands[arg].que;
1455 +static unsigned long wrr_get(struct Qdisc *sch, u32 classid)
1457 + struct wrr_sched_data *q = qdisc_priv(sch);
1458 + unsigned long band = TC_H_MIN(classid);
1459 + if (band > q->bandc || band == 0)
1464 +static void wrr_put(struct Qdisc *q, unsigned long cl)
1469 +static int wrr_delete(struct Qdisc *sch, unsigned long cl)
1471 + struct wrr_sched_data *q = qdisc_priv(sch);
1472 + if (cl == 0 || cl > q->bandc)
1478 +static int wrr_dump_class(struct Qdisc *sch, unsigned long cl,
1479 + struct sk_buff *skb, struct tcmsg *tcm)
1481 + struct wrr_sched_data *q = qdisc_priv(sch);
1482 + sk_buff_data_t b = skb->tail;
1483 + struct tc_wrr_class_stats opt;
1485 + /* Handle of this class */
1486 + tcm->tcm_handle = sch->handle | cl;
1488 + if (cl == 0 || cl > q->bandc)
1489 + goto nla_put_failure;
1492 + if ((cl < q->bandc) && q->bands[cl].que)
1493 + tcm->tcm_info = q->bands[cl].que->handle;
1495 + if (cl >= q->filter.mac_cur) {
1496 + /* Band is unused */
1497 + memset(&opt, 0, sizeof(opt));
1501 + opt.class_modf.weight1 = q->bands[cl].class_modf.weight1;
1502 + opt.class_modf.weight2 = q->bands[cl].class_modf.weight2;
1503 + weight_transmit(&opt.class_modf.weight1, q->qdisc_modf.weight1,
1505 + weight_transmit(&opt.class_modf.weight2, q->qdisc_modf.weight2,
1507 + memcpy(opt.addr, q->filter.cls2mac + cl * ETH_ALEN, ETH_ALEN);
1508 + opt.usemac = q->filter.usemac;
1509 + opt.heappos = q->h.root_1[cl + 1].id2idx;
1510 + if (opt.heappos != 0) {
1512 + opt.penal_ls = heap_get_penalty(&q->h, cl).ls;
1513 + opt.penal_ms = heap_get_penalty(&q->h, cl).ms;
1520 + /* Put quing information */
1521 + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1524 + nla_put_failure: /* NLA_PUT may jump here */
1525 + skb_trim(skb, (unsigned char *) &b - skb->data);
1529 +static int wrr_change(struct Qdisc *sch, u32 handle, u32 parent,
1530 + struct nlattr **tca, unsigned long *arg)
1532 + unsigned long cl = *arg;
1533 + struct wrr_sched_data *q = qdisc_priv(sch);
1534 + struct nlattr *opt = tca[TCA_OPTIONS];
1535 + struct tc_wrr_class_modf *copt = nla_data(opt);
1537 + if (cl == 0 || cl > q->bandc)
1541 + if (nla_len(opt) < sizeof(*copt))
1546 + weight_setvalue(&q->bands[cl].class_modf.weight1, &copt->weight1);
1547 + weight_setvalue(&q->bands[cl].class_modf.weight2, &copt->weight2);
1554 +static void wrr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1556 + struct wrr_sched_data *q = qdisc_priv(sch);
1562 + for (prio = 1; prio <= q->bandc; prio++) {
1563 + if (arg->count < arg->skip) {
1567 + if (arg->fn(sch, prio, arg) < 0) {
1575 +static struct tcf_proto **wrr_find_tcf(struct Qdisc *sch, unsigned long cl)
1580 +static unsigned long wrr_bind(struct Qdisc *sch,
1581 + unsigned long parent, u32 classid)
1583 + return wrr_get(sch, classid);
1590 +static struct Qdisc_class_ops wrr_class_ops = {
1591 + .graft = wrr_graft,
1595 + .change = wrr_change,
1596 + .delete = wrr_delete,
1598 + .tcf_chain = wrr_find_tcf,
1599 + .bind_tcf = wrr_bind,
1600 + .unbind_tcf = wrr_put,
1601 + .dump = wrr_dump_class,
1604 +static struct Qdisc_ops wrr_qdisc_ops = {
1606 + .cl_ops = &wrr_class_ops,
1608 + .priv_size = sizeof(struct wrr_sched_data),
1609 + .enqueue = wrr_enqueue,
1610 + .dequeue = wrr_dequeue,
1611 + .requeue = wrr_requeue,
1614 + .reset = wrr_reset,
1615 + .destroy = wrr_destroy,
1616 + .change = wrr_tune,
1618 + .owner = THIS_MODULE,
1621 +static int __init wrr_module_init(void)
1623 + return register_qdisc(&wrr_qdisc_ops);
1626 +static void __exit wrr_module_exit(void)
1628 + unregister_qdisc(&wrr_qdisc_ops);
1631 +module_init(wrr_module_init)
1632 +module_exit(wrr_module_exit)
1634 +MODULE_LICENSE("GPL");
1635 +MODULE_AUTHOR("Christian Worm Mortensen");
1636 diff -urN linux-2.6.26.2.org/net/sched/wrr_proxydict.c linux-2.6.26.2/net/sched/wrr_proxydict.c
1637 --- linux-2.6.26.2.org/net/sched/wrr_proxydict.c 1970-01-01 01:00:00.000000000 +0100
1638 +++ linux-2.6.26.2/net/sched/wrr_proxydict.c 2008-08-19 11:20:27.000000000 +0200
1641 +#include <string.h>
1642 +#include <netinet/in.h>
1645 +#include <linux/wrr.h>
1647 +/* Proxy remapping part of WRR */
1649 +/* Hash function */
1650 +#define hash_fnc(m,server,port,proto) \
1651 + (((proto)*7+(server)*13+(port)*5)%m->hash_size)
1653 +/* Size of hash table given maximal number of connections */
1654 +#define hash_size_max_con(max_con) (2*(max_con))
1656 +/* The memory area we maintain
1658 + Given a connection we map it by hash_fnc into hash_table. This gives an
1659 + index in next which contains a -1 terminated linked list of connections
1660 + mapping to that hash value.
1662 + The entries in next not allocated is also in linked list where
1663 + the first free index is free_first.
1675 +#define Memory(m) ((proxy_memory*)m)
1676 +#define Hash_table(m) ((int*)(((char*)m)+sizeof(proxy_memory)))
1677 +#define Next(m) ((int*)(((char*)m)+sizeof(proxy_memory)+ \
1678 + sizeof(int)*((proxy_memory*)m)->hash_size))
1679 +#define Info(m) ((ProxyRemapBlock*)(((char*)m)+ \
1680 + sizeof(proxy_memory)+ \
1681 + sizeof(int)*((proxy_memory*)m)->hash_size+\
1682 + sizeof(int)*((proxy_memory*)m)->max_con \
1685 +int proxyGetMemSize(int max_con)
1687 + return sizeof(proxy_memory) +
1688 + sizeof(int) * hash_size_max_con(max_con) +
1689 + sizeof(int) * max_con + sizeof(ProxyRemapBlock) * max_con;
1692 +void proxyInitMem(void *data, int max_con)
1694 + proxy_memory *m = Memory(data);
1695 + m->max_con = max_con;
1697 + m->hash_size = hash_size_max_con(max_con);
1700 + /* Get pointers */
1701 + int *hash_table = Hash_table(data);
1702 + int *next = Next(data);
1705 + /* Init the hash table */
1706 + for (i = 0; i < m->hash_size; i++)
1707 + hash_table[i] = -1;
1709 + /* Init the free-list */
1710 + for (i = 0; i < m->max_con; i++)
1712 + m->free_first = 0;
1716 +int proxyGetCurConn(void *data)
1718 + return Memory(data)->cur_con;
1721 +int proxyGetMaxConn(void *data)
1723 + return Memory(data)->max_con;
1726 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
1729 + proxy_memory *m = Memory(data);
1730 + int *hash_table = Hash_table(m);
1731 + int *next = Next(m);
1732 + ProxyRemapBlock *info = Info(m);
1735 + for (i = hash_table[hash_fnc(m, ipaddr, port, proto)]; i != -1;
1737 + if (info[i].proto == proto && info[i].sport == port
1738 + && info[i].saddr == ipaddr)
1745 +int proxyConsumeBlock(void *data, ProxyRemapBlock * blk)
1747 + proxy_memory *m = Memory(data);
1748 + int *hash_table = Hash_table(m);
1749 + int *next = Next(m);
1750 + ProxyRemapBlock *info = Info(m);
1751 + int hash = hash_fnc(m, blk->saddr, blk->sport, blk->proto);
1755 + if (m->cur_con == m->max_con)
1758 + /* Insert the block at a free entry */
1759 + info[m->free_first] = *blk;
1762 + foo = next[m->free_first];
1764 + /* And insert it in the hash tabel */
1765 + next[m->free_first] = hash_table[hash];
1766 + hash_table[hash] = m->free_first;
1767 + m->free_first = foo;
1771 + /* Find the block */
1772 + for (toupdate = &hash_table[hash];
1773 + *toupdate != -1; toupdate = &next[*toupdate]) {
1774 + if (info[*toupdate].proto == blk->proto &&
1775 + info[*toupdate].sport == blk->sport &&
1776 + info[*toupdate].saddr == blk->saddr)
1779 + if (*toupdate == -1)
1784 + /* Delete it from the hashing list */
1785 + *toupdate = next[*toupdate];
1787 + /* And put it on the free list */
1788 + next[foo] = m->free_first;
1789 + m->free_first = foo;