1 diff -urN linux-2.6.25.orig/include/linux/pkt_sched.h linux-2.6.25/include/linux/pkt_sched.h
2 --- linux-2.6.25.orig/include/linux/pkt_sched.h 2008-04-17 04:49:44.000000000 +0200
3 +++ linux-2.6.25/include/linux/pkt_sched.h 2008-04-24 16:19:03.000000000 +0200
8 +#include <linux/if_ether.h>
10 /* Generic queue statistics, available for all the elements.
11 Particular schedulers may have also their private records.
15 #define NETEM_DIST_SCALE 8192
21 + * A sub weight of a class
22 + * All numbers are represented as parts of (2^64-1).
24 +struct tc_wrr_class_weight {
25 + __u64 val; // Current value (0 is not valid)
26 + __u64 decr; // Value pr bytes (2^64-1 is not valid)
27 + __u64 incr; // Value pr seconds (2^64-1 is not valid)
28 + __u64 min; // Minimal value (0 is not valid)
29 + __u64 max; // Minimal value (0 is not valid)
30 + time_t tim; // The time where the above information was correct
33 +/* Packet sent when modifying a class */
34 +struct tc_wrr_class_modf {
35 + /* Not-valid values are ignored */
36 + struct tc_wrr_class_weight weight1;
37 + struct tc_wrr_class_weight weight2;
40 +/* Packet returned when querying a class */
41 +struct tc_wrr_class_stats {
42 + char used; /* If this is false the information below is invalid */
43 + struct tc_wrr_class_modf class_modf;
44 + unsigned char addr[ETH_ALEN];
45 + char usemac; /* True if addr is a MAC address, else it is an IP address
46 + (this value is only for convenience, it is always the same
47 + value as in the qdisc) */
48 + int heappos; /* Current heap position or 0 if not in heap */
49 + __u64 penal_ls; /* Penalty value in heap (ls) */
50 + __u64 penal_ms; /* Penalty value in heap (ms) */
53 +/* Qdisc-wide penalty information (boolean values - 2 not valid) */
54 +struct tc_wrr_qdisc_weight {
55 + signed char weight_mode; /* 0=No automatic change to weight
57 + 2=Also multiply with number of machines
58 + 3=Instead multiply with priority divided
59 + with priority of the other.
63 +/* Packet sent when modifying a qdisc */
64 +struct tc_wrr_qdisc_modf {
65 + /* Not-valid values are ignored */
66 + struct tc_wrr_qdisc_weight weight1;
67 + struct tc_wrr_qdisc_weight weight2;
70 +/* Packet sent when creating a qdisc */
71 +struct tc_wrr_qdisc_crt {
72 + struct tc_wrr_qdisc_modf qdisc_modf;
73 + char srcaddr; /* 1=lookup source, 0=lookup destination */
74 + char usemac; /* 1=Classify on MAC addresses, 0=classify on IP */
75 + char usemasq; /* 1=Classify based on masqgrading - only valid
76 + if usemac is zero */
77 + int bands_max; /* Maximal number of bands (i.e.: classes) */
78 + int proxy_maxconn; /* If differnt from 0 then we support proxy remapping
79 + of packets. And this is the number of maximal
80 + concurrent proxy connections. */
83 +/* Packet returned when querying a qdisc */
84 +struct tc_wrr_qdisc_stats {
85 + struct tc_wrr_qdisc_crt qdisc_crt;
87 + int nodes_in_heap; /* Current number of bands wanting to send something */
88 + int bands_cur; /* Current number of bands used (i.e.: MAC/IP addresses seen) */
89 + int bands_reused; /* Number of times this band has been reused. */
90 + int packets_requed; /* Number of times packets have been requeued. */
91 + __u64 priosum; /* Sum of priorities in heap where 1 is 2^32 */
94 +struct tc_wrr_qdisc_modf_std {
95 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structers this is. 0=This struct */
96 + char change_class; /* Should we also change a class? */
97 + struct tc_wrr_qdisc_modf qdisc_modf; /* Only valid if change_class is false */
98 + unsigned char addr[ETH_ALEN]; /* Class to change (non-used bytes should be 0). Valid only of change_class is true */
99 + struct tc_wrr_class_modf class_modf; /* The change */
102 +/* Used for proxyremapping */
103 +struct tc_wrr_qdisc_modf_proxy {
104 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structers this is. 1=This struct */
105 + char reset; /* This is 1 if the proxyremap information should be reset */
106 + int changec; /* changec is the number of elements in changes. */
107 + long changes[0]; /* This is an array of type ProxyRemapBlock */
111 diff -urN linux-2.6.25.orig/include/linux/wrr.h linux-2.6.25/include/linux/wrr.h
112 --- linux-2.6.25.orig/include/linux/wrr.h 1970-01-01 01:00:00.000000000 +0100
113 +++ linux-2.6.25/include/linux/wrr.h 2008-04-24 16:19:05.000000000 +0200
119 + * This describes the information that is written in proxyremap.log and which
120 + * are used in the communication between proxyremapserver and proxyremapclient.
121 + * Everything is in network order.
124 +/* First this header is sent */
125 +#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
128 + * Then this block is sent every time a connection is opened or closed.
129 + * Note how it is aligned to minimize space usage - arrays of this
130 + * structure are saved in many places.
133 + /* Server endpoint of connection */
135 + unsigned short sport;
137 + /* IP protocol for this connection (typically udp or tcp) */
138 + unsigned char proto;
140 + /* Is the connection opened or closed? */
141 + unsigned char open;
143 + /* Client the packets should be accounted to */
145 + unsigned char macaddr[6]; /* Might be 0. */
147 + /* An informal two-character code from the proxyserver. Used for debugging. */
153 + * This is common code for handling the tables containing information about
154 + * which proxyserver connections are associated with which machines.
157 +/* Returns the number of bytes that should be available in the area
158 + * maintained by this module given the maximal number of concurrent
160 +int proxyGetMemSize(int max_connections);
162 +/* Initializes a memory area to use. There must be as many bytes
163 + available as returned by proxyGetMemSize. */
164 +void proxyInitMem(void *data, int max_connections);
167 +int proxyGetCurConn(void *data); /* Returns current number of connections */
168 +int proxyMaxCurConn(void *data); /* Returns maximal number of connections */
170 +/* This is called to open and close connections. Returns -1 if
171 + a protocol error occurs (i.e.: If it is discovered) */
172 +int proxyConsumeBlock(void *data, ProxyRemapBlock *);
174 +/* Returns the RemapBlock associated with this connection or 0: */
175 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
178 +/* Return the maximum number of connections */
179 +int proxyGetMaxConn(void *data);
182 diff -urN linux-2.6.25.orig/MAINTAINERS linux-2.6.25/MAINTAINERS
183 --- linux-2.6.25.orig/MAINTAINERS 2008-04-17 04:49:44.000000000 +0200
184 +++ linux-2.6.25/MAINTAINERS 2008-04-24 16:19:05.000000000 +0200
185 @@ -4351,6 +4351,12 @@
186 W: http://oops.ghostprotocols.net:81/blog
189 +WRR NETWORK SCHEDULER
190 +P: Rasmus Bøg Hansen
192 +W: http://www.zz9.dk/wrr
198 diff -urN linux-2.6.25.orig/net/sched/Kconfig linux-2.6.25/net/sched/Kconfig
199 --- linux-2.6.25.orig/net/sched/Kconfig 2008-04-17 04:49:44.000000000 +0200
200 +++ linux-2.6.25/net/sched/Kconfig 2008-04-24 16:19:05.000000000 +0200
202 sch_rr, so it will load sch_prio, although it is referred
206 + tristate "WRR packet scheduler"
207 + depends on NET_SCHED && ( NF_CONNTRACK || !NF_CONNTRACK )
209 + The weighted round-robin scheduling algorithm directs network
210 + connections to different real servers based on server weights
211 + in a round-robin manner. Servers with higher weights receive
212 + new connections first than those with less weights, and servers
213 + with higher weights get more connections than those with less
214 + weights and servers with equal weights get equal connections.
216 + If you want masquerading (the "masq" option to the tc userspace
217 + program) you need to enable connection tracking (IP_NF_CONNTRACK)
218 + in the netfilter options.
220 + If you want to compile it in kernel, say Y. If you want to compile
221 + it as a module, say M here and read Documentation/modules.txt. The
222 + module will be called sch_wrr. If unsure, say N.
225 tristate "Random Early Detection (RED)"
227 diff -urN linux-2.6.25.orig/net/sched/Makefile linux-2.6.25/net/sched/Makefile
228 --- linux-2.6.25.orig/net/sched/Makefile 2008-04-17 04:49:44.000000000 +0200
229 +++ linux-2.6.25/net/sched/Makefile 2008-04-24 16:19:06.000000000 +0200
231 +sch_wrr-objs = wrr.o wrr_proxydict.o
232 +obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
233 diff -urN linux-2.6.25.orig/net/sched/wrr.c linux-2.6.25/net/sched/wrr.c
234 --- linux-2.6.25.orig/net/sched/wrr.c 1970-01-01 01:00:00.000000000 +0100
235 +++ linux-2.6.25/net/sched/wrr.c 2008-04-24 16:19:07.000000000 +0200
237 +/*-----------------------------------------------------------------------------
238 +Weighted Round Robin scheduler.
240 +Written by Christian Worm Mortensen, cworm@it-c.dk.
244 +This module implements a weighted round robin queue with a built-in classifier.
245 +The classifier currently maps each MAC or IP address (configurable either MAC
246 +or IP and either source or destination) to different classes. Each such class
247 +is called a band. When using MAC addresses only bridged packets can be
248 +classified; other packets go to a default MAC address.
250 +Each band has a weight value, where 0<weight<=1. The bandwidth each band
251 +gets is proportional to the weight as can be deduced from the next section.
255 +Each band has a penalty value. Bands having something to send are kept in
256 +a heap according to this value. The band with the lowest penalty value
257 +is in the root of the heap. The penalty value is a 128 bit number. Initially
258 +no bands are in the heap.
260 +Two global 64 bit values counter_low_penal and counter_high_penal are initialized
261 +to 0 and to 2^63 respectively.
264 + The packet is inserted in the queue for the band it belongs to. If the band
265 + is not in the heap it is inserted into it. In this case, the upper 64 bits
266 + of its penalty value is set to the same as for the root-band of the heap.
267 + If the heap is empty 0 is used. The lower 64 bit is set to counter_low_penal
268 + and counter_low_penal is incremented by 1.
271 + If the heap is empty we have nothing to send.
273 + If the root band has a non-empty queue a packet is dequeued from that.
274 + The upper 64 bit of the penalty value of the band is incremented by the
275 + packet size divided with the weight of the band. The lower 64 bit is set to
276 + counter_high_penal and counter_high_penal is incremented by 1.
278 + If the root element for some reason has an empty queue it is removed from
279 + the heap and we try to dequeue again.
281 +The effect of the heap and the upper 64 bit of the penalty values is to
282 +implement a weighted round robin queue. The effect of counter_low_penal,
283 +counter_high_penal and the lower 64 bit of the penalty value is primarily to
284 +stabilize the queue and to give better quality of service to machines only
285 +sending a packet now and then. For example machines which have a single
286 +interactive connection such as telnet or simple text chatting.
290 +The weight value can be changed dynamically by the queue itself. The weight
291 +value and how it is changed is described by the two members weight1 and
292 +weight2 which has type tc_wrr_class_weight and which are in each class. And
293 +by the two integer value members of the qdisc called penalfact1 and penalfact2.
294 +The structure is defined as:
296 + struct tc_wrr_class_weight {
297 + // All are represented as parts of (2^64-1).
298 + __u64 val; // Current value (0 is not valid)
299 + __u64 decr; // Value pr bytes (2^64-1 is not valid)
300 + __u64 incr; // Value pr seconds (2^64-1 is not valid)
301 + __u64 min; // Minimal value (0 is not valid)
302 + __u64 max; // Maximal value (0 is not valid)
304 + // The time where the above information was correct:
308 +The weight value used by the dequeue operations is calculated as
309 +weight1.val*weight2.val. weight1 and weight2 are handled independently and in the
310 +same way as will be described now.
312 +Every second, the val parameter is incremented by incr.
314 +Every time a packet is transmitted the value is decremented by decr times
315 +the packet size. Depending on the value of the weight_mode parameter it
316 +is also multiplied with other numbers. This makes it possible to give
317 +penalty to machines transferring much data.
319 +-----------------------------------------------------------------------------*/
321 +#include <linux/autoconf.h>
322 +#include <linux/module.h>
323 +#include <asm/uaccess.h>
324 +#include <asm/system.h>
325 +#include <linux/bitops.h>
326 +#include <linux/types.h>
327 +#include <linux/kernel.h>
328 +#include <linux/vmalloc.h>
329 +#include <linux/sched.h>
330 +#include <linux/string.h>
331 +#include <linux/mm.h>
332 +#include <linux/socket.h>
333 +#include <linux/sockios.h>
334 +#include <linux/in.h>
335 +#include <linux/errno.h>
336 +#include <linux/interrupt.h>
337 +#include <linux/if_ether.h>
338 +#include <linux/inet.h>
339 +#include <linux/netdevice.h>
340 +#include <linux/etherdevice.h>
341 +#include <linux/notifier.h>
343 +#include <net/route.h>
344 +#include <linux/skbuff.h>
345 +#include <net/sock.h>
346 +#include <net/pkt_sched.h>
347 +#include <linux/if_arp.h>
348 +#include <linux/version.h>
349 +#include <linux/wrr.h>
351 +#define WRR_VER "051111"
353 +#define my_malloc(size) kmalloc(size,GFP_KERNEL)
354 +#define my_free(ptr) kfree(ptr)
356 +#define LOCK_START sch_tree_lock(sch);
357 +#define LOCK_END sch_tree_unlock(sch);
358 +#define ENQUEUE_SUCCESS 0
359 +#define ENQUEUE_FAIL NET_XMIT_DROP
361 +#if defined CONFIG_IP_NF_CONNTRACK || defined CONFIG_IP_NF_CONNTRACK_MODULE || defined CONFIG_NF_CONNTRACK || defined CONFIG_NF_CONNTRACK_MODULE
362 +#include <net/netfilter/nf_conntrack.h>
363 +#define MASQ_SUPPORT
366 +/* The penalty (priority) type */
367 +typedef u64 penalty_base_t;
368 +#define penalty_base_t_max ((penalty_base_t)-1)
369 +typedef struct penalty_t {
373 +#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls))
374 +#define penalty_le(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls))
375 +static penalty_t penalty_max = { penalty_base_t_max, penalty_base_t_max };
382 +struct heap_element;
384 +/* Initializes an empty heap:
385 + * he: A pointer to an uninitialized heap structure identifying the heap
386 + * size: Maximal number of elements the heap can contain
387 + * poll: An array of size "size" used by the heap.
389 +static void heap_init(struct heap *he, int size, struct heap_element *poll);
391 +/* Each element in the heap is identified by a user-assigned id which
392 + * should be a non negative integer less than the size argument
393 + * given to heap_init.
395 +static void heap_insert(struct heap *, int id, penalty_t);
396 +static void heap_remove(struct heap *, int id);
397 +static void heap_set_penalty(struct heap *, int id, penalty_t);
399 +/* Retrieving information */
400 +static char heap_empty(struct heap *); /* Heap empty? */
401 +static char heap_contains(struct heap *, int id); /* Does heap contain
403 +static int heap_root(struct heap *); /* Returns the id of the root */
404 +static penalty_t heap_get_penalty(struct heap *, int id); /* Returns penalty
408 + * Heap implementation
411 +struct heap_element {
413 + int id; /* The user-assigned id of this element */
414 + int id2idx; /* Maps from user-assigned ids to indices in root_1 */
418 + struct heap_element *root_1;
422 +/* Heap implementation */
423 +static void heap_init(struct heap *h, int size, struct heap_element *poll)
428 + h->root_1 = poll - 1;
430 + for (i = 0; i < size; i++)
431 + poll[i].id2idx = 0;
434 +static char heap_empty(struct heap *h)
436 + return h->elements == 0;
439 +static char heap_contains(struct heap *h, int id)
441 + return h->root_1[id + 1].id2idx != 0;
444 +static int heap_root(struct heap *h)
446 + return h->root_1[1].id;
449 +static penalty_t heap_get_penalty(struct heap *h, int id)
451 + return h->root_1[h->root_1[id + 1].id2idx].penalty;
454 +static void heap_penalty_changed_internal(struct heap *h, int idx);
456 +static void heap_set_penalty(struct heap *h, int id, penalty_t p)
458 + int idx = h->root_1[id + 1].id2idx;
459 + h->root_1[idx].penalty = p;
460 + heap_penalty_changed_internal(h, idx);
463 +static void heap_insert(struct heap *h, int id, penalty_t p)
465 + /* Insert at the end of the heap */
467 + h->root_1[h->elements].id = id;
468 + h->root_1[h->elements].penalty = p;
469 + h->root_1[id + 1].id2idx = h->elements;
471 + /* And put it in the right position */
472 + heap_penalty_changed_internal(h, h->elements);
475 +static void heap_remove(struct heap *h, int id)
477 + int idx = h->root_1[id + 1].id2idx;
479 + h->root_1[id + 1].id2idx = 0;
481 + if (h->elements == idx) {
486 + mvid = h->root_1[h->elements].id;
487 + h->root_1[idx].id = mvid;
488 + h->root_1[idx].penalty = h->root_1[h->elements].penalty;
489 + h->root_1[mvid + 1].id2idx = idx;
492 + heap_penalty_changed_internal(h, idx);
495 +static void heap_swap(struct heap *h, int idx0, int idx1)
501 + /* Simple content */
502 + tmp_p = h->root_1[idx0].penalty;
503 + tmp_id = h->root_1[idx0].id;
504 + h->root_1[idx0].penalty = h->root_1[idx1].penalty;
505 + h->root_1[idx0].id = h->root_1[idx1].id;
506 + h->root_1[idx1].penalty = tmp_p;
507 + h->root_1[idx1].id = tmp_id;
509 + /* Update reverse pointers */
510 + id0 = h->root_1[idx0].id;
511 + id1 = h->root_1[idx1].id;
512 + h->root_1[id0 + 1].id2idx = idx0;
513 + h->root_1[id1 + 1].id2idx = idx1;
516 +static void heap_penalty_changed_internal(struct heap *h, int cur)
519 + || penalty_leq(h->root_1[cur >> 1].penalty,
520 + h->root_1[cur].penalty)) {
521 + /* We are in heap order upwards - so we should move the element down */
523 + int nxt0 = cur << 1;
524 + int nxt1 = nxt0 + 1;
525 + penalty_t pen_c = h->root_1[cur].penalty;
528 + h->elements ? h->root_1[nxt0].penalty : penalty_max;
531 + h->elements ? h->root_1[nxt1].penalty : penalty_max;
533 + if (penalty_le(pen_0, pen_c)
534 + && penalty_leq(pen_0, pen_1)) {
535 + /* Swap with child 0 */
536 + heap_swap(h, cur, nxt0);
538 + } else if (penalty_le(pen_1, pen_c)) {
539 + /* Swap with child 1 */
540 + heap_swap(h, cur, nxt1);
543 + /* Heap in heap order */
548 + /* We are not in heap order upwards (and thus we must be it downwards).
550 + while (cur != 1) { /* While not root */
551 + int nxt = cur >> 1;
553 + (h->root_1[nxt].penalty, h->root_1[cur].penalty))
555 + heap_swap(h, cur, nxt);
562 + * Classification based on MAC or IP addresses. Note that for historical reasons
563 + * these are prefixed with mac_ since originally only MAC based classification
566 + * This code should be in a separate filter module - but it isn't.
574 +/* Initializes/destroys the structure we maintain.
575 + Returns -1 on error */
576 +static int mac_init(struct mac_head *, int max_macs, char srcaddr,
577 + char usemac, char usemasq, void *proxyremap);
578 +static void mac_done(struct mac_head *);
579 +static void mac_reset(struct mac_head *);
581 +/* Classify a packet. Returns a number n where 0<=n<max_macs. Or -1 if
582 + the packet should be dropped. */
583 +static int mac_classify(struct mac_head *, struct sk_buff *skb);
590 + unsigned char addr[ETH_ALEN]; /* Address of this band (last two are 0 on IP) */
591 + unsigned long lastused; /* Last time a packet was encountered */
592 + int class; /* Classid of this band (0<=classid<max_macs) */
595 +static int mac_compare(const void *a, const void *b)
597 + return memcmp(a, b, ETH_ALEN);
601 + int mac_max; /* Maximal number of MAC addresses/classes allowed */
602 + int mac_cur; /* Current number of MAC addresses/classes */
603 + int mac_reused; /* Number of times we have reused a class with a new address. */
605 + char srcaddr; /* True if we classify on the source address of packets,
606 + else we use destination address. */
607 + char usemac; /* If true we use mac, else we use IP */
608 + char usemasq; /* If true we try to demasqgrade */
609 + struct mac_addr *macs; /* Allocated mac_max elements, used max_cur */
610 + char *cls2mac; /* Mapping from classnumbers to addresses -
611 + there is 6 bytes in each entry */
613 + void *proxyremap; /* Information on proxy remapping of data or 0 */
616 +/* This is as the standard C library function with the same name: */
617 +static const void *bsearch(const void *key, const void *base, int nmemb,
619 + int (*compare) (const void *, const void *))
628 + m_idx = nmemb >> 1;
629 + m_ptr = ((const char *)base) + m_idx * size;
631 + i = compare(key, m_ptr);
632 + if (i < 0) /* key is less */
633 + return bsearch(key, base, m_idx, size, compare);
635 + return bsearch(key, ((const char *)m_ptr) + size,
636 + nmemb - m_idx - 1, size, compare);
641 +static int mac_init(struct mac_head *h, int max_macs, char srcaddr,
642 + char usemac, char usemasq, void *proxyremap)
647 + h->srcaddr = srcaddr;
648 + h->usemac = usemac;
649 + h->usemasq = usemasq;
650 + h->mac_max = max_macs;
651 + h->proxyremap = proxyremap;
653 + h->macs = (struct mac_addr *)
654 + my_malloc(sizeof(struct mac_addr) * max_macs);
655 + h->cls2mac = (char *)my_malloc(6 * max_macs);
656 + if (!h->macs || !h->cls2mac) {
660 + my_free(h->cls2mac);
666 +static void mac_done(struct mac_head *h)
669 + my_free(h->cls2mac);
672 +static void mac_reset(struct mac_head *h)
679 +static int lookup_mac(struct mac_head *h, unsigned char *addr)
684 + /* First try to find the address in the table */
685 + struct mac_addr *m = (struct mac_addr *)
686 + bsearch(addr, h->macs, h->mac_cur, sizeof(struct mac_addr),
690 + m->lastused = h->incr_time++;
693 + /* Okay - the MAC address was not in table */
694 + if (h->mac_cur == h->mac_max) {
695 + /* And the table is full - delete the oldest entry */
697 + /* Find the oldest entry */
700 + for (i = 1; i < h->mac_cur; i++)
701 + if (h->macs[i].lastused < h->macs[lowidx].lastused)
704 + class = h->macs[lowidx].class;
706 + /* And delete it */
707 + memmove(&h->macs[lowidx], &h->macs[lowidx + 1],
708 + (h->mac_cur - lowidx - 1) * sizeof(struct mac_addr));
712 + class = h->mac_cur;
715 + /* The table is not full - find the position we should put the address in */
716 + for (i = 0; i < h->mac_cur; i++)
717 + if (mac_compare(addr, &h->macs[i]) < 0)
720 + /* We should insert at position i */
721 + memmove(&h->macs[i + 1], &h->macs[i],
722 + (h->mac_cur - i) * sizeof(struct mac_addr));
724 + memcpy(m->addr, addr, ETH_ALEN);
725 + m->lastused = h->incr_time++;
729 + /* Finally update the cls2mac variable */
730 + memcpy(h->cls2mac + ETH_ALEN * class, addr, ETH_ALEN);
735 +int valid_ip_checksum(struct iphdr *ip, int size)
737 + __u16 header_len = ip->ihl << 2;
739 + __u16 *ipu = (u16 *) ip;
742 + /* We require 4 bytes in the packet since we access the port numbers */
743 + if ((size < header_len) || size < sizeof(struct iphdr) + 4)
746 + for (a = 0; a < (header_len >> 1); a++, ipu++) {
748 + /* If not the checksum field */
756 + return ip->check == (__u16) ~ c;
759 +static int mac_classify(struct mac_head *head, struct sk_buff *skb)
761 + /* We set this to the address we map to. In case we map to an IP
762 + address the last two entries are set to 0. */
763 + unsigned char addr[ETH_ALEN];
765 + /* Used later for mac classification */
766 + struct ethhdr *hdr;
768 + /* This is the size of the network part of the packet */
769 + int size = ((char *)skb->data + skb->len) - ((char *)ip_hdr(skb));
771 + /* Set a default value for the address */
772 + memset(addr, 0, ETH_ALEN);
774 + /* Map IPv4 traffic to their correct addresses.
775 + Everything (non-IPv4) else goes to a default address */
776 + if (ntohs(skb->protocol) == ETH_P_IP) {
778 + struct iphdr *iph = ip_hdr(skb); /* This is the IP header */
780 + const __u16 *portp = (__u16 *) & (((char *)iph)[iph->ihl * 4]); /* Port numbers*/
781 + __u16 sport = portp[0];
782 + __u16 dport = portp[1];
784 + unsigned ipaddr; /* IP classification */
785 + ProxyRemapBlock *prm; /* Proxyremapping */
787 + /* IP packets must have valid checksum */
788 + if (!valid_ip_checksum(ip_hdr(skb), size))
793 + ipaddr = iph->saddr;
795 + ipaddr = iph->daddr;
798 + /* Update ipaddr if packet is masqgraded */
799 + if (head->usemasq) {
800 + /* Thanks to Rusty Russell for help with the following code */
801 + enum ip_conntrack_info ctinfo;
802 + struct nf_conn *ct;
803 + ct = nf_ct_get(skb, &ctinfo);
807 + ct->tuplehash[CTINFO2DIR(ctinfo)].
811 + ct->tuplehash[CTINFO2DIR(ctinfo)].
817 + /* Set prm (proxyremap) based on ipaddr */
819 + if (head->proxyremap) {
820 + if (head->srcaddr) {
822 + proxyLookup(head->proxyremap, ipaddr, sport,
823 + ip_hdr(skb)->protocol);
826 + proxyLookup(head->proxyremap, ipaddr, dport,
827 + ip_hdr(skb)->protocol);
830 + /* And finally set the correct address */
832 + /* This packet should be remapped */
834 + memcpy(addr, prm->macaddr, ETH_ALEN);
836 + memcpy(addr, &prm->caddr, sizeof(unsigned));
838 + /* This packet should not be remapped */
839 + if (head->usemac) {
840 + /* Make sure we have a valid mac address */
842 + skb->dev->type == ARPHRD_ETHER &&
843 + skb->len >= ETH_HLEN) {
844 + hdr = (struct ethhdr*)skb->data;
846 + memcpy(addr,hdr->h_source, ETH_ALEN);
848 + memcpy(addr,hdr->h_dest, ETH_ALEN);
851 + memcpy(addr, &ipaddr, 4);
856 + return lookup_mac(head, addr);
863 +/* Per-class information */
864 +struct wrrc_sched_data {
865 + struct Qdisc *que; /* The queue for this class */
866 + struct tc_wrr_class_modf class_modf; /* Information about the class */
868 + /* For classes in the heap this is the priority value priosum
869 + was updated with for this class */
873 +/* Per-qdisc information */
874 +struct wrr_sched_data {
875 + struct heap h; /* A heap containing all the bands that will send something */
876 + struct heap_element *poll; /* bandc elements */
878 + /* The sum of the priorities of the elements in the heap where
879 + a priority of 1 is saved as 2^32 */
882 + /* A class for each band */
883 + struct wrrc_sched_data *bands; /* bandc elements */
885 + /* Information maintained by the proxydict module or 0 if we
886 + have no proxy remapping */
889 + /* Always incrementing counters, we always have that any value of
890 + counter_low_penal < any value of counter_high_penal. */
891 + penalty_base_t counter_low_penal;
892 + penalty_base_t counter_high_penal;
894 + struct tc_wrr_qdisc_modf qdisc_modf; /* Penalty updating */
896 + int packets_requed; /* Statistics */
898 + struct mac_head filter; /* The filter */
899 + int bandc; /* Number of bands */
902 +/* Priority handling.
903 + * weight is in interval [0..2^32]
904 + * priosum has whole numbers in the upper and fragments in the lower 32 bits.
906 +static void weight_transmit(struct tc_wrr_class_weight *p,
907 + struct tc_wrr_qdisc_weight q,
909 + u64 priosum, u64 weight, unsigned size)
912 + unsigned long now = jiffies / HZ;
914 + /* Penalty for transmitting */
919 + switch (q.weight_mode) {
921 + change = p->decr * size;
924 + change = p->decr * size * heapsize;
926 + case 3: /* Note: 64 bit division is not always available */
927 + divisor = (u32) (weight >> 16);
930 + change = p->decr * size * (((u32) (priosum >> 16)) / divisor);
935 + if (p->val > old || p->val < p->min)
938 + /* Credit for time went */
939 + change = (now - p->tim) * p->incr;
943 + if (p->val < old || p->val > p->max)
947 +static void weight_setdefault(struct tc_wrr_class_weight *p)
949 + p->val = (u64) - 1;
952 + p->min = (u64) - 1;
953 + p->max = (u64) - 1;
954 + p->tim = jiffies / HZ;
957 +static void weight_setvalue(struct tc_wrr_class_weight *dst,
958 + struct tc_wrr_class_weight *src)
960 + if (src->val != 0) {
961 + dst->val = src->val;
962 + dst->tim = jiffies / HZ;
965 + dst->min = src->min;
967 + dst->max = src->max;
968 + if (src->decr != ((u64) - 1))
969 + dst->decr = src->decr;
970 + if (src->incr != ((u64) - 1))
971 + dst->incr = src->incr;
972 + if (dst->val < dst->min)
973 + dst->val = dst->min;
974 + if (dst->val > dst->max)
975 + dst->val = dst->max;
978 +static void wrr_destroy(struct Qdisc *sch)
980 + struct wrr_sched_data *q = qdisc_priv(sch);
983 + /* Destroy our filter */
984 + mac_done(&q->filter);
986 + /* Destroy all our child queues */
987 + for (i = 0; i < q->bandc; i++)
988 + qdisc_destroy(q->bands[i].que);
990 + /* And free memory */
994 + my_free(q->proxydict);
997 +static int wrr_init(struct Qdisc *sch, struct rtattr *opt)
999 + struct wrr_sched_data *q = qdisc_priv(sch);
1000 + int i, maciniterr;
1002 + struct tc_wrr_qdisc_crt *qopt;
1004 + /* Parse options */
1006 + return -EINVAL; /* Options must be specified */
1007 + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)))
1009 + qopt = RTA_DATA(opt);
1011 + if (qopt->bands_max > 8192 || qopt->bands_max < 2) {
1012 + /* More than 8192 queues or less than 2? That cannot be true - it must be
1017 + if (qopt->proxy_maxconn < 0 || qopt->proxy_maxconn > 20000) {
1018 + /* More than this number of maximal concurrent connections is unrealistic */
1021 +#ifndef MASQ_SUPPORT
1022 + if (qopt->usemasq) {
1026 + q->bandc = qopt->bands_max;
1027 + q->qdisc_modf = qopt->qdisc_modf;
1029 + /* Create structures */
1030 + q->poll = (struct heap_element *)
1031 + my_malloc(sizeof(struct heap_element) * q->bandc);
1032 + q->bands = (struct wrrc_sched_data *)
1033 + my_malloc(sizeof(struct wrrc_sched_data) * q->bandc);
1035 + if (qopt->proxy_maxconn > 0) {
1036 + q->proxydict = my_malloc(proxyGetMemSize(qopt->proxy_maxconn));
1041 + /* Init mac module */
1042 + maciniterr = mac_init(&q->filter, qopt->bands_max, qopt->srcaddr,
1043 + qopt->usemac, qopt->usemasq, q->proxydict);
1045 + /* See if we got the memory we wanted */
1046 + if (!q->poll || !q->bands ||
1047 + (qopt->proxy_maxconn > 0 && !q->proxydict) || maciniterr < 0) {
1051 + my_free(q->bands);
1053 + my_free(q->proxydict);
1054 + if (maciniterr >= 0)
1055 + mac_done(&q->filter);
1058 + /* Initialize proxy */
1060 + proxyInitMem(q->proxydict, qopt->proxy_maxconn);
1061 + /* Initialize values */
1062 + q->counter_low_penal = 0;
1063 + q->counter_high_penal = penalty_base_t_max >> 1;
1064 + q->packets_requed = 0;
1066 + /* Initialize empty heap */
1067 + heap_init(&q->h, q->bandc, q->poll);
1070 + /* Initialize each band */
1072 + for (i = 0; i < q->bandc; i++) {
1073 + weight_setdefault(&q->bands[i].class_modf.weight1);
1074 + weight_setdefault(&q->bands[i].class_modf.weight2);
1076 + struct Qdisc *child =
1077 + qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, sch->handle);
1079 + q->bands[i].que = child;
1081 + /* Queue couldn't be created :-( */
1086 + q->bands[i].que = &noop_qdisc;
1090 + /* Destroy again */
1095 + printk(KERN_DEBUG "sch_wrr: Initialized version " WRR_VER "\n");
1100 +static void wrr_reset(struct Qdisc *sch)
1102 + struct wrr_sched_data *q = qdisc_priv(sch);
1105 + /* Reset own values */
1106 + q->counter_low_penal = 0;
1107 + q->counter_high_penal = penalty_base_t_max >> 1;
1108 + q->packets_requed = 0;
1110 + /* Reset filter */
1111 + mac_reset(&q->filter);
1113 + /* Reinitialize heap */
1114 + heap_init(&q->h, q->bandc, q->poll);
1117 + /* Reset all bands */
1118 + for (i = 0; i < q->bandc; i++) {
1119 + weight_setdefault(&q->bands[i].class_modf.weight1);
1120 + weight_setdefault(&q->bands[i].class_modf.weight2);
1121 + qdisc_reset(q->bands[i].que);
1124 + /* Reset proxy remapping information */
1126 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1129 +static int wrr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1131 + struct wrr_sched_data *q = qdisc_priv(sch);
1132 + int retvalue = ENQUEUE_FAIL;
1134 + /* Classify the packet in skb to find the band it belongs to */
1135 + int band = mac_classify(&q->filter, skb);
1138 + /* Enqueue the packet on this band's child qdisc */
1139 + struct Qdisc *qdisc = q->bands[band].que;
1141 + if ((retvalue = qdisc->enqueue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1143 + sch->bstats.bytes += skb->len;
1144 + sch->bstats.packets++;
1147 + /* Insert the band into the heap if it is not already there */
1148 + if (!heap_contains(&q->h, band)) {
1150 + if (!heap_empty(&q->h))
1152 + heap_get_penalty(&q->h,
1153 + heap_root(&q->h)).
1157 + p.ls = q->counter_low_penal++;
1158 + heap_insert(&q->h, band, p);
1159 + q->bands[band].priosum_val =
1160 + ((q->bands[band].class_modf.weight1.
1163 + ((q->bands[band].class_modf.weight2.
1165 + q->priosum += q->bands[band].priosum_val;
1169 + /* If we decide not to enqueue, it seems we also need to free the packet */
1173 + if (retvalue != ENQUEUE_SUCCESS) {
1174 + /* Packet was not enqueued */
1175 + sch->qstats.drops++;
1181 +static struct sk_buff *wrr_dequeue(struct Qdisc *sch)
1183 + struct wrr_sched_data *q = qdisc_priv(sch);
1184 + struct sk_buff *skb;
1186 + u64 weight, priosum;
1187 + struct wrrc_sched_data *b;
1189 + /* Return if the heap is empty */
1190 + if (heap_empty(&q->h))
1193 + /* The heap root is the band to dequeue from */
1194 + band = heap_root(&q->h);
1196 + /* Find priority of this element in interval [1;2^32] */
1197 + b = &q->bands[band];
1199 + /* weight is in interval [1;2^32] */
1200 + weight = ((b->class_modf.weight1.val >> 48) + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1201 + priosum = q->priosum;
1202 + q->priosum -= q->bands[band].priosum_val;
1204 + /* Dequeue the packet from the root band's child qdisc */
1205 + skb = q->bands[band].que->dequeue(q->bands[band].que);
1208 + /* There was a packet in this queue */
1212 + /* Find the packet length adjusted by priority */
1213 + adjlen = (u32) (weight >> (32 - 16));
1216 + adjlen = (skb->len << 16) / adjlen;
1218 + /* Update penalty information for this class */
1219 + weight_transmit(&b->class_modf.weight1, q->qdisc_modf.weight1,
1220 + q->h.elements, priosum, weight, skb->len);
1221 + weight_transmit(&b->class_modf.weight2, q->qdisc_modf.weight2,
1222 + q->h.elements, priosum, weight, skb->len);
1223 + q->bands[band].priosum_val =
1224 + ((b->class_modf.weight1.val >> 48) +
1225 + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1226 + q->priosum += q->bands[band].priosum_val;
1228 + /* And update the class in the heap */
1229 + p = heap_get_penalty(&q->h, band);
1231 + p.ls = q->counter_high_penal++;
1232 + heap_set_penalty(&q->h, band, p);
1234 + /* Return packet */
1238 + /* No packet - so this band should be removed from the heap */
1239 + heap_remove(&q->h, band);
1244 +static int wrr_requeue(struct sk_buff *skb, struct Qdisc *sch)
1246 + struct wrr_sched_data *q = qdisc_priv(sch);
1247 + struct Qdisc *qdisc;
1250 + /* Find the band we took it from by classifying the packet again */
1251 + int band = mac_classify(&q->filter, skb);
1253 + /* Who should now free the packet? */
1255 + "sch_wrr: Oops - packet requeued could never have been queued.\n");
1256 + sch->qstats.drops++;
1257 + return ENQUEUE_FAIL;
1260 + q->packets_requed++;
1262 + /* Try to requeue it on that band's child qdisc */
1263 + qdisc = q->bands[band].que;
1265 + if ((ret = qdisc->ops->requeue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1268 + sch->qstats.requeues++;
1270 + /* We should restore priority information - but we don't
1272 + * p=heap_get_penalty(&q->h,band);
1274 + * heap_set_penalty(&q->h,band,p);
1277 + return ENQUEUE_SUCCESS;
1279 + sch->qstats.drops++;
1284 +static unsigned int wrr_drop(struct Qdisc *sch)
1286 + struct wrr_sched_data *q = qdisc_priv(sch);
1288 + /* Ugly... Walk the heap from its last element up to the root, asking each band's qdisc to drop */
1291 + for (i = q->h.elements; i >= 1; i--) {
1292 + int band = q->h.root_1[i].id;
1293 + if (q->bands[band].que->ops->drop(q->bands[band].que)) {
1296 + sch->qstats.drops++;
1304 +static int wrr_dump(struct Qdisc *sch, struct sk_buff *skb)
1306 + struct wrr_sched_data *q = qdisc_priv(sch);
1307 + unsigned char *b = skb->tail; /* remember the tail so a failed dump can be undone */
1308 + struct tc_wrr_qdisc_stats opt;
1310 + opt.qdisc_crt.qdisc_modf = q->qdisc_modf;
1311 + opt.qdisc_crt.srcaddr = q->filter.srcaddr;
1312 + opt.qdisc_crt.usemac = q->filter.usemac;
1313 + opt.qdisc_crt.usemasq = q->filter.usemasq;
1314 + opt.qdisc_crt.bands_max = q->filter.mac_max;
1315 + opt.nodes_in_heap = q->h.elements;
1316 + opt.bands_cur = q->filter.mac_cur;
1317 + opt.bands_reused = q->filter.mac_reused;
1318 + opt.packets_requed = q->packets_requed;
1319 + opt.priosum = q->priosum;
1321 + if (q->proxydict) {
1322 + opt.qdisc_crt.proxy_maxconn = proxyGetMaxConn(q->proxydict);
1323 + opt.proxy_curconn = proxyGetCurConn(q->proxydict);
1325 + opt.qdisc_crt.proxy_maxconn = 0;
1326 + opt.proxy_curconn = 0;
1329 + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1332 + rtattr_failure: /* RTA_PUT jumps to this label when the attribute cannot be added */
1333 + skb_trim(skb, b - skb->data); /* roll the skb back to its length on entry */
1337 +static int wrr_tune_std(struct Qdisc *sch, struct rtattr *opt)
1339 + struct wrr_sched_data *q = qdisc_priv(sch);
1340 + struct tc_wrr_qdisc_modf_std *qopt = RTA_DATA(opt);
1342 + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) /* attribute must hold a full tc_wrr_qdisc_modf_std */
1347 + if (qopt->change_class) { /* the request also carries new weights for one class */
1348 + int idx = lookup_mac(&q->filter, qopt->addr);
1350 + (&q->bands[idx].class_modf.weight1,
1351 + &qopt->class_modf.weight1);
1352 + weight_setvalue(&q->bands[idx].class_modf.weight2,
1353 + &qopt->class_modf.weight2);
1355 + if (qopt->qdisc_modf.weight1.weight_mode != -1) /* -1 leaves the current mode untouched */
1356 + q->qdisc_modf.weight1.weight_mode =
1357 + qopt->qdisc_modf.weight1.weight_mode;
1358 + if (qopt->qdisc_modf.weight2.weight_mode != -1) /* -1 leaves the current mode untouched */
1359 + q->qdisc_modf.weight2.weight_mode =
1360 + qopt->qdisc_modf.weight2.weight_mode;
1367 +static int wrr_tune_proxy(struct Qdisc *sch, struct rtattr *opt)
1369 + struct wrr_sched_data *q = qdisc_priv(sch);
1370 + struct tc_wrr_qdisc_modf_proxy *qopt = RTA_DATA(opt);
1373 + /* Return if we are not configured with proxy support */
1374 + if (!q->proxydict)
1377 + /* Return if not enough data is given for the header plus changec remap blocks */
1378 + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt)) ||
1380 + RTA_LENGTH(sizeof(*qopt) + sizeof(ProxyRemapBlock) * qopt->changec))
1385 + if (qopt->reset) { /* a reset request reinitializes the dictionary with its current limit */
1386 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1388 + /* Apply each of the qopt->changec remap blocks stored at qopt->changes */
1389 + for (i = 0; i < qopt->changec; i++) {
1390 + proxyConsumeBlock(q->proxydict,
1391 + &((ProxyRemapBlock *) & qopt->changes)[i]);
1399 +static int wrr_tune(struct Qdisc *sch, struct rtattr *opt)
1401 + if (((struct tc_wrr_qdisc_modf_std *)RTA_DATA(opt))->proxy) { /* the proxy flag selects the proxy-remap update path */
1402 + return wrr_tune_proxy(sch, opt);
1404 + return wrr_tune_std(sch, opt); /* otherwise treat the options as a standard tune request */
1410 + * External and internal IDs are equal. They are the band number plus 1.
1413 +/* Replace the qdisc attached to a class with `new`; the previous qdisc is reset and handed back through `old` */
1414 +static int wrr_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1415 + struct Qdisc **old)
1417 + struct wrr_sched_data *q = qdisc_priv(sch);
1418 + if (arg > q->bandc || arg == 0)
1423 + new = &noop_qdisc;
1425 + LOCK_START * old = q->bands[arg].que;
1426 + q->bands[arg].que = new;
1427 + qdisc_reset(*old);
1428 + LOCK_END return 0;
1431 +/* Returns the child qdisc attached to a class */
1432 +static struct Qdisc *wrr_leaf(struct Qdisc *sch, unsigned long arg)
1434 + struct wrr_sched_data *q = qdisc_priv(sch);
1435 + if (arg > q->bandc || arg == 0) /* valid class ids are 1..bandc */
1438 + return q->bands[arg].que;
1441 +static unsigned long wrr_get(struct Qdisc *sch, u32 classid)
1443 + struct wrr_sched_data *q = qdisc_priv(sch);
1444 + unsigned long band = TC_H_MIN(classid); /* the class id is the minor part of classid */
1445 + if (band > q->bandc || band == 0) /* valid class ids are 1..bandc */
1450 +static void wrr_put(struct Qdisc *q, unsigned long cl)
1455 +static int wrr_delete(struct Qdisc *sch, unsigned long cl)
1457 + struct wrr_sched_data *q = qdisc_priv(sch);
1458 + if (cl == 0 || cl > q->bandc)
1464 +static int wrr_dump_class(struct Qdisc *sch, unsigned long cl,
1465 + struct sk_buff *skb, struct tcmsg *tcm)
1467 + struct wrr_sched_data *q = qdisc_priv(sch);
1468 + unsigned char *b = skb->tail; /* remember the tail so a failed dump can be undone */
1469 + struct tc_wrr_class_stats opt;
1471 + /* Handle of this class: the qdisc handle combined with the class id */
1472 + tcm->tcm_handle = sch->handle | cl;
1474 + if (cl == 0 || cl > q->bandc)
1475 + goto rtattr_failure;
1478 + if ((cl < q->bandc) && q->bands[cl].que)
1479 + tcm->tcm_info = q->bands[cl].que->handle;
1481 + if (cl >= q->filter.mac_cur) {
1482 + /* Band is unused, so report an all-zero record */
1483 + memset(&opt, 0, sizeof(opt));
1487 + opt.class_modf.weight1 = q->bands[cl].class_modf.weight1;
1488 + opt.class_modf.weight2 = q->bands[cl].class_modf.weight2;
1489 + weight_transmit(&opt.class_modf.weight1, q->qdisc_modf.weight1,
1491 + weight_transmit(&opt.class_modf.weight2, q->qdisc_modf.weight2,
1493 + memcpy(opt.addr, q->filter.cls2mac + cl * ETH_ALEN, ETH_ALEN);
1494 + opt.usemac = q->filter.usemac;
1495 + opt.heappos = q->h.root_1[cl + 1].id2idx;
1496 + if (opt.heappos != 0) { /* a non-zero position means the band is currently in the heap */
1498 + opt.penal_ls = heap_get_penalty(&q->h, cl).ls;
1499 + opt.penal_ms = heap_get_penalty(&q->h, cl).ms;
1506 + /* Put queuing information */
1507 + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1511 + skb_trim(skb, b - skb->data); /* roll the skb back to its length on entry */
1515 +static int wrr_change(struct Qdisc *sch, u32 handle, u32 parent,
1516 + struct rtattr **tca, unsigned long *arg)
1518 + unsigned long cl = *arg;
1519 + struct wrr_sched_data *q = qdisc_priv(sch);
1520 + struct rtattr *opt = tca[TCA_OPTIONS - 1];
1521 + struct tc_wrr_class_modf *copt = RTA_DATA(opt);
1523 + if (cl == 0 || cl > q->bandc) /* valid class ids are 1..bandc */
1527 + if (opt->rta_len < RTA_LENGTH(sizeof(*copt))) /* attribute must hold a full tc_wrr_class_modf */
1532 + weight_setvalue(&q->bands[cl].class_modf.weight1, &copt->weight1); /* apply both requested weights to the class */
1533 + weight_setvalue(&q->bands[cl].class_modf.weight2, &copt->weight2);
1540 +static void wrr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1542 + struct wrr_sched_data *q = qdisc_priv(sch);
1548 + for (prio = 1; prio <= q->bandc; prio++) { /* visit class ids 1..bandc */
1549 + if (arg->count < arg->skip) { /* entries before arg->skip are not passed to the callback */
1553 + if (arg->fn(sch, prio, arg) < 0) { /* a negative callback result is handled specially */
1561 +static struct tcf_proto **wrr_find_tcf(struct Qdisc *sch, unsigned long cl)
1566 +static unsigned long wrr_bind(struct Qdisc *sch,
1567 + unsigned long parent, u32 classid)
1569 + return wrr_get(sch, classid); /* binding is just a class lookup; parent is not used here */
1576 +static struct Qdisc_class_ops wrr_class_ops = { /* class-level operations of the WRR qdisc */
1577 + .graft = wrr_graft,
1581 + .change = wrr_change,
1582 + .delete = wrr_delete,
1584 + .tcf_chain = wrr_find_tcf,
1585 + .bind_tcf = wrr_bind,
1586 + .unbind_tcf = wrr_put, /* unbinding shares the put handler */
1587 + .dump = wrr_dump_class,
1590 +static struct Qdisc_ops wrr_qdisc_ops = { /* qdisc-level operations registered by the module */
1592 + .cl_ops = &wrr_class_ops,
1594 + .priv_size = sizeof(struct wrr_sched_data), /* private area accessed through qdisc_priv() */
1595 + .enqueue = wrr_enqueue,
1596 + .dequeue = wrr_dequeue,
1597 + .requeue = wrr_requeue,
1600 + .reset = wrr_reset,
1601 + .destroy = wrr_destroy,
1602 + .change = wrr_tune,
1604 + .owner = THIS_MODULE,
1607 +static int __init wrr_module_init(void)
1609 + return register_qdisc(&wrr_qdisc_ops); /* module load: register the WRR qdisc operations */
1612 +static void __exit wrr_module_exit(void)
1614 + unregister_qdisc(&wrr_qdisc_ops); /* module unload: undo the registration done at init */
1617 +module_init(wrr_module_init)
1618 +module_exit(wrr_module_exit)
1620 +MODULE_LICENSE("GPL");
1621 +MODULE_AUTHOR("Christian Worm Mortensen");
1622 diff -urN linux-2.6.25.orig/net/sched/wrr_proxydict.c linux-2.6.25/net/sched/wrr_proxydict.c
1623 --- linux-2.6.25.orig/net/sched/wrr_proxydict.c 1970-01-01 01:00:00.000000000 +0100
1624 +++ linux-2.6.25/net/sched/wrr_proxydict.c 2008-04-24 16:19:07.000000000 +0200
1627 +#include <string.h>
1628 +#include <netinet/in.h>
1631 +#include <linux/wrr.h>
1633 +/* Proxy remapping part of WRR */
1635 +/* Hash function: mixes protocol, server and port, reduced modulo the table size.
   NOTE(review): `m` is not parenthesized in the expansion, so pass a plain identifier. */
1636 +#define hash_fnc(m,server,port,proto) \
1637 + (((proto)*7+(server)*13+(port)*5)%m->hash_size)
1639 +/* Size of the hash table given the maximal number of connections (twice that number) */
1640 +#define hash_size_max_con(max_con) (2*(max_con))
1642 +/* The memory area we maintain
1644 + Given a connection we map it by hash_fnc into hash_table. This gives an
1645 + index in next which contains a -1 terminated linked list of connections
1646 + mapping to that hash value.
1648 + The entries in next that are not allocated are also kept in a linked list,
1649 + whose first free index is free_first.
1661 +#define Memory(m) ((proxy_memory*)m) /* the proxy_memory header sits at the start of the area */
1662 +#define Hash_table(m) ((int*)(((char*)m)+sizeof(proxy_memory))) /* int array directly after the header */
1663 +#define Next(m) ((int*)(((char*)m)+sizeof(proxy_memory)+ \
1664 + sizeof(int)*((proxy_memory*)m)->hash_size)) /* int array after the hash_size hash-table entries */
1665 +#define Info(m) ((ProxyRemapBlock*)(((char*)m)+ \
1666 + sizeof(proxy_memory)+ \
1667 + sizeof(int)*((proxy_memory*)m)->hash_size+\
1668 + sizeof(int)*((proxy_memory*)m)->max_con \
1671 +int proxyGetMemSize(int max_con)
1673 + return sizeof(proxy_memory) + /* header */
1674 + sizeof(int) * hash_size_max_con(max_con) + /* hash table */
1675 + sizeof(int) * max_con + sizeof(ProxyRemapBlock) * max_con; /* next[] links and the remap blocks */
1678 +void proxyInitMem(void *data, int max_con)
1680 + proxy_memory *m = Memory(data);
1681 + m->max_con = max_con;
1683 + m->hash_size = hash_size_max_con(max_con);
1686 + /* Get pointers to the hash table and next[] arrays inside the area */
1687 + int *hash_table = Hash_table(data);
1688 + int *next = Next(data);
1691 + /* Init the hash table: -1 marks an empty bucket */
1692 + for (i = 0; i < m->hash_size; i++)
1693 + hash_table[i] = -1;
1695 + /* Init the free-list, which starts at entry 0 */
1696 + for (i = 0; i < m->max_con; i++)
1698 + m->free_first = 0;
1702 +int proxyGetCurConn(void *data)
1704 + return Memory(data)->cur_con; /* read the cur_con field of the area's header */
1707 +int proxyGetMaxConn(void *data)
1709 + return Memory(data)->max_con; /* the limit stored by proxyInitMem() */
1712 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
1715 + proxy_memory *m = Memory(data);
1716 + int *hash_table = Hash_table(m);
1717 + int *next = Next(m);
1718 + ProxyRemapBlock *info = Info(m);
1721 + for (i = hash_table[hash_fnc(m, ipaddr, port, proto)]; i != -1; /* walk the -1 terminated chain of this hash bucket */
1723 + if (info[i].proto == proto && info[i].sport == port
1724 + && info[i].saddr == ipaddr) /* an entry matches on proto, sport and saddr */
1731 +int proxyConsumeBlock(void *data, ProxyRemapBlock * blk)
1733 + proxy_memory *m = Memory(data);
1734 + int *hash_table = Hash_table(m);
1735 + int *next = Next(m);
1736 + ProxyRemapBlock *info = Info(m);
1737 + int hash = hash_fnc(m, blk->saddr, blk->sport, blk->proto);
1741 + if (m->cur_con == m->max_con)
1744 + /* Insert the block at a free entry */
1745 + info[m->free_first] = *blk;
1748 + foo = next[m->free_first];
1750 + /* And insert it in the hash tabel */
1751 + next[m->free_first] = hash_table[hash];
1752 + hash_table[hash] = m->free_first;
1753 + m->free_first = foo;
1757 + /* Find the block */
1758 + for (toupdate = &hash_table[hash];
1759 + *toupdate != -1; toupdate = &next[*toupdate]) {
1760 + if (info[*toupdate].proto == blk->proto &&
1761 + info[*toupdate].sport == blk->sport &&
1762 + info[*toupdate].saddr == blk->saddr)
1765 + if (*toupdate == -1)
1770 + /* Delete it from the hashing list */
1771 + *toupdate = next[*toupdate];
1773 + /* And put it on the free list */
1774 + next[foo] = m->free_first;
1775 + m->free_first = foo;