1 diff -urN linux-2.6.26.2.org/include/linux/pkt_sched.h linux-2.6.26.2/include/linux/pkt_sched.h
2 --- linux-2.6.26.2.org/include/linux/pkt_sched.h 2008-07-13 23:51:29.000000000 +0200
3 +++ linux-2.6.26.2/include/linux/pkt_sched.h 2008-08-19 11:20:27.000000000 +0200
8 +#include <linux/if_ether.h>
10 /* Generic queue statistics, available for all the elements.
11 Particular schedulers may have also their private records.
15 #define NETEM_DIST_SCALE 8192
21 + * A sub-weight of a class.
22 + * All numbers are represented as parts of (2^64-1).
24 +struct tc_wrr_class_weight {
25 + __u64 val; // Current value (0 is not valid)
26 + __u64 decr; // Value per byte (2^64-1 is not valid)
27 + __u64 incr; // Value per second (2^64-1 is not valid)
28 + __u64 min; // Minimal value (0 is not valid)
29 + __u64 max; // Maximal value (0 is not valid)
30 + time_t tim; // The time where the above information was correct
33 +/* Packet sent when modifying a class */
34 +struct tc_wrr_class_modf {
35 + /* Not-valid values are ignored */
36 + struct tc_wrr_class_weight weight1;
37 + struct tc_wrr_class_weight weight2;
40 +/* Packet returned when querying a class */
41 +struct tc_wrr_class_stats {
42 + char used; /* If this is false the information below is invalid */
43 + struct tc_wrr_class_modf class_modf;
44 + unsigned char addr[ETH_ALEN];
45 + char usemac; /* True if addr is a MAC address, else it is an IP address
46 + (this value is only for convenience, it is always the same
47 + value as in the qdisc) */
48 + int heappos; /* Current heap position or 0 if not in heap */
49 + __u64 penal_ls; /* Penalty value in heap (ls) */
50 + __u64 penal_ms; /* Penalty value in heap (ms) */
53 +/* Qdisc-wide penalty information (boolean values - 2 not valid) */
54 +struct tc_wrr_qdisc_weight {
55 + signed char weight_mode; /* 0=No automatic change to weight
57 + 2=Also multiply with number of machines
58 + 3=Instead multiply with priority divided
59 + with priority of the other.
63 +/* Packet sent when modifying a qdisc */
64 +struct tc_wrr_qdisc_modf {
65 + /* Not-valid values are ignored */
66 + struct tc_wrr_qdisc_weight weight1;
67 + struct tc_wrr_qdisc_weight weight2;
70 +/* Packet sent when creating a qdisc */
71 +struct tc_wrr_qdisc_crt {
72 + struct tc_wrr_qdisc_modf qdisc_modf;
73 + char srcaddr; /* 1=lookup source, 0=lookup destination */
74 + char usemac; /* 1=Classify on MAC addresses, 0=classify on IP */
75 + char usemasq; /* 1=Classify based on masquerading - only valid
76 + if usemac is zero */
77 + int bands_max; /* Maximal number of bands (i.e.: classes) */
78 + int proxy_maxconn; /* If different from 0 then we support proxy remapping
79 + of packets. And this is the maximal number of
80 + concurrent proxy connections. */
83 +/* Packet returned when querying a qdisc */
84 +struct tc_wrr_qdisc_stats {
85 + struct tc_wrr_qdisc_crt qdisc_crt;
87 + int nodes_in_heap; /* Current number of bands wanting to send something */
88 + int bands_cur; /* Current number of bands used (i.e.: MAC/IP addresses seen) */
89 + int bands_reused; /* Number of times this band has been reused. */
90 + __u64 priosum; /* Sum of priorities in heap where 1 is 2^32 */
93 +struct tc_wrr_qdisc_modf_std {
94 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structures this is. 0=This struct */
95 + char change_class; /* Should we also change a class? */
96 + struct tc_wrr_qdisc_modf qdisc_modf; /* Only valid if change_class is false */
97 + unsigned char addr[ETH_ALEN]; /* Class to change (non-used bytes should be 0). Valid only if change_class is true */
98 + struct tc_wrr_class_modf class_modf; /* The change */
101 +/* Used for proxyremapping */
102 +struct tc_wrr_qdisc_modf_proxy {
103 + char proxy; /* This indicates which of the tc_wrr_qdisc_modf structures this is. 1=This struct */
104 + char reset; /* This is 1 if the proxyremap information should be reset */
105 + int changec; /* changec is the number of elements in changes. */
106 + long changes[0]; /* This is an array of type ProxyRemapBlock */
110 diff -urN linux-2.6.26.2.org/include/linux/wrr.h linux-2.6.26.2/include/linux/wrr.h
111 --- linux-2.6.26.2.org/include/linux/wrr.h 1970-01-01 01:00:00.000000000 +0100
112 +++ linux-2.6.26.2/include/linux/wrr.h 2008-08-19 11:20:27.000000000 +0200
118 + * This describes the information that is written in proxyremap.log and which
119 + * are used in the communication between proxyremapserver and proxyremapclient.
120 + * Everything is in network order.
123 +/* First this header is sent */
124 +#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
127 + * Then this block is sent every time a connection is opened or closed.
128 + * Note how it is aligned to use little space - arrays of this
129 + * structure are saved in many places.
132 + /* Server endpoint of connection */
134 + unsigned short sport;
136 + /* IP protocol for this connection (typically udp or tcp) */
137 + unsigned char proto;
139 + /* Is the connection opened or closed? */
140 + unsigned char open;
142 + /* Client the packets should be accounted to */
144 + unsigned char macaddr[6]; /* Might be 0. */
146 + /* An informal two-character code from the proxyserver. Used for debugging. */
152 + * This is common code for handling the tables containing information about
153 + * which proxyserver connections are associated with which machines.
156 +/* Returns the number of bytes that should be available in the area
157 + * maintained by this module given the maximal number of concurrent
159 +int proxyGetMemSize(int max_connections);
161 +/* Initializes a memory area to use. There must be as many bytes
162 + available as returned by proxyGetMemSize. */
163 +void proxyInitMem(void *data, int max_connections);
166 +int proxyGetCurConn(void *data); /* Returns current number of connections */
167 +int proxyMaxCurConn(void *data); /* Returns maximal number of connections */
169 +/* This is called to open and close connections. Returns -1 if
170 + a protocol error occurs (i.e.: If it is discovered) */
171 +int proxyConsumeBlock(void *data, ProxyRemapBlock *);
173 +/* Returns the RemapBlock associated with this connection or 0: */
174 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port,
177 +/* Return the maximum number of connections */
178 +int proxyGetMaxConn(void *data);
181 diff -urN linux-2.6.26.2.org/MAINTAINERS linux-2.6.26.2/MAINTAINERS
182 --- linux-2.6.26.2.org/MAINTAINERS 2008-07-13 23:51:29.000000000 +0200
183 +++ linux-2.6.26.2/MAINTAINERS 2008-08-19 11:20:27.000000000 +0200
184 @@ -4500,6 +4500,12 @@
185 W: http://opensource.wolfsonmicro.com/node/7
188 +WRR NETWORK SCHEDULER
189 +P: Rasmus Bøg Hansen
191 +W: http://www.zz9.dk/wrr
197 diff -urN linux-2.6.26.2.org/net/sched/Kconfig linux-2.6.26.2/net/sched/Kconfig
198 --- linux-2.6.26.2.org/net/sched/Kconfig 2008-07-13 23:51:29.000000000 +0200
199 +++ linux-2.6.26.2/net/sched/Kconfig 2008-08-19 11:20:27.000000000 +0200
201 To compile this code as a module, choose M here: the
202 module will be called sch_prio.
205 + tristate "WRR packet scheduler"
206 + depends on NET_SCHED && ( NF_CONNTRACK || !NF_CONNTRACK )
208 + The weighted round-robin scheduling algorithm directs network
209 + connections to different real servers based on server weights
210 + in a round-robin manner. Servers with higher weights receive
211 + new connections first than those with less weights, and servers
212 + with higher weights get more connections than those with less
213 + weights and servers with equal weights get equal connections.
215 + If you want masquerading (the "masq" option to the tc userspace
216 + program) you need to enable connection tracking (IP_NF_CONNTRACK)
217 + in the netfilter options.
219 + If you want to compile it in kernel, say Y. If you want to compile
220 + it as a module, say M here and read Documentation/modules.txt. The
221 + module will be called sch_wrr. If unsure, say N.
224 tristate "Random Early Detection (RED)"
226 diff -urN linux-2.6.26.2.org/net/sched/Makefile linux-2.6.26.2/net/sched/Makefile
227 --- linux-2.6.26.2.org/net/sched/Makefile 2008-07-13 23:51:29.000000000 +0200
228 +++ linux-2.6.26.2/net/sched/Makefile 2008-08-19 11:20:27.000000000 +0200
232 obj-y := sch_generic.o
233 +sch_wrr-objs = wrr.o wrr_proxydict.o
235 obj-$(CONFIG_NET_SCHED) += sch_api.o sch_blackhole.o
236 obj-$(CONFIG_NET_CLS) += cls_api.o
238 obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
239 obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
240 obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
241 +obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
242 obj-$(CONFIG_NET_SCH_RED) += sch_red.o
243 obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
244 obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
245 diff -urN linux-2.6.26.2.org/net/sched/wrr.c linux-2.6.26.2/net/sched/wrr.c
246 --- linux-2.6.26.2.org/net/sched/wrr.c 1970-01-01 01:00:00.000000000 +0100
247 +++ linux-2.6.26.2/net/sched/wrr.c 2008-08-20 16:40:09.000000000 +0200
249 +/*-----------------------------------------------------------------------------
250 +Weighted Round Robin scheduler.
252 +Written by Christian Worm Mortensen, cworm@it-c.dk.
256 +This module implements a weighted round robin queue with built-in classifier.
257 +The classifier currently maps each MAC or IP address (configurable either MAC
258 +or IP and either source or destination) to different classes. Each such class
259 +is called a band. When using MAC addresses only bridged packets can be
260 +classified; other packets go to a default MAC address.
262 +Each band has a weight value, where 0<weight<=1. The bandwidth each band
263 +get is proportional to the weight as can be deduced from the next section.
267 +Each band has a penalty value. Bands having something to send are kept in
268 +a heap according to this value. The band with the lowest penalty value
269 +is in the root of the heap. The penalty value is a 128 bit number. Initially
270 +no bands are in the heap.
272 +Two global 64 bit values counter_low_penal and counter_high_penal are initialized
273 +to 0 and to 2^63 respectively.
276 + The packet is inserted in the queue for the band it belongs to. If the band
277 + is not in the heap it is inserted into it. In this case, the upper 64 bits
278 + of its penalty value is set to the same as for the root-band of the heap.
279 + If the heap is empty 0 is used. The lower 64 bit is set to counter_low_penal
280 + and counter_low_penal is incremented by 1.
283 + If the heap is empty we have nothing to send.
285 + If the root band has a non-empty queue a packet is dequeued from that.
286 + The upper 64 bit of the penalty value of the band is incremented by the
287 + packet size divided with the weight of the band. The lower 64 bit is set to
288 + counter_high_penal and counter_high_penal is incremented by 1.
290 + If the root element for some reason has an empty queue it is removed from
291 + the heap and we try to dequeue again.
293 +The effect of the heap and the upper 64 bit of the penalty values is to
294 +implement a weighted round robin queue. The effect of counter_low_penal,
295 +counter_high_penal and the lower 64 bit of the penalty value is primarily to
296 +stabilize the queue and to give better quality of service to machines only
297 +sending a packet now and then. For example machines which have a single
298 +interactive connection such as telnet or simple text chatting.
302 +The weight value can be changed dynamically by the queue itself. The weight
303 +value and how it is changed is described by the two members weight1 and
304 +weight2 which has type tc_wrr_class_weight and which are in each class. And
305 +by the two integer value members of the qdisc called penalfact1 and penalfact2.
306 +The structure is defined as:
308 + struct tc_wrr_class_weight {
309 + // All are represented as parts of (2^64-1).
310 + __u64 val; // Current value (0 is not valid)
311 + __u64 decr; // Value per byte (2^64-1 is not valid)
312 + __u64 incr; // Value per second (2^64-1 is not valid)
313 + __u64 min; // Minimal value (0 is not valid)
314 + __u64 max; // Maximal value (0 is not valid)
316 + // The time where the above information was correct:
320 +The weight value used by the dequeue operations is calculated as
321 +weight1.val*weight2.val. weight1 and weight2 are handled independently and in the
322 +same way as will be described now.
324 +Every second, the val parameter is incremented by incr.
326 +Every time a packet is transmitted the value is decremented by decr times
327 +the packet size. Depending on the value of the weight_mode parameter it
328 +is also multiplied with other numbers. This makes it possible to give
329 +penalty to machines transferring much data.
331 +-----------------------------------------------------------------------------*/
333 +#include <linux/autoconf.h>
334 +#include <linux/module.h>
335 +#include <asm/uaccess.h>
336 +#include <asm/system.h>
337 +#include <linux/bitops.h>
338 +#include <linux/types.h>
339 +#include <linux/kernel.h>
340 +#include <linux/vmalloc.h>
341 +#include <linux/sched.h>
342 +#include <linux/string.h>
343 +#include <linux/mm.h>
344 +#include <linux/socket.h>
345 +#include <linux/sockios.h>
346 +#include <linux/in.h>
347 +#include <linux/errno.h>
348 +#include <linux/interrupt.h>
349 +#include <linux/if_ether.h>
350 +#include <linux/inet.h>
351 +#include <linux/netdevice.h>
352 +#include <linux/etherdevice.h>
353 +#include <linux/notifier.h>
355 +#include <net/route.h>
356 +#include <linux/skbuff.h>
357 +#include <net/sock.h>
358 +#include <net/pkt_sched.h>
359 +#include <linux/if_arp.h>
360 +#include <linux/version.h>
361 +#include <linux/wrr.h>
363 +#define WRR_VER "080820"
365 +#define my_malloc(size) kmalloc(size,GFP_KERNEL)
366 +#define my_free(ptr) kfree(ptr)
368 +#define LOCK_START sch_tree_lock(sch);
369 +#define LOCK_END sch_tree_unlock(sch);
370 +#define ENQUEUE_SUCCESS 0
371 +#define ENQUEUE_FAIL NET_XMIT_DROP
373 +#if defined CONFIG_IP_NF_CONNTRACK || defined CONFIG_IP_NF_CONNTRACK_MODULE || defined CONFIG_NF_CONNTRACK || defined CONFIG_NF_CONNTRACK_MODULE
374 +#include <net/netfilter/nf_conntrack.h>
375 +#define MASQ_SUPPORT
378 +/* The penalty (priority) type */
379 +typedef u64 penalty_base_t;
380 +#define penalty_base_t_max ((penalty_base_t)-1)
381 +typedef struct penalty_t {
385 +#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls))
386 +#define penalty_le(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls))
387 +static penalty_t penalty_max = { penalty_base_t_max, penalty_base_t_max };
394 +struct heap_element;
396 +/* Initializes an empty heap:
397 + * he: A pointer to an uninitialized heap structure identifying the heap
398 + * size: Maximal number of elements the heap can contain
399 + * poll: An array of size "size" used by the heap.
401 +static void heap_init(struct heap *he, int size, struct heap_element *poll);
403 +/* Each element in the heap is identified by a user-assigned id which
404 + * should be a non negative integer less than the size argument
405 + * given to heap_init.
407 +static void heap_insert(struct heap *, int id, penalty_t);
408 +static void heap_remove(struct heap *, int id);
409 +static void heap_set_penalty(struct heap *, int id, penalty_t);
411 +/* Retrieving information */
412 +static char heap_empty(struct heap *); /* Heap empty? */
413 +static char heap_contains(struct heap *, int id); /* Does heap contain
415 +static int heap_root(struct heap *); /* Returns the id of the root */
416 +static penalty_t heap_get_penalty(struct heap *, int id); /* Returns penalty
420 + * Heap implementation
423 +struct heap_element {
425 + int id; /* The user-assigned id of this element */
426 + int id2idx; /* Maps from user-assigned ids to indices in root_1 */
430 + struct heap_element *root_1;
434 +/* Heap implementation */
435 +static void heap_init(struct heap *h, int size, struct heap_element *poll)
440 + h->root_1 = poll - 1;
442 + for (i = 0; i < size; i++)
443 + poll[i].id2idx = 0;
446 +static char heap_empty(struct heap *h)
448 + return h->elements == 0;
451 +static char heap_contains(struct heap *h, int id)
453 + return h->root_1[id + 1].id2idx != 0;
456 +static int heap_root(struct heap *h)
458 + return h->root_1[1].id;
461 +static penalty_t heap_get_penalty(struct heap *h, int id)
463 + return h->root_1[h->root_1[id + 1].id2idx].penalty;
466 +static void heap_penalty_changed_internal(struct heap *h, int idx);
468 +static void heap_set_penalty(struct heap *h, int id, penalty_t p)
470 + int idx = h->root_1[id + 1].id2idx;
471 + h->root_1[idx].penalty = p;
472 + heap_penalty_changed_internal(h, idx);
475 +static void heap_insert(struct heap *h, int id, penalty_t p)
477 + /* Insert at the end of the heap */
479 + h->root_1[h->elements].id = id;
480 + h->root_1[h->elements].penalty = p;
481 + h->root_1[id + 1].id2idx = h->elements;
483 + /* And put it in the right position */
484 + heap_penalty_changed_internal(h, h->elements);
487 +static void heap_remove(struct heap *h, int id)
489 + int idx = h->root_1[id + 1].id2idx;
491 + h->root_1[id + 1].id2idx = 0;
493 + if (h->elements == idx) {
498 + mvid = h->root_1[h->elements].id;
499 + h->root_1[idx].id = mvid;
500 + h->root_1[idx].penalty = h->root_1[h->elements].penalty;
501 + h->root_1[mvid + 1].id2idx = idx;
504 + heap_penalty_changed_internal(h, idx);
507 +static void heap_swap(struct heap *h, int idx0, int idx1)
513 + /* Simple content */
514 + tmp_p = h->root_1[idx0].penalty;
515 + tmp_id = h->root_1[idx0].id;
516 + h->root_1[idx0].penalty = h->root_1[idx1].penalty;
517 + h->root_1[idx0].id = h->root_1[idx1].id;
518 + h->root_1[idx1].penalty = tmp_p;
519 + h->root_1[idx1].id = tmp_id;
521 + /* Update reverse pointers */
522 + id0 = h->root_1[idx0].id;
523 + id1 = h->root_1[idx1].id;
524 + h->root_1[id0 + 1].id2idx = idx0;
525 + h->root_1[id1 + 1].id2idx = idx1;
528 +static void heap_penalty_changed_internal(struct heap *h, int cur)
531 + || penalty_leq(h->root_1[cur >> 1].penalty,
532 + h->root_1[cur].penalty)) {
533 + /* We are in heap order upwards - so we should move the element down */
535 + int nxt0 = cur << 1;
536 + int nxt1 = nxt0 + 1;
537 + penalty_t pen_c = h->root_1[cur].penalty;
540 + h->elements ? h->root_1[nxt0].penalty : penalty_max;
543 + h->elements ? h->root_1[nxt1].penalty : penalty_max;
545 + if (penalty_le(pen_0, pen_c)
546 + && penalty_leq(pen_0, pen_1)) {
547 + /* Swap with child 0 */
548 + heap_swap(h, cur, nxt0);
550 + } else if (penalty_le(pen_1, pen_c)) {
551 + /* Swap with child 1 */
552 + heap_swap(h, cur, nxt1);
555 + /* Heap in heap order */
560 + /* We are not in heap order upwards (and thus we must be it downwards).
562 + while (cur != 1) { /* While not root */
563 + int nxt = cur >> 1;
565 + (h->root_1[nxt].penalty, h->root_1[cur].penalty))
567 + heap_swap(h, cur, nxt);
574 + * Classification based on MAC or IP addresses. Note that for historical reasons
575 + * these are prefixed with mac_ since originally only MAC based classification
578 + * This code should be in a separate filter module - but it isn't.
586 +/* Initializes/destroys the structure we maintain.
587 + Returns -1 on error */
588 +static int mac_init(struct mac_head *, int max_macs, char srcaddr,
589 + char usemac, char usemasq, void *proxyremap);
590 +static void mac_done(struct mac_head *);
591 +static void mac_reset(struct mac_head *);
593 +/* Classify a packet. Returns a number n where 0<=n<max_macs. Or -1 if
594 + the packet should be dropped. */
595 +static int mac_classify(struct mac_head *, struct sk_buff *skb);
602 + unsigned char addr[ETH_ALEN]; /* Address of this band (last two are 0 on IP) */
603 + unsigned long lastused; /* Last time a packet was encountered */
604 + int class; /* Classid of this band (0<=classid<max_macs) */
607 +static int mac_compare(const void *a, const void *b)
609 + return memcmp(a, b, ETH_ALEN);
613 + int mac_max; /* Maximal number of MAC addresses/classes allowed */
614 + int mac_cur; /* Current number of MAC addresses/classes */
615 + int mac_reused; /* Number of times we have reused a class with a new address. */
617 + char srcaddr; /* True if we classify on the source address of packets,
618 + else we use destination address. */
619 + char usemac; /* If true we use mac, else we use IP */
620 + char usemasq; /* If true we try to demasquerade */
621 + struct mac_addr *macs; /* Allocated mac_max elements, used mac_cur */
622 + char *cls2mac; /* Mapping from classnumbers to addresses -
623 + there is 6 bytes in each entry */
625 + void *proxyremap; /* Information on proxy remapping of data or 0 */
628 +/* This is as the standard C library function with the same name: */
629 +static const void *bsearch(const void *key, const void *base, int nmemb,
631 + int (*compare) (const void *, const void *))
640 + m_idx = nmemb >> 1;
641 + m_ptr = ((const char *)base) + m_idx * size;
643 + i = compare(key, m_ptr);
644 + if (i < 0) /* key is less */
645 + return bsearch(key, base, m_idx, size, compare);
647 + return bsearch(key, ((const char *)m_ptr) + size,
648 + nmemb - m_idx - 1, size, compare);
653 +static int mac_init(struct mac_head *h, int max_macs, char srcaddr,
654 + char usemac, char usemasq, void *proxyremap)
659 + h->srcaddr = srcaddr;
660 + h->usemac = usemac;
661 + h->usemasq = usemasq;
662 + h->mac_max = max_macs;
663 + h->proxyremap = proxyremap;
665 + h->macs = (struct mac_addr *)
666 + my_malloc(sizeof(struct mac_addr) * max_macs);
667 + h->cls2mac = (char *)my_malloc(6 * max_macs);
668 + if (!h->macs || !h->cls2mac) {
672 + my_free(h->cls2mac);
678 +static void mac_done(struct mac_head *h)
681 + my_free(h->cls2mac);
684 +static void mac_reset(struct mac_head *h)
691 +static int lookup_mac(struct mac_head *h, unsigned char *addr)
696 + /* First try to find the address in the table */
697 + struct mac_addr *m = (struct mac_addr *)
698 + bsearch(addr, h->macs, h->mac_cur, sizeof(struct mac_addr),
702 + m->lastused = h->incr_time++;
705 + /* Okay - the MAC address was not in table */
706 + if (h->mac_cur == h->mac_max) {
707 + /* And the table is full - delete the oldest entry */
709 + /* Find the oldest entry */
712 + for (i = 1; i < h->mac_cur; i++)
713 + if (h->macs[i].lastused < h->macs[lowidx].lastused)
716 + class = h->macs[lowidx].class;
718 + /* And delete it */
719 + memmove(&h->macs[lowidx], &h->macs[lowidx + 1],
720 + (h->mac_cur - lowidx - 1) * sizeof(struct mac_addr));
724 + class = h->mac_cur;
727 + /* The table is not full - find the position we should put the address in */
728 + for (i = 0; i < h->mac_cur; i++)
729 + if (mac_compare(addr, &h->macs[i]) < 0)
732 + /* We should insert at position i */
733 + memmove(&h->macs[i + 1], &h->macs[i],
734 + (h->mac_cur - i) * sizeof(struct mac_addr));
736 + memcpy(m->addr, addr, ETH_ALEN);
737 + m->lastused = h->incr_time++;
741 + /* Finally update the cls2mac variable */
742 + memcpy(h->cls2mac + ETH_ALEN * class, addr, ETH_ALEN);
747 +int valid_ip_checksum(struct iphdr *ip, int size)
749 + __u16 header_len = ip->ihl << 2;
751 + __u16 *ipu = (u16 *) ip;
754 + /* We require 4 bytes in the packet since we access the port numbers */
755 + if ((size < header_len) || size < sizeof(struct iphdr) + 4)
758 + for (a = 0; a < (header_len >> 1); a++, ipu++) {
760 + /* If not the checksum field */
768 + return ip->check == (__u16) ~ c;
771 +static int mac_classify(struct mac_head *head, struct sk_buff *skb)
773 + /* We set this to the address we map to. In case we map to an IP
774 + address the last two entries are set to 0. */
775 + unsigned char addr[ETH_ALEN];
777 + /* Used later for mac classification */
778 + struct ethhdr *hdr;
780 + /* This is the size of the network part of the packet */
781 + int size = ((char *)skb->data + skb->len) - ((char *)ip_hdr(skb));
783 + /* Set a default value for the address */
784 + memset(addr, 0, ETH_ALEN);
786 + /* Map IPv4 traffic to their correct addresses.
787 + Everything (non-IPv4) else goes to a default address */
788 + if (ntohs(skb->protocol) == ETH_P_IP) {
790 + struct iphdr *iph = ip_hdr(skb); /* This is the IP header */
792 + const __u16 *portp = (__u16 *) & (((char *)iph)[iph->ihl * 4]); /* Port numbers*/
793 + __u16 sport = portp[0];
794 + __u16 dport = portp[1];
796 + unsigned ipaddr; /* IP classification */
797 + ProxyRemapBlock *prm; /* Proxyremapping */
799 + /* IP packets must have valid checksum */
800 + if (!valid_ip_checksum(ip_hdr(skb), size))
805 + ipaddr = iph->saddr;
807 + ipaddr = iph->daddr;
810 + /* Update ipaddr if packet is masqueraded */
811 + if (head->usemasq) {
812 + /* Thanks to Rusty Russell for help with the following code */
813 + enum ip_conntrack_info ctinfo;
814 + struct nf_conn *ct;
815 + ct = nf_ct_get(skb, &ctinfo);
819 + ct->tuplehash[CTINFO2DIR(ctinfo)].
823 + ct->tuplehash[CTINFO2DIR(ctinfo)].
829 + /* Set prm (proxyremap) based on ipaddr */
831 + if (head->proxyremap) {
832 + if (head->srcaddr) {
834 + proxyLookup(head->proxyremap, ipaddr, sport,
835 + ip_hdr(skb)->protocol);
838 + proxyLookup(head->proxyremap, ipaddr, dport,
839 + ip_hdr(skb)->protocol);
842 + /* And finally set the correct address */
844 + /* This packet should be remapped */
846 + memcpy(addr, prm->macaddr, ETH_ALEN);
848 + memcpy(addr, &prm->caddr, sizeof(unsigned));
850 + /* This packet should not be remapped */
851 + if (head->usemac) {
852 + /* Make sure we have a valid mac address */
854 + skb->dev->type == ARPHRD_ETHER &&
855 + skb->len >= ETH_HLEN) {
856 + hdr = (struct ethhdr*)skb->data;
858 + memcpy(addr,hdr->h_source, ETH_ALEN);
860 + memcpy(addr,hdr->h_dest, ETH_ALEN);
863 + memcpy(addr, &ipaddr, 4);
868 + return lookup_mac(head, addr);
875 +/* Per-class information */
876 +struct wrrc_sched_data {
877 + struct Qdisc *que; /* The queue for this class */
878 + struct tc_wrr_class_modf class_modf; /* Information about the class */
880 + /* For classes in the heap this is the priority value priosum
881 + was updated with for this class */
885 +/* Per-qdisc information */
886 +struct wrr_sched_data {
887 + struct heap h; /* A heap containing all the bands that will send something */
888 + struct heap_element *poll; /* bandc elements */
890 + /* The sum of the priorities of the elements in the heap where
891 + a priority of 1 is saved as 2^32 */
894 + /* A class for each band */
895 + struct wrrc_sched_data *bands; /* bandc elements */
897 + /* Information maintained by the proxydict module or 0 if we
898 + have no proxy remapping */
901 + /* Always incrementing counters, we always have that any value of
902 + counter_low_penal < any value of counter_high_penal. */
903 + penalty_base_t counter_low_penal;
904 + penalty_base_t counter_high_penal;
906 + struct tc_wrr_qdisc_modf qdisc_modf; /* Penalty updating */
908 + struct mac_head filter; /* The filter */
909 + int bandc; /* Number of bands */
912 +/* Priority handling.
913 + * weight is in interval [0..2^32]
914 + * priosum has whole numbers in the upper and fragments in the lower 32 bits.
916 +static void weight_transmit(struct tc_wrr_class_weight *p,
917 + struct tc_wrr_qdisc_weight q,
919 + u64 priosum, u64 weight, unsigned size)
922 + unsigned long now = jiffies / HZ;
924 + /* Penalty for transmitting */
929 + switch (q.weight_mode) {
931 + change = p->decr * size;
934 + change = p->decr * size * heapsize;
936 + case 3: /* Note: 64 bit division is not always available */
937 + divisor = (u32) (weight >> 16);
940 + change = p->decr * size * (((u32) (priosum >> 16)) / divisor);
945 + if (p->val > old || p->val < p->min)
948 + /* Credit for elapsed time */
949 + change = (now - p->tim) * p->incr;
953 + if (p->val < old || p->val > p->max)
957 +static void weight_setdefault(struct tc_wrr_class_weight *p)
959 + p->val = (u64) - 1;
962 + p->min = (u64) - 1;
963 + p->max = (u64) - 1;
964 + p->tim = jiffies / HZ;
967 +static void weight_setvalue(struct tc_wrr_class_weight *dst,
968 + struct tc_wrr_class_weight *src)
970 + if (src->val != 0) {
971 + dst->val = src->val;
972 + dst->tim = jiffies / HZ;
975 + dst->min = src->min;
977 + dst->max = src->max;
978 + if (src->decr != ((u64) - 1))
979 + dst->decr = src->decr;
980 + if (src->incr != ((u64) - 1))
981 + dst->incr = src->incr;
982 + if (dst->val < dst->min)
983 + dst->val = dst->min;
984 + if (dst->val > dst->max)
985 + dst->val = dst->max;
988 +static void wrr_destroy(struct Qdisc *sch)
990 + struct wrr_sched_data *q = qdisc_priv(sch);
993 + /* Destroy our filter */
994 + mac_done(&q->filter);
996 + /* Destroy all our child queues */
997 + for (i = 0; i < q->bandc; i++)
998 + qdisc_destroy(q->bands[i].que);
1000 + /* And free memory */
1001 + my_free(q->bands);
1004 + my_free(q->proxydict);
1007 +static int wrr_init(struct Qdisc *sch, struct nlattr *opt)
1009 + struct wrr_sched_data *q = qdisc_priv(sch);
1010 + int i, maciniterr;
1012 + struct tc_wrr_qdisc_crt *qopt;
1014 + /* Parse options */
1016 + return -EINVAL; /* Options must be specified */
1017 + if (nla_len(opt) < sizeof(*qopt))
1019 + qopt = nla_data(opt);
1021 + if (qopt->bands_max > 8192 || qopt->bands_max < 2) {
1022 + /* More than 8192 queues or less than 2? That cannot be true - it must be
1027 + if (qopt->proxy_maxconn < 0 || qopt->proxy_maxconn > 20000) {
1028 + /* More than this number of maximal concurrent connections is unrealistic */
1031 +#ifndef MASQ_SUPPORT
1032 + if (qopt->usemasq) {
1036 + q->bandc = qopt->bands_max;
1037 + q->qdisc_modf = qopt->qdisc_modf;
1039 + /* Create structures */
1040 + q->poll = (struct heap_element *)
1041 + my_malloc(sizeof(struct heap_element) * q->bandc);
1042 + q->bands = (struct wrrc_sched_data *)
1043 + my_malloc(sizeof(struct wrrc_sched_data) * q->bandc);
1045 + if (qopt->proxy_maxconn > 0) {
1046 + q->proxydict = my_malloc(proxyGetMemSize(qopt->proxy_maxconn));
1051 + /* Init mac module */
1052 + maciniterr = mac_init(&q->filter, qopt->bands_max, qopt->srcaddr,
1053 + qopt->usemac, qopt->usemasq, q->proxydict);
1055 + /* See if we got the memory we wanted */
1056 + if (!q->poll || !q->bands ||
1057 + (qopt->proxy_maxconn > 0 && !q->proxydict) || maciniterr < 0) {
1061 + my_free(q->bands);
1063 + my_free(q->proxydict);
1064 + if (maciniterr >= 0)
1065 + mac_done(&q->filter);
1068 + /* Initialize proxy */
1070 + proxyInitMem(q->proxydict, qopt->proxy_maxconn);
1071 + /* Initialize values */
1072 + q->counter_low_penal = 0;
1073 + q->counter_high_penal = penalty_base_t_max >> 1;
1075 + /* Initialize empty heap */
1076 + heap_init(&q->h, q->bandc, q->poll);
1079 + /* Initialize each band */
1081 + for (i = 0; i < q->bandc; i++) {
1082 + weight_setdefault(&q->bands[i].class_modf.weight1);
1083 + weight_setdefault(&q->bands[i].class_modf.weight2);
1085 + struct Qdisc *child =
1086 + qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, &pfifo_qdisc_ops, sch->handle);
1088 + q->bands[i].que = child;
1090 + /* Queue couldn't be created :-( */
1095 + q->bands[i].que = &noop_qdisc;
1099 + /* Destroy again */
1104 + printk(KERN_DEBUG "sch_wrr: Initialized version " WRR_VER "\n");
1109 +static void wrr_reset(struct Qdisc *sch)
1111 + struct wrr_sched_data *q = qdisc_priv(sch);
1114 + /* Reset own values */
1115 + q->counter_low_penal = 0;
1116 + q->counter_high_penal = penalty_base_t_max >> 1;
1118 + /* Reset filter */
1119 + mac_reset(&q->filter);
1121 + /* Reinitialize heap */
1122 + heap_init(&q->h, q->bandc, q->poll);
1125 + /* Reset all bands */
1126 + for (i = 0; i < q->bandc; i++) {
1127 + weight_setdefault(&q->bands[i].class_modf.weight1);
1128 + weight_setdefault(&q->bands[i].class_modf.weight2);
1129 + qdisc_reset(q->bands[i].que);
1132 + /* Reset proxy remapping information */
1134 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1137 +static int wrr_enqueue(struct sk_buff *skb, struct Qdisc *sch)
1139 + struct wrr_sched_data *q = qdisc_priv(sch);
1140 + int retvalue = ENQUEUE_FAIL;
1142 + /* The packet is in skb */
1143 + int band = mac_classify(&q->filter, skb);
1146 + /* Enqueue packet for this band */
1147 + struct Qdisc *qdisc = q->bands[band].que;
1149 + if ((retvalue = qdisc->enqueue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1151 + sch->bstats.bytes += skb->len;
1152 + sch->bstats.packets++;
1155 + /* Insert band into heap if not already there */
1156 + if (!heap_contains(&q->h, band)) {
1158 + if (!heap_empty(&q->h))
1160 + heap_get_penalty(&q->h,
1161 + heap_root(&q->h)).
1165 + p.ls = q->counter_low_penal++;
1166 + heap_insert(&q->h, band, p);
1167 + q->bands[band].priosum_val =
1168 + ((q->bands[band].class_modf.weight1.
1171 + ((q->bands[band].class_modf.weight2.
1173 + q->priosum += q->bands[band].priosum_val;
1177 + /* If we decide not to enque it seems like we also need to free the packet */
1181 + if (retvalue != ENQUEUE_SUCCESS) {
1182 + /* Packet not enqued */
1183 + sch->qstats.drops++;
1189 +static struct sk_buff *wrr_dequeue(struct Qdisc *sch) /* Take one packet from the band at the heap root and re-penalise (or remove) that band */
1191 + struct wrr_sched_data *q = qdisc_priv(sch);
1192 + struct sk_buff *skb;
1194 + u64 weight, priosum;
1195 + struct wrrc_sched_data *b;
1197 + /* Return if heap is empty */
1198 + if (heap_empty(&q->h))
1201 + /* Find root element */
1202 + band = heap_root(&q->h);
1204 + /* Find priority of this element in interval [1;2^32] */
1205 + b = &q->bands[band];
1207 + /* weight is in interval [1;2^32]: each factor is the top 16 bits of a 64-bit value, plus 1 */
1208 + weight = ((b->class_modf.weight1.val >> 48) + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1209 + priosum = q->priosum; /* keep the sum as it was before this band's share is taken out */
1210 + q->priosum -= q->bands[band].priosum_val;
1212 + /* Dequeue the packet from the child qdisc of the root band */
1213 + skb = q->bands[band].que->dequeue(q->bands[band].que);
1216 + /* There was a packet in this queue */
1220 + /* Find length of packet adjusted with priority */
1221 + adjlen = (u32) (weight >> (32 - 16));
1224 + adjlen = (skb->len << 16) / adjlen; /* NOTE(review): the divisor is 0 whenever weight < 2^16 unless it is clamped in the lines not shown - confirm */
1226 + /* Update penalty information for this class */
1227 + weight_transmit(&b->class_modf.weight1, q->qdisc_modf.weight1,
1228 + q->h.elements, priosum, weight, skb->len);
1229 + weight_transmit(&b->class_modf.weight2, q->qdisc_modf.weight2,
1230 + q->h.elements, priosum, weight, skb->len);
1231 + q->bands[band].priosum_val =
1232 + ((b->class_modf.weight1.val >> 48) +
1233 + 1) * ((b->class_modf.weight2.val >> 48) + 1);
1234 + q->priosum += q->bands[band].priosum_val; /* add the band back using the weights as they are after weight_transmit */
1236 + /* And update the class in the heap */
1237 + p = heap_get_penalty(&q->h, band);
1239 + p.ls = q->counter_high_penal++; /* every transmission takes the next value of the high counter */
1240 + heap_set_penalty(&q->h, band, p);
1242 + /* Return packet */
1246 + /* No packet - so machine should be removed from heap */
1247 + heap_remove(&q->h, band);
1252 +static struct sk_buff *wrr_peek(struct Qdisc *sch) /* Report the packet at the head of the root band's queue; nothing is dequeued here */
1254 + struct wrr_sched_data *q = qdisc_priv(sch);
1257 + /* Return if heap is empty */
1258 + if (heap_empty(&q->h))
1261 + /* Find root element */
1262 + band = heap_root(&q->h);
1264 + /* Peek at the head of the root band's child queue.
         NOTE(review): qdisc_peek_head looks at the child's own queue directly - confirm this is valid for grafted children */
1265 + return qdisc_peek_head(q->bands[band].que);
1268 +static unsigned int wrr_drop(struct Qdisc *sch) /* Drop hook: ask the child qdiscs of the bands in the heap to drop a packet */
1270 + struct wrr_sched_data *q = qdisc_priv(sch);
1272 + /* Ugly... Drop bottom up in heap: walk the heap array from its last slot down to slot 1 */
1275 + for (i = q->h.elements; i >= 1; i--) {
1276 + int band = q->h.root_1[i].id;
1277 + if (q->bands[band].que->ops->drop(q->bands[band].que)) { /* non-zero: this child dropped something */
1280 + sch->qstats.drops++;
1288 +static int wrr_dump(struct Qdisc *sch, struct sk_buff *skb) /* Dump hook: emit the qdisc configuration and counters as one TCA_OPTIONS attribute */
1290 + struct wrr_sched_data *q = qdisc_priv(sch);
1291 + sk_buff_data_t b = skb->tail; /* tail before anything is appended, kept for the failure path */
1292 + struct tc_wrr_qdisc_stats opt; /* NOTE(review): filled field by field below; confirm no field or padding byte is copied out unassigned */
1294 + opt.qdisc_crt.qdisc_modf = q->qdisc_modf;
1295 + opt.qdisc_crt.srcaddr = q->filter.srcaddr;
1296 + opt.qdisc_crt.usemac = q->filter.usemac;
1297 + opt.qdisc_crt.usemasq = q->filter.usemasq;
1298 + opt.qdisc_crt.bands_max = q->filter.mac_max;
1299 + opt.nodes_in_heap = q->h.elements;
1300 + opt.bands_cur = q->filter.mac_cur;
1301 + opt.bands_reused = q->filter.mac_reused;
1302 + opt.priosum = q->priosum;
1304 + if (q->proxydict) { /* proxy support configured: report its limits */
1305 + opt.qdisc_crt.proxy_maxconn = proxyGetMaxConn(q->proxydict);
1306 + opt.proxy_curconn = proxyGetCurConn(q->proxydict);
1308 + opt.qdisc_crt.proxy_maxconn = 0;
1309 + opt.proxy_curconn = 0;
1312 + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1315 + nla_put_failure: /* NLA_PUT may jump to this label */
1316 + skb_trim(skb, (unsigned char *) &b - skb->data); /* NOTE(review): &b is the address of the local variable, not the saved tail value - looks like a bug, confirm */
1320 +static int wrr_tune_std(struct Qdisc *sch, struct nlattr *opt) /* Apply a non-proxy change request: optional per-class weights plus the qdisc-wide weight modes */
1322 + struct wrr_sched_data *q = qdisc_priv(sch);
1323 + struct tc_wrr_qdisc_modf_std *qopt = nla_data(opt);
1325 + if (nla_len(opt) < sizeof(*qopt)) /* payload too short to hold the request */
1330 + if (qopt->change_class) { /* request also carries weights for one class, named by address */
1331 + int idx = lookup_mac(&q->filter, qopt->addr); /* NOTE(review): idx is used as an index right away - confirm lookup_mac cannot return an invalid band */
1333 + (&q->bands[idx].class_modf.weight1,
1334 + &qopt->class_modf.weight1);
1335 + weight_setvalue(&q->bands[idx].class_modf.weight2,
1336 + &qopt->class_modf.weight2);
1338 + if (qopt->qdisc_modf.weight1.weight_mode != -1) /* -1 leaves the current mode untouched */
1339 + q->qdisc_modf.weight1.weight_mode =
1340 + qopt->qdisc_modf.weight1.weight_mode;
1341 + if (qopt->qdisc_modf.weight2.weight_mode != -1) /* same convention for the second weight */
1342 + q->qdisc_modf.weight2.weight_mode =
1343 + qopt->qdisc_modf.weight2.weight_mode;
1350 +static int wrr_tune_proxy(struct Qdisc *sch, struct nlattr *opt) /* Apply a proxy change request: optional table reset followed by changec remap blocks */
1352 + struct wrr_sched_data *q = qdisc_priv(sch);
1353 + struct tc_wrr_qdisc_modf_proxy *qopt = nla_data(opt);
1356 + /* Return if we are not configured with proxy support */
1357 + if (!q->proxydict)
1360 + /* Return if not enough data given */
1361 + if (nla_len(opt) < sizeof(*qopt))
1363 + if (nla_len(opt) < sizeof(*qopt) || /* repeats the check just above */
1364 + nla_len(opt) < sizeof(*qopt) + sizeof(ProxyRemapBlock) * qopt->changec) /* room for every block; NOTE(review): changec is caller-supplied - confirm the product cannot wrap */
1369 + if (qopt->reset) { /* start again from an empty table with the same connection limit */
1370 + proxyInitMem(q->proxydict, proxyGetMaxConn(q->proxydict));
1372 + /* Do all the changes */
1373 + for (i = 0; i < qopt->changec; i++) {
1374 + proxyConsumeBlock(q->proxydict, /* return value is not looked at */
1375 + &((ProxyRemapBlock *) & qopt->changes)[i]);
1383 +static int wrr_tune(struct Qdisc *sch, struct nlattr *opt) /* Change hook: route the request to the proxy or the standard handler */
1385 + if (((struct tc_wrr_qdisc_modf_std *)nla_data(opt))->proxy) { /* NOTE(review): the flag is read before either handler checks the payload length - confirm that is safe */
1386 + return wrr_tune_proxy(sch, opt);
1388 + return wrr_tune_std(sch, opt);
1394 + * External and internal IDs are equal. They are the band number plus 1.
1397 +/* Replace the child qdisc of a class; the previous child is reset and handed back through *old */
1398 +static int wrr_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1399 + struct Qdisc **old)
1401 + struct wrr_sched_data *q = qdisc_priv(sch);
1402 + if (arg > q->bandc || arg == 0) /* class ids are 1..bandc; NOTE(review): bands[] is indexed with arg below - confirm it is converted to a 0-based index in the lines not shown */
1407 + new = &noop_qdisc;
1409 + LOCK_START * old = q->bands[arg].que;
1410 + q->bands[arg].que = new;
1411 + qdisc_reset(*old);
1412 + LOCK_END return 0;
1415 +/* Returns the child qdisc attached to a class */
1416 +static struct Qdisc *wrr_leaf(struct Qdisc *sch, unsigned long arg)
1418 + struct wrr_sched_data *q = qdisc_priv(sch);
1419 + if (arg > q->bandc || arg == 0) /* only ids 1..bandc pass this check */
1422 + return q->bands[arg].que; /* NOTE(review): confirm arg has been made 0-based by this point */
1425 +static unsigned long wrr_get(struct Qdisc *sch, u32 classid) /* Class lookup: the minor part of classid is the band id, range-checked against 1..bandc */
1427 + struct wrr_sched_data *q = qdisc_priv(sch);
1428 + unsigned long band = TC_H_MIN(classid);
1429 + if (band > q->bandc || band == 0) /* outside 1..bandc */
1434 +static void wrr_put(struct Qdisc *q, unsigned long cl)
1439 +static int wrr_delete(struct Qdisc *sch, unsigned long cl) /* Class delete hook; only the range check on cl is visible in this excerpt */
1441 + struct wrr_sched_data *q = qdisc_priv(sch);
1442 + if (cl == 0 || cl > q->bandc) /* outside 1..bandc */
1448 +static int wrr_dump_class(struct Qdisc *sch, unsigned long cl, /* Class dump hook: fill tcm and emit the class state as one TCA_OPTIONS attribute */
1449 + struct sk_buff *skb, struct tcmsg *tcm)
1451 + struct wrr_sched_data *q = qdisc_priv(sch);
1452 + sk_buff_data_t b = skb->tail; /* tail before anything is appended, kept for the failure path */
1453 + struct tc_wrr_class_stats opt;
1455 + /* Handle of this class */
1456 + tcm->tcm_handle = sch->handle | cl;
1458 + if (cl == 0 || cl > q->bandc) /* outside 1..bandc */
1459 + goto nla_put_failure;
1462 + if ((cl < q->bandc) && q->bands[cl].que) /* report the handle of the child qdisc when there is one */
1463 + tcm->tcm_info = q->bands[cl].que->handle;
1465 + if (cl >= q->filter.mac_cur) {
1466 + /* Band is unused: report an all-zero record */
1467 + memset(&opt, 0, sizeof(opt));
1471 + opt.class_modf.weight1 = q->bands[cl].class_modf.weight1; /* work on copies: weight_transmit below is given &opt..., not the band */
1472 + opt.class_modf.weight2 = q->bands[cl].class_modf.weight2;
1473 + weight_transmit(&opt.class_modf.weight1, q->qdisc_modf.weight1,
1475 + weight_transmit(&opt.class_modf.weight2, q->qdisc_modf.weight2,
1477 + memcpy(opt.addr, q->filter.cls2mac + cl * ETH_ALEN, ETH_ALEN); /* cls2mac holds ETH_ALEN bytes per class */
1478 + opt.usemac = q->filter.usemac;
1479 + opt.heappos = q->h.root_1[cl + 1].id2idx; /* heap bookkeeping is read at index cl + 1 */
1480 + if (opt.heappos != 0) { /* non-zero position: the band is in the heap, so it has a penalty */
1482 + opt.penal_ls = heap_get_penalty(&q->h, cl).ls;
1483 + opt.penal_ms = heap_get_penalty(&q->h, cl).ms;
1490 + /* Put queuing information */
1491 + NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1494 + nla_put_failure: /* NLA_PUT may jump here */
1495 + skb_trim(skb, (unsigned char *) &b - skb->data); /* NOTE(review): &b is the address of the local variable, not the saved tail value - looks like a bug, confirm */
1499 +static int wrr_change(struct Qdisc *sch, u32 handle, u32 parent, /* Class change hook: set both weights of class *arg from the TCA_OPTIONS payload */
1500 + struct nlattr **tca, unsigned long *arg)
1502 + unsigned long cl = *arg;
1503 + struct wrr_sched_data *q = qdisc_priv(sch);
1504 + struct nlattr *opt = tca[TCA_OPTIONS]; /* NOTE(review): confirm a missing attribute is rejected before nla_len(opt) below */
1505 + struct tc_wrr_class_modf *copt = nla_data(opt);
1507 + if (cl == 0 || cl > q->bandc) /* outside 1..bandc */
1511 + if (nla_len(opt) < sizeof(*copt)) /* payload too short to hold the request */
1516 + weight_setvalue(&q->bands[cl].class_modf.weight1, &copt->weight1); /* NOTE(review): confirm cl has been made 0-based by this point */
1517 + weight_setvalue(&q->bands[cl].class_modf.weight2, &copt->weight2);
1524 +static void wrr_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Walk hook: offer every class id to the walker callback */
1526 + struct wrr_sched_data *q = qdisc_priv(sch);
1532 + for (prio = 1; prio <= q->bandc; prio++) { /* class ids run from 1 to bandc */
1533 + if (arg->count < arg->skip) { /* still below the walker's skip threshold */
1537 + if (arg->fn(sch, prio, arg) < 0) { /* negative result from the callback is handled in this branch */
1545 +static struct tcf_proto **wrr_find_tcf(struct Qdisc *sch, unsigned long cl)
1550 +static unsigned long wrr_bind(struct Qdisc *sch, /* Filter bind hook: resolves classid exactly as wrr_get does; parent is not used */
1551 + unsigned long parent, u32 classid)
1553 + return wrr_get(sch, classid);
1560 +static struct Qdisc_class_ops wrr_class_ops = { /* class-level hooks of the WRR qdisc */
1561 + .graft = wrr_graft,
1565 + .change = wrr_change,
1566 + .delete = wrr_delete,
1568 + .tcf_chain = wrr_find_tcf,
1569 + .bind_tcf = wrr_bind,
1570 + .unbind_tcf = wrr_put, /* unbinding reuses the put handler */
1571 + .dump = wrr_dump_class,
1574 +static struct Qdisc_ops wrr_qdisc_ops = { /* qdisc-level hooks; this is the object passed to register_qdisc */
1576 + .cl_ops = &wrr_class_ops,
1578 + .priv_size = sizeof(struct wrr_sched_data), /* size of the private area that qdisc_priv() hands back */
1579 + .enqueue = wrr_enqueue,
1580 + .dequeue = wrr_dequeue,
1584 + .reset = wrr_reset,
1585 + .destroy = wrr_destroy,
1586 + .change = wrr_tune, /* dispatches to the standard or the proxy handler */
1588 + .owner = THIS_MODULE,
1591 +static int __init wrr_module_init(void) /* Module load: register the WRR qdisc and pass on the result */
1593 + return register_qdisc(&wrr_qdisc_ops);
1596 +static void __exit wrr_module_exit(void) /* Module unload: unregister the WRR qdisc */
1598 + unregister_qdisc(&wrr_qdisc_ops);
1601 +module_init(wrr_module_init)
1602 +module_exit(wrr_module_exit)
1604 +MODULE_LICENSE("GPL");
1605 +MODULE_AUTHOR("Christian Worm Mortensen");
1606 diff -urN linux-2.6.26.2.org/net/sched/wrr_proxydict.c linux-2.6.26.2/net/sched/wrr_proxydict.c
1607 --- linux-2.6.26.2.org/net/sched/wrr_proxydict.c 1970-01-01 01:00:00.000000000 +0100
1608 +++ linux-2.6.26.2/net/sched/wrr_proxydict.c 2008-08-19 11:20:27.000000000 +0200
1611 +#include <string.h>
1612 +#include <netinet/in.h>
1615 +#include <linux/wrr.h>
1617 +/* Proxy remapping part of WRR */
1619 +/* Hash function: bucket index for (proto, server, port), reduced modulo m->hash_size. m is expanded without parentheses, so pass a plain pointer name. */
1620 +#define hash_fnc(m,server,port,proto) \
1621 + (((proto)*7+(server)*13+(port)*5)%m->hash_size)
1623 +/* Size of hash table given maximal number of connections */
1624 +#define hash_size_max_con(max_con) (2*(max_con))
1626 +/* The memory area we maintain
1628 + Given a connection we map it by hash_fnc into hash_table. This gives an
1629 + index in next which contains a -1 terminated linked list of connections
1630 + mapping to that hash value.
1632 + The entries in next that are not allocated are also kept in a linked
1633 + list, whose first free index is free_first.
1645 +#define Memory(m) ((proxy_memory*)m) /* the header sits at the start of the area */
1646 +#define Hash_table(m) ((int*)(((char*)m)+sizeof(proxy_memory))) /* int array placed directly after the header */
1647 +#define Next(m) ((int*)(((char*)m)+sizeof(proxy_memory)+ \
1648 + sizeof(int)*((proxy_memory*)m)->hash_size)) /* int array placed after hash_size hash-table ints; reads hash_size from the header */
1649 +#define Info(m) ((ProxyRemapBlock*)(((char*)m)+ /* blocks placed after the header, hash_size ints and max_con ints */ \
1650 + sizeof(proxy_memory)+ \
1651 + sizeof(int)*((proxy_memory*)m)->hash_size+\
1652 + sizeof(int)*((proxy_memory*)m)->max_con \
1655 +int proxyGetMemSize(int max_con) /* Bytes needed for a table of max_con connections: header, hash table, next array and remap blocks */
1657 + return sizeof(proxy_memory) +
1658 + sizeof(int) * hash_size_max_con(max_con) +
1659 + sizeof(int) * max_con + sizeof(ProxyRemapBlock) * max_con;
1662 +void proxyInitMem(void *data, int max_con) /* Set up the area at data as an empty table for max_con connections */
1664 + proxy_memory *m = Memory(data);
1665 + m->max_con = max_con;
1667 + m->hash_size = hash_size_max_con(max_con);
1670 + /* Get pointers; this must come after hash_size is stored, because Next() reads it from the header */
1671 + int *hash_table = Hash_table(data);
1672 + int *next = Next(data);
1675 + /* Init the hash table: -1 marks an empty bucket */
1676 + for (i = 0; i < m->hash_size; i++)
1677 + hash_table[i] = -1;
1679 + /* Init the free-list */
1680 + for (i = 0; i < m->max_con; i++)
1682 + m->free_first = 0; /* the free-list starts at entry 0 */
1686 +int proxyGetCurConn(void *data) /* Read cur_con from the table header */
1688 + return Memory(data)->cur_con;
1691 +int proxyGetMaxConn(void *data) /* Read max_con from the table header */
1693 + return Memory(data)->max_con;
1696 +ProxyRemapBlock *proxyLookup(void *data, unsigned ipaddr, unsigned short port, /* Search the table for the entry matching (ipaddr, port, proto) */
1699 + proxy_memory *m = Memory(data);
1700 + int *hash_table = Hash_table(m);
1701 + int *next = Next(m);
1702 + ProxyRemapBlock *info = Info(m);
1705 + for (i = hash_table[hash_fnc(m, ipaddr, port, proto)]; i != -1; /* start at the bucket head and stop at the -1 terminator */
1707 + if (info[i].proto == proto && info[i].sport == port /* a match needs protocol, source port and source address to agree */
1708 + && info[i].saddr == ipaddr)
1715 +int proxyConsumeBlock(void *data, ProxyRemapBlock * blk)
1717 + proxy_memory *m = Memory(data);
1718 + int *hash_table = Hash_table(m);
1719 + int *next = Next(m);
1720 + ProxyRemapBlock *info = Info(m);
1721 + int hash = hash_fnc(m, blk->saddr, blk->sport, blk->proto);
1725 + if (m->cur_con == m->max_con)
1728 + /* Insert the block at a free entry */
1729 + info[m->free_first] = *blk;
1732 + foo = next[m->free_first];
1734 + /* And insert it in the hash tabel */
1735 + next[m->free_first] = hash_table[hash];
1736 + hash_table[hash] = m->free_first;
1737 + m->free_first = foo;
1741 + /* Find the block */
1742 + for (toupdate = &hash_table[hash];
1743 + *toupdate != -1; toupdate = &next[*toupdate]) {
1744 + if (info[*toupdate].proto == blk->proto &&
1745 + info[*toupdate].sport == blk->sport &&
1746 + info[*toupdate].saddr == blk->saddr)
1749 + if (*toupdate == -1)
1754 + /* Delete it from the hashing list */
1755 + *toupdate = next[*toupdate];
1757 + /* And put it on the free list */
1758 + next[foo] = m->free_first;
1759 + m->free_first = foo;