]> git.pld-linux.org Git - packages/kernel.git/blame - 2.6.4-wrr.patch
- obsolete
[packages/kernel.git] / 2.6.4-wrr.patch
CommitLineData
df26885d 1--- linux-2.6.4/include/linux/pkt_sched.h.orig 2004-04-02 23:46:35.869438752 +0200
2+++ linux-2.6.4/include/linux/pkt_sched.h 2004-04-02 23:59:09.936803088 +0200
3@@ -442,4 +442,116 @@
4
5 #define TCA_ATM_MAX TCA_ATM_STATE
6
7+/* WRR section */
8+
9+/* Other includes */
10+#include <linux/if_ether.h>
11+
12+// A sub weight and of a class
13+// All numbers are represented as parts of (2^64-1).
14+struct tc_wrr_class_weight {
15+ __u64 val; // Current value (0 is not valid)
16+ __u64 decr; // Value pr bytes (2^64-1 is not valid)
17+ __u64 incr; // Value pr seconds (2^64-1 is not valid)
18+ __u64 min; // Minimal value (0 is not valid)
19+ __u64 max; // Minimal value (0 is not valid)
20+
21+ // The time where the above information was correct:
22+ time_t tim;
23+};
24+
25+// Pakcet send when modifying a class:
26+struct tc_wrr_class_modf {
27+ // Not-valid values are ignored.
28+ struct tc_wrr_class_weight weight1;
29+ struct tc_wrr_class_weight weight2;
30+};
31+
32+// Packet returned when quering a class:
33+struct tc_wrr_class_stats {
34+ char used; // If this is false the information below is invalid
35+
36+ struct tc_wrr_class_modf class_modf;
37+
38+ unsigned char addr[ETH_ALEN];
39+ char usemac; // True if addr is a MAC address, else it is an IP address
40+ // (this value is only for convience, it is always the same
41+ // value as in the qdisc)
42+ int heappos; // Current heap position or 0 if not in heap
43+ __u64 penal_ls; // Penalty value in heap (ls)
44+ __u64 penal_ms; // Penalty value in heap (ms)
45+};
46+
47+// Qdisc-wide penalty information (boolean values - 2 not valid)
48+struct tc_wrr_qdisc_weight {
49+ char weight_mode; // 0=No automatic change to weight
50+ // 1=Decrease normally
51+ // 2=Also multiply with number of machines
52+ // 3=Instead multiply with priority divided
53+ // with priority of the other.
54+ // -1=no change
55+};
56+
57+// Packet send when modifing a qdisc:
58+struct tc_wrr_qdisc_modf {
59+ // Not-valid values are ignored:
60+ struct tc_wrr_qdisc_weight weight1;
61+ struct tc_wrr_qdisc_weight weight2;
62+};
63+
64+// Packet send when creating a qdisc:
65+struct tc_wrr_qdisc_crt {
66+ struct tc_wrr_qdisc_modf qdisc_modf;
67+
68+ char srcaddr; // 1=lookup source, 0=lookup destination
69+ char usemac; // 1=Classify on MAC addresses, 0=classify on IP
70+ char usemasq; // 1=Classify based on masqgrading - only valid
71+ // if usemac is zero
72+ int bands_max; // Maximal number of bands (i.e.: classes)
73+ int proxy_maxconn; // If differnt from 0 then we support proxy remapping
74+ // of packets. And this is the number of maximal
75+ // concurrent proxy connections.
76+};
77+
78+// Packet returned when quering a qdisc:
79+struct tc_wrr_qdisc_stats {
80+ struct tc_wrr_qdisc_crt qdisc_crt;
81+ int proxy_curconn;
82+ int nodes_in_heap; // Current number of bands wanting to send something
83+ int bands_cur; // Current number of bands used (i.e.: MAC/IP addresses seen)
84+ int bands_reused; // Number of times this band has been reused.
85+ int packets_requed; // Number of times packets have been requeued.
86+ __u64 priosum; // Sum of priorities in heap where 1 is 2^32
87+};
88+
89+struct tc_wrr_qdisc_modf_std {
90+ // This indicates which of the tc_wrr_qdisc_modf structers this is:
91+ char proxy; // 0=This struct
92+
93+ // Should we also change a class?
94+ char change_class;
95+
96+ // Only valid if change_class is false
97+ struct tc_wrr_qdisc_modf qdisc_modf;
98+
99+ // Only valid if change_class is true:
100+ unsigned char addr[ETH_ALEN]; // Class to change (non-used bytes should be 0)
101+ struct tc_wrr_class_modf class_modf; // The change
102+};
103+
104+// Used for proxyrempping:
105+struct tc_wrr_qdisc_modf_proxy {
106+ // This indicates which of the tc_wrr_qdisc_modf structers this is:
107+ char proxy; // 1=This struct
108+
109+ // This is 1 if the proxyremap information should be reset
110+ char reset;
111+
112+ // changec is the number of elements in changes.
113+ int changec;
114+
115+ // This is an array of type ProxyRemapBlock:
116+ long changes[0];
117+};
118+
c9d1c54c
AM
119 /* Network emulator */
120 struct tc_netem_qopt
df26885d 121 {
122diff -uNr linux-2.6.4/net/sched.orig/Kconfig linux-2.6.4/net/sched/Kconfig
123--- linux-2.6.4/net/sched.orig/Kconfig 2004-04-02 23:46:35.000000000 +0200
124+++ linux-2.6.4/net/sched/Kconfig 2004-04-03 00:06:53.438340144 +0200
125@@ -39,6 +39,10 @@
126 To compile this code as a module, choose M here: the
127 module will be called sch_htb.
128
129+config NET_SCH_WRR
130+ tristate "WRR packet scheduler"
131+ depends on NET_SCHED
132+
133 config NET_SCH_HFSC
134 tristate "HFSC packet scheduler"
135 depends on NET_SCHED
136diff -uNr linux-2.6.4/net/sched.orig/Makefile linux-2.6.4/net/sched/Makefile
137--- linux-2.6.4/net/sched.orig/Makefile 2004-04-02 23:46:35.000000000 +0200
138+++ linux-2.6.4/net/sched/Makefile 2004-04-03 00:05:52.359625520 +0200
139@@ -10,6 +10,7 @@
140 obj-$(CONFIG_NET_CLS_POLICE) += police.o
141 obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
142 obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
143+obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
df26885d 144 obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
145 obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
c9d1c54c 146 obj-$(CONFIG_NET_SCH_RED) += sch_red.o
df26885d 147diff -uNr linux-2.6.4/net/sched.orig/proxydict.c linux-2.6.4/net/sched/proxydict.c
148--- linux-2.6.4/net/sched.orig/proxydict.c 1970-01-01 01:00:00.000000000 +0100
149+++ linux-2.6.4/net/sched/proxydict.c 2004-04-03 00:02:21.565671072 +0200
150@@ -0,0 +1,153 @@
151+#ifndef __KERNEL__
152+#include <string.h>
153+#include <netinet/in.h>
154+#endif
155+
156+#include "proxyremap.h"
157+#include "proxydict.h"
158+
159+
160+/*--------------------------------------------------------------------------
161+Implementation.
162+*/
163+
164+// Hash function
165+#define hash_fnc(m,server,port,proto) \
166+ (((proto)*7+(server)*13+(port)*5)%m->hash_size)
167+
168+// Size of hash table given maximal number of connections:
169+#define hash_size_max_con(max_con) (2*(max_con))
170+
c9d1c54c 171+// The __proxydict_memory area we maintain:
df26885d 172+typedef struct {
173+ int hash_size;
174+ int max_con;
175+ int cur_con;
176+
177+ int free_first;
178+
179+ // Then we have:
180+ // int hash_table[hash_size];
181+ // int next[max_con];
182+ // ProxyRemapBlock info[max_con];
183+ //
184+ // The idea is the following:
185+ // Given a connection we map it by hash_fnc into hash_table. This gives an
186+ // index in next which contains a -1 terminated linked list of connections
187+ // mapping to that hash value.
188+ //
189+ // The entries in next not allocated is also in linked list where
190+ // the first free index is free_first.
c9d1c54c 191+} __proxydict_memory;
df26885d 192+
c9d1c54c
AM
193+#define Memory(m) ((__proxydict_memory*)m)
194+#define Hash_table(m) ((int*)(((char*)m)+sizeof(__proxydict_memory)))
195+#define Next(m) ((int*)(((char*)m)+sizeof(__proxydict_memory)+ \
196+ sizeof(int)*((__proxydict_memory*)m)->hash_size))
df26885d 197+#define Info(m) ((ProxyRemapBlock*)(((char*)m)+ \
c9d1c54c
AM
198+ sizeof(__proxydict_memory)+ \
199+ sizeof(int)*((__proxydict_memory*)m)->hash_size+\
200+ sizeof(int)*((__proxydict_memory*)m)->max_con \
df26885d 201+ ))
202+
203+int proxyGetMemSize(int max_con) {
c9d1c54c 204+ return sizeof(__proxydict_memory)+
df26885d 205+ sizeof(int)*hash_size_max_con(max_con)+
206+ sizeof(int)*max_con+
207+ sizeof(ProxyRemapBlock)*max_con;
208+}
209+
210+void proxyInitMem(void* data, int max_con) {
211+ // Init m:
c9d1c54c 212+ __proxydict_memory* m=Memory(data);
df26885d 213+ m->max_con=max_con;
214+ m->cur_con=0;
215+ m->hash_size=hash_size_max_con(max_con);
216+
217+ {
218+ // Get pointers:
219+ int* hash_table=Hash_table(data);
220+ int* next=Next(data);
221+ int i;
222+
223+ // Init the hash table:
224+ for(i=0; i<m->hash_size; i++) hash_table[i]=-1;
225+
226+ // Init the free-list
227+ for(i=0; i<m->max_con; i++) next[i]=i+1;
228+ m->free_first=0;
229+ }
230+}
231+
232+int proxyGetCurConn(void* data) {
233+ return Memory(data)->cur_con;
234+}
235+
236+int proxyGetMaxConn(void* data) {
237+ return Memory(data)->max_con;
238+}
239+
240+ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto) {
c9d1c54c 241+ __proxydict_memory* m=Memory(data);
df26885d 242+ int* hash_table=Hash_table(m);
243+ int* next=Next(m);
244+ ProxyRemapBlock* info=Info(m);
245+ int i;
246+
247+ for(i=hash_table[hash_fnc(m,ipaddr,port,proto)]; i!=-1; i=next[i]) {
248+ if(info[i].proto==proto &&
249+ info[i].sport==port &&
250+ info[i].saddr==ipaddr) return &info[i];
251+ }
252+
253+ return 0;
254+}
255+
256+int proxyConsumeBlock(void* data, ProxyRemapBlock* blk) {
c9d1c54c 257+ __proxydict_memory* m=Memory(data);
df26885d 258+ int* hash_table=Hash_table(m);
259+ int* next=Next(m);
260+ ProxyRemapBlock* info=Info(m);
261+ int hash=hash_fnc(m,blk->saddr,blk->sport,blk->proto);
262+ int foo;
263+
264+ if(blk->open) {
265+ if(m->cur_con == m->max_con) return -1;
266+
267+ // Insert the block at a free entry:
268+ info[m->free_first]=*blk;
269+ m->cur_con++;
270+
271+ foo=next[m->free_first];
272+
273+ // And insert it in the hash tabel:
274+ next[m->free_first]=hash_table[hash];
275+ hash_table[hash]=m->free_first;
276+ m->free_first=foo;
277+ } else {
278+ int* toupdate;
279+
280+ // Find the block
281+ for(toupdate=&hash_table[hash];
282+ *toupdate!=-1;
283+ toupdate=&next[*toupdate]) {
284+ if(info[*toupdate].proto==blk->proto &&
285+ info[*toupdate].sport==blk->sport &&
286+ info[*toupdate].saddr==blk->saddr) break;
287+ }
288+ if(*toupdate==-1) return -1;
289+
290+ foo=*toupdate;
291+
292+ // Delete it from the hashing list:
293+ *toupdate=next[*toupdate];
294+
295+ // And put it on the free list:
296+ next[foo]=m->free_first;
297+ m->free_first=foo;
298+
299+ m->cur_con--;
300+ }
301+
302+ return 0;
303+}
304diff -uNr linux-2.6.4/net/sched.orig/proxydict.h linux-2.6.4/net/sched/proxydict.h
305--- linux-2.6.4/net/sched.orig/proxydict.h 1970-01-01 01:00:00.000000000 +0100
306+++ linux-2.6.4/net/sched/proxydict.h 2004-04-03 00:02:21.567670768 +0200
307@@ -0,0 +1,32 @@
308+#ifdef __cplusplus
309+extern "C" {
310+#endif
311+
312+/*--------------------------------------------------------------------------
313+This is common code for for handling the tabels containing information about
314+which proxyserver connections are associated with which machines..
315+*/
316+
317+// Returns the number of bytes that should be available in the area
318+// maintained by this module given the maximal number of concurrent
319+// connections.
320+int proxyGetMemSize(int max_connections);
321+
322+// Initializes a memory area to use. There must be as many bytes
323+// available as returned by getMemSize.
324+void proxyInitMem(void* data, int max_connections);
325+
326+// Queries:
327+int proxyGetCurConn(void* data); // Returns current number of connections
328+int proxyMaxCurConn(void* data); // Returns maximal number of connections
329+
330+// This is called to open and close conenctions. Returns -1 if
331+// a protocol error occores (i.e.: If it is discovered)
332+int proxyConsumeBlock(void* data, ProxyRemapBlock*);
333+
334+// Returns the RemapBlock associated with this connection or 0:
335+ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto);
336+
337+#ifdef __cplusplus
338+}
339+#endif
340diff -uNr linux-2.6.4/net/sched.orig/proxyremap.h linux-2.6.4/net/sched/proxyremap.h
341--- linux-2.6.4/net/sched.orig/proxyremap.h 1970-01-01 01:00:00.000000000 +0100
342+++ linux-2.6.4/net/sched/proxyremap.h 2004-04-03 00:02:21.568670616 +0200
343@@ -0,0 +1,33 @@
344+#ifndef PROXYREMAP_H
345+#define PROXYREMAP_H
346+
347+// This describes the information that is written in proxyremap.log and which
348+// are used in the communication between proxyremapserver and proxyremapclient.
349+// Everything is in network order.
350+
351+// First this header is send:
352+#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
353+
354+// Then this block is send every time a connection is opened or closed.
355+// Note how it is alligned to use small space usage - arrays of this
356+// structure are saved in many places.
357+typedef struct {
358+ // Server endpoint of connection:
359+ unsigned saddr;
360+ unsigned short sport;
361+
362+ // IP protocol for this connection (typically udp or tcp):
363+ unsigned char proto;
364+
365+ // Is the connection opened or closed?
366+ unsigned char open;
367+
368+ // Client the packets should be accounted to:
369+ unsigned caddr;
370+ unsigned char macaddr[6]; // Might be 0.
371+
372+ // An informal two-charecter code from the proxyserver. Used for debugging.
373+ char proxyinfo[2];
374+} ProxyRemapBlock;
375+
376+#endif
377diff -uNr linux-2.6.4/net/sched.orig/sch_wrr.c linux-2.6.4/net/sched/sch_wrr.c
378--- linux-2.6.4/net/sched.orig/sch_wrr.c 1970-01-01 01:00:00.000000000 +0100
379+++ linux-2.6.4/net/sched/sch_wrr.c 2004-04-03 00:02:21.574669704 +0200
c9d1c54c 380@@ -0,0 +1,1360 @@
df26885d 381+/*-----------------------------------------------------------------------------
382+Weighted Round Robin scheduler.
383+
384+Written by Christian Worm Mortensen, cworm@it-c.dk.
385+
386+Introduction
387+============
388+This module implements a weighted round robin queue with build-in classifier.
389+The classifier currently map each MAC or IP address (configurable either MAC
390+or IP and either source or destination) to different classes. Each such class
391+is called a band. Whan using MAC addresses only bridged packets can be
392+classified other packets go to a default MAC address.
393+
394+Each band has a weight value, where 0<weight<=1. The bandwidth each band
395+get is proportional to the weight as can be deduced from the next section.
396+
397+
398+The queue
399+=========
400+Each band has a penalty value. Bands having something to sent are kept in
401+a heap according to this value. The band with the lowest penalty value
402+is in the root of the heap. The penalty value is a 128 bit number. Initially
403+no bands are in the heap.
404+
405+Two global 64 bit values counter_low_penal and couter_high_penal are initialized
406+to 0 and to 2^63 respectively.
407+
408+Enqueing:
409+ The packet is inserted in the queue for the band it belongs to. If the band
410+ is not in the heap it is inserted into it. In this case, the upper 64 bits
411+ of its penalty value is set to the same as for the root-band of the heap.
412+ If the heap is empty 0 is used. The lower 64 bit is set to couter_low_penal
413+ and couter_low_penal is incremented by 1.
414+
415+Dequing:
416+ If the heap is empty we have nothing to send.
417+
418+ If the root band has a non-empty queue a packet is dequeued from that.
419+ The upper 64 bit of the penalty value of the band is incremented by the
420+ packet size divided with the weight of the band. The lower 64 bit is set to
421+ couter_high_penal and couter_high_penal is incremented by 1.
422+
423+ If the root element for some reason has an empty queue it is removed from
424+ the heap and we try to dequeue again.
425+
426+The effect of the heap and the upper 64 bit of the penalty values is to
427+implement a weighted round robin queue. The effect of counter_low_penal,
428+counter_high_penal and the lower 64 bit of the penalty value is primarily to
429+stabilize the queue and to give better quality of service to machines only
430+sending a packet now and then. For example machines which have a single
431+interactive connection such as telnet or simple text chatting.
432+
433+
434+Setting weight
435+==============
436+The weight value can be changed dynamically by the queue itself. The weight
437+value and how it is changed is described by the two members weight1 and
438+weight2 which has type tc_wrr_class_weight and which are in each class. And
439+by the two integer value members of the qdisc called penalfact1 and penalfact2.
440+The structure is defined as:
441+
442+ struct tc_wrr_class_weight {
443+ // All are represented as parts of (2^64-1).
444+ __u64 val; // Current value (0 is not valid)
445+ __u64 decr; // Value pr bytes (2^64-1 is not valid)
446+ __u64 incr; // Value pr seconds (2^64-1 is not valid)
447+ __u64 min; // Minimal value (0 is not valid)
448+ __u64 max; // Minimal value (0 is not valid)
449+
450+ // The time where the above information was correct:
451+ time_t tim;
452+ };
453+
454+The weight value used by the dequeue operations is calculated as
455+weight1.val*weight2.val. weight1 and weight2 and handled independently and in the
456+same way as will be described now.
457+
458+Every second, the val parameter is incremented by incr.
459+
460+Every time a packet is transmitted the value is increment by decr times
461+the packet size. Depending on the value of the weight_mode parameter it
462+is also mulitplied with other numbers. This makes it possible to give
463+penalty to machines transferring much data.
464+
465+-----------------------------------------------------------------------------*/
466+
467+#include <linux/config.h>
468+#include <linux/module.h>
469+#include <asm/uaccess.h>
470+#include <asm/system.h>
471+#include <asm/bitops.h>
472+#include <linux/types.h>
473+#include <linux/kernel.h>
474+#include <linux/vmalloc.h>
475+#include <linux/sched.h>
476+#include <linux/string.h>
477+#include <linux/mm.h>
478+#include <linux/socket.h>
479+#include <linux/sockios.h>
480+#include <linux/in.h>
481+#include <linux/errno.h>
482+#include <linux/interrupt.h>
483+#include <linux/if_ether.h>
484+#include <linux/inet.h>
485+#include <linux/netdevice.h>
486+#include <linux/etherdevice.h>
487+#include <linux/notifier.h>
488+#include <net/ip.h>
489+#include <net/route.h>
490+#include <linux/skbuff.h>
491+#include <net/sock.h>
492+#include <net/pkt_sched.h>
493+
494+#include <linux/if_arp.h>
495+#include <linux/version.h>
496+
497+// There seems to be problems when calling functions from userspace when
498+// using vmalloc and vfree.
499+//#define my_malloc(size) vmalloc(size)
500+//#define my_free(ptr) vfree(ptr)
501+#define my_malloc(size) kmalloc(size,GFP_KERNEL)
502+#define my_free(ptr) kfree(ptr)
503+
504+// Kernel depend stuff:
505+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
506+ #define KERNEL22
507+#endif
508+
509+#ifdef KERNEL22
510+ #define LOCK_START start_bh_atomic();
511+ #define LOCK_END end_bh_atomic();
512+ #define ENQUEUE_SUCCESS 1
513+ #define ENQUEUE_FAIL 0
514+ #ifdef CONFIG_IP_MASQUERADE
515+ #include <net/ip_masq.h>
516+ #define MASQ_SUPPORT
517+ #endif
518+#else
519+ #define LOCK_START sch_tree_lock(sch);
520+ #define LOCK_END sch_tree_unlock(sch);
521+ #define ENQUEUE_SUCCESS 0
522+ #define ENQUEUE_FAIL NET_XMIT_DROP
523+ #ifdef CONFIG_NETFILTER
524+ #include <linux/netfilter_ipv4/ip_conntrack.h>
525+ #define MASQ_SUPPORT
526+ #endif
527+#endif
528+
529+#include "proxydict.c"
530+
531+// The penalty (priority) type:
532+typedef u64 penalty_base_t;
533+#define penalty_base_t_max ((penalty_base_t)-1)
534+typedef struct penalty_t {
535+ penalty_base_t ms;
536+ penalty_base_t ls;
537+} penalty_t;
538+#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls))
539+#define penalty_le(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls))
540+static penalty_t penalty_max={penalty_base_t_max,penalty_base_t_max};
541+
542+//-----------------------------------------------------------------------------
543+// A generel heap.
544+
545+struct heap;
546+struct heap_element;
547+
548+// Initializes an empty heap:
549+// he: A pointer to an unintialized heap structure identifying the heap
550+// size: Maximal number of elements the heap can contain
551+// poll: An array of size "size" used by the heap.
552+static void heap_init(struct heap* he,int size, struct heap_element* poll);
553+
554+// Each element in the heap is identified by a user-assigned id which
555+// should be a non negative integer less than the size argument
556+// given to heap_init.
557+static void heap_insert(struct heap*, int id, penalty_t);
558+static void heap_remove(struct heap*, int id);
559+static void heap_set_penalty(struct heap*, int id, penalty_t);
560+
561+// Retreviewing information:
562+static char heap_empty(struct heap*); // Heap empty?
563+static char heap_contains(struct heap*, int id); // Does heap contain
564+ // the given id?
565+static int heap_root(struct heap*); // Returns the id of the root
566+static penalty_t heap_get_penalty(struct heap*, int id); // Returns penaly
567+ // of root node
568+
569+//--------------------
570+// Heap implementation
571+
572+struct heap_element {
573+ penalty_t penalty;
574+ int id; // The user-assigned id of this element
575+ int id2idx; // Maps from user-assigned ids to indices in root_1
576+};
577+
578+struct heap {
579+ struct heap_element* root_1;
580+ int elements;
581+};
582+
583+// Heap implementation:
584+static void heap_init(struct heap* h, int size, struct heap_element* poll) {
585+ int i;
586+
587+ h->elements=0;
588+ h->root_1=poll-1;
589+
590+ for(i=0; i<size; i++) poll[i].id2idx=0;
591+};
592+
593+static char heap_empty(struct heap* h) {
594+ return h->elements==0;
595+}
596+
597+static char heap_contains(struct heap* h, int id) {
598+ return h->root_1[id+1].id2idx!=0;
599+}
600+
601+static int heap_root(struct heap* h) {
602+ return h->root_1[1].id;
603+}
604+
605+static penalty_t heap_get_penalty(struct heap* h, int id) {
606+ return h->root_1[ h->root_1[id+1].id2idx ].penalty;
607+}
608+
609+static void heap_penalty_changed_internal(struct heap* h,int idx);
610+
611+static void heap_set_penalty(struct heap* h, int id, penalty_t p) {
612+ int idx=h->root_1[id+1].id2idx;
613+ h->root_1[idx].penalty=p;
614+ heap_penalty_changed_internal(h,idx);
615+}
616+
617+static void heap_insert(struct heap* h, int id, penalty_t p) {
618+ // Insert at the end of the heap:
619+ h->elements++;
620+ h->root_1[h->elements].id=id;
621+ h->root_1[h->elements].penalty=p;
622+ h->root_1[id+1].id2idx=h->elements;
623+
624+ // And put it in the right position:
625+ heap_penalty_changed_internal(h,h->elements);
626+}
627+
628+static void heap_remove(struct heap* h, int id) {
629+ int idx=h->root_1[id+1].id2idx;
630+ int mvid;
631+ h->root_1[id+1].id2idx=0;
632+
633+ if(h->elements==idx) { h->elements--; return; }
634+
635+ mvid=h->root_1[h->elements].id;
636+ h->root_1[idx].id=mvid;
637+ h->root_1[idx].penalty=h->root_1[h->elements].penalty;
638+ h->root_1[mvid+1].id2idx=idx;
639+
640+ h->elements--;
641+ heap_penalty_changed_internal(h,idx);
642+}
643+
644+static void heap_swap(struct heap* h, int idx0, int idx1) {
645+ penalty_t tmp_p;
646+ int tmp_id;
647+ int id0,id1;
648+
649+ // Simple content:
650+ tmp_p=h->root_1[idx0].penalty;
651+ tmp_id=h->root_1[idx0].id;
652+ h->root_1[idx0].penalty=h->root_1[idx1].penalty;
653+ h->root_1[idx0].id=h->root_1[idx1].id;
654+ h->root_1[idx1].penalty=tmp_p;
655+ h->root_1[idx1].id=tmp_id;
656+
657+ // Update reverse pointers:
658+ id0=h->root_1[idx0].id;
659+ id1=h->root_1[idx1].id;
660+ h->root_1[id0+1].id2idx=idx0;
661+ h->root_1[id1+1].id2idx=idx1;
662+}
663+
664+static void heap_penalty_changed_internal(struct heap* h,int cur) {
665+ if(cur==1 || penalty_leq(h->root_1[cur>>1].penalty,h->root_1[cur].penalty)) {
666+ // We are in heap order upwards - so we should move the element down
667+ for(;;) {
668+ int nxt0=cur<<1;
669+ int nxt1=nxt0+1;
670+ penalty_t pen_c=h->root_1[cur].penalty;
671+ penalty_t pen_0=nxt0<=h->elements ? h->root_1[nxt0].penalty : penalty_max;
672+ penalty_t pen_1=nxt1<=h->elements ? h->root_1[nxt1].penalty : penalty_max;
673+
674+ if(penalty_le(pen_0,pen_c) && penalty_leq(pen_0,pen_1)) {
675+ // Swap with child 0:
676+ heap_swap(h,cur,nxt0);
677+ cur=nxt0;
678+ } else if(penalty_le(pen_1,pen_c)) {
679+ // Swap with child 1:
680+ heap_swap(h,cur,nxt1);
681+ cur=nxt1;
682+ } else {
683+ // Heap in heap order:
684+ return;
685+ }
686+ }
687+ } else {
688+ // We are not in heap order upwards (and thus we must be it downwards).
689+ // We move up:
690+ while(cur!=1) { // While not root
691+ int nxt=cur>>1;
692+ if(penalty_leq(h->root_1[nxt].penalty,h->root_1[cur].penalty)) return;
693+ heap_swap(h,cur,nxt);
694+ cur=nxt;
695+ }
696+ }
697+};
698+
699+//-----------------------------------------------------------------------------
700+// Classification based on MAC or IP adresses. Note that of historical reason
701+// these are prefixed with mac_ since originally only MAC bases classification
702+// was supported.
703+//
704+// This code should be in a separate filter module - but it isn't.
705+
706+// Interface:
707+
708+struct mac_head;
709+
710+// Initialices/destroys the structure we maintain.
711+// Returns -1 on error
712+static int mac_init(struct mac_head*, int max_macs, char srcaddr,
713+ char usemac, char usemasq, void* proxyremap);
714+static void mac_done(struct mac_head*);
715+static void mac_reset(struct mac_head*);
716+
717+// Classify a packet. Returns a number n where 0<=n<max_macs. Or -1 if
718+// the packet should be dropped.
719+static int mac_classify(struct mac_head*, struct sk_buff *skb);
720+
721+//-------------
722+// Implementation:
723+
724+struct mac_addr {
725+ unsigned char addr[ETH_ALEN]; // Address of this band (last two are 0 on IP)
726+ unsigned long lastused; // Last time a packet was encountered
727+ int class; // Classid of this band (0<=classid<max_macs)
728+};
729+
730+static int mac_compare(const void* a, const void* b) {
731+ return memcmp(a,b,ETH_ALEN);
732+}
733+
734+struct mac_head {
735+ int mac_max; // Maximal number of MAC addresses/classes allowed
736+ int mac_cur; // Current number of MAC addresses/classes
737+ int mac_reused; // Number of times we have reused a class with a new
738+ // address.
739+ u64 incr_time;
740+ char srcaddr; // True if we classify on the source address of packets,
741+ // else we use destination address.
742+ char usemac; // If true we use mac, else we use IP
743+ char usemasq; // If true we try to demasqgrade
744+ struct mac_addr* macs; // Allocated mac_max elements, used max_cur
745+ char* cls2mac; // Mapping from classnumbers to addresses -
746+ // there is 6 bytes in each entry
747+
748+ void* proxyremap; // Information on proxy remapping of data or 0
749+};
750+
751+// This is as the standard C library function with the same name:
752+static const void* bsearch(const void* key, const void* base, int nmemb,
753+ size_t size,
754+ int (*compare)(const void*, const void*)) {
755+ int m_idx;
756+ const void* m_ptr;
757+ int i;
758+
759+ if(nmemb<=0) return 0;
760+
761+ m_idx=nmemb>>1;
762+ m_ptr=((const char*)base)+m_idx*size;
763+
764+ i=compare(key,m_ptr);
765+ if(i<0) // key is less
766+ return bsearch(key,base,m_idx,size,compare);
767+ else if(i>0)
768+ return bsearch(key,((const char*)m_ptr)+size,nmemb-m_idx-1,size,compare);
769+
770+ return m_ptr;
771+}
772+
773+static int mac_init(struct mac_head* h, int max_macs, char srcaddr,
774+ char usemac, char usemasq,void* proxyremap) {
775+ h->mac_cur=0;
776+ h->mac_reused=0;
777+ h->incr_time=0;
778+ h->srcaddr=srcaddr;
779+ h->usemac=usemac;
780+ h->usemasq=usemasq;
781+ h->mac_max=max_macs;
782+ h->proxyremap=proxyremap;
783+
784+ h->macs=(struct mac_addr*)
785+ my_malloc( sizeof(struct mac_addr)*max_macs);
786+ h->cls2mac=(char*)my_malloc( 6*max_macs);
787+ if(!h->macs || !h->cls2mac) {
788+ if(h->macs) my_free(h->macs);
789+ if(h->cls2mac) my_free(h->cls2mac);
790+ return -1;
791+ }
792+ return 0;
793+}
794+
795+static void mac_done(struct mac_head* h) {
796+ my_free(h->macs);
797+ my_free(h->cls2mac);
798+}
799+
800+static void mac_reset(struct mac_head* h) {
801+ h->mac_cur=0;
802+ h->mac_reused=0;
803+ h->incr_time=0;
804+}
805+
806+static int lookup_mac(struct mac_head* h, unsigned char* addr) {
807+ int i;
808+ int class;
809+
810+ // First try to find the address in the table:
811+ struct mac_addr* m=(struct mac_addr*)
812+ bsearch(addr,h->macs,h->mac_cur,sizeof(struct mac_addr),mac_compare);
813+ if(m) {
814+ // Found:
815+ m->lastused=h->incr_time++;
816+ return m->class;
817+ }
818+
819+ // Okay - the MAC adress was not in table
820+ if(h->mac_cur==h->mac_max) {
821+ // And the table is full - delete the oldest entry:
822+
823+ // Find the oldest entry:
824+ int lowidx=0;
825+ int i;
826+ for(i=1; i<h->mac_cur; i++)
827+ if(h->macs[i].lastused < h->macs[lowidx].lastused) lowidx=i;
828+
829+ class=h->macs[lowidx].class;
830+
831+ // And delete it:
832+ memmove(&h->macs[lowidx],&h->macs[lowidx+1],
833+ (h->mac_cur-lowidx-1)*sizeof(struct mac_addr));
834+ h->mac_reused++;
835+ h->mac_cur--;
836+ } else {
837+ class=h->mac_cur;
838+ }
839+
840+ // The table is now not full - find the position we should put the address in:
841+ for(i=0; i<h->mac_cur; i++) if(mac_compare(addr,&h->macs[i])<0) break;
842+
843+ // We should insert at position i:
844+ memmove(&h->macs[i+1],&h->macs[i],(h->mac_cur-i)*sizeof(struct mac_addr));
845+ m=&h->macs[i];
846+ memcpy(m->addr,addr,ETH_ALEN);
847+ m->lastused=h->incr_time++;
848+ m->class=class;
849+ h->mac_cur++;
850+
851+ // Finally update the cls2mac variabel:
852+ memcpy(h->cls2mac+ETH_ALEN*class,addr,ETH_ALEN);
853+
854+ return m->class;
855+}
856+
857+int valid_ip_checksum(struct iphdr* ip, int size) {
858+ __u16 header_len=ip->ihl<<2;
859+ __u16 c=0;
860+ __u16* ipu=(u16*)ip;
861+ int a;
862+
863+ // We require 4 bytes in the packet since we access the port numbers:
864+ if((size<header_len) || size<sizeof(struct iphdr)+4) return 0;
865+
866+ for(a=0; a<(header_len>>1); a++, ipu++) {
867+ if(a!=5) { // If not the checksum field
868+ __u16 oldc=c;
869+ c+=(*ipu);
870+ if(c<oldc) c++;
871+ }
872+ }
873+
874+ return ip->check==(__u16)~c;
875+}
876+
877+static int mac_classify(struct mac_head* head, struct sk_buff *skb)
878+{
879+ // We set this to the address we map to. In case we map to an IP
880+ // address the last two entries are set to 0.
881+ unsigned char addr[ETH_ALEN];
882+
883+
884+ // This is the size of the network part of the packet, I think:
885+ int size=((char*)skb->data+skb->len)-((char*)skb->nh.iph);
886+
887+ // Set a default value for the address:
888+ memset(addr,0,ETH_ALEN);
889+
890+ // Accept IP-ARP traffic with big-enough packets:
891+ if(ntohs(skb->protocol)==ETH_P_ARP &&
892+ ntohs(skb->nh.arph->ar_pro)==ETH_P_IP) {
893+ // Map all ARP trafic to a default adress to make sure
894+ // it goes through
895+ } else if ((ntohs(skb->protocol)==ETH_P_IP) &&
896+ valid_ip_checksum(skb->nh.iph,size)) {
897+ // Accept IP packets which have correct checksum.
898+
899+ // This is the IP header:
900+ struct iphdr* iph=skb->nh.iph;
901+
902+ // And this is the port numbers:
903+ const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
904+ __u16 sport=portp[0];
905+ __u16 dport=portp[1];
906+
907+ // We will set this to the IP address of the packet that should be
908+ // accounted to:
909+ unsigned ipaddr;
910+
911+ // Used below:
912+ ProxyRemapBlock* prm;
913+
914+ // Set ipaddr:
915+ if(head->srcaddr)
916+ ipaddr=iph->saddr;
917+ else
918+ ipaddr=iph->daddr;
919+
920+#ifdef MASQ_SUPPORT
921+ // Update ipaddr if packet is masqgraded:
922+ if(head->usemasq) {
923+ #ifdef KERNEL22
924+ struct ip_masq* src;
925+
926+ // HACK!:
927+ // ip_masq_in_get must be called for packets comming from the outside
928+ // to the firewall. We have a a packet which is comming from the
929+ // firewall to the outside - so we switch the parameters:
930+ if((src=ip_masq_in_get(
931+ iph->protocol,
932+ iph->daddr,dport,
933+ iph->saddr,sport))) {
934+ // Use masqgraded address:
935+ ipaddr=src->saddr;
936+
937+ // It seems like we must put it back:
938+ ip_masq_put(src);
939+ }
940+ #else
941+ // Thanks to Rusty Russell for help with the following code:
942+ enum ip_conntrack_info ctinfo;
943+ struct ip_conntrack *ct;
944+ ct = ip_conntrack_get(skb, &ctinfo);
945+ if (ct) {
946+ if(head->srcaddr)
947+ ipaddr=ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.src.ip;
948+ else
949+ ipaddr=ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.dst.ip;
950+ }
951+ #endif
952+ }
953+#endif
954+
955+ // Set prm based on ipaddr:
956+ prm=0;
957+ if(head->proxyremap) {
958+ if(head->srcaddr) {
959+ prm=proxyLookup(head->proxyremap,ipaddr,sport,skb->nh.iph->protocol);
960+ } else {
961+ prm=proxyLookup(head->proxyremap,ipaddr,dport,skb->nh.iph->protocol);
962+ }
963+ }
964+
965+ // And finally set addr to the address:
966+ memset(addr,0,ETH_ALEN);
967+ if(prm) {
968+ // This package should be remapped:
969+ if(head->usemac)
970+ memcpy(addr,prm->macaddr,ETH_ALEN);
971+ else {
972+ memcpy(addr,&prm->caddr,sizeof(unsigned));
973+ }
974+ } else {
975+ // This packet should not be remapped:
976+ if(head->usemac) {
977+ // We should find MAC address of packet.
978+ // Unfortunatly, this is not always available.
979+ // On bridged packets it always is, however..
980+ #ifdef KERNEL22
981+ if(skb->pkt_bridged) {
982+ if(head->srcaddr) {
983+ memcpy(addr,skb->mac.ethernet->h_source,ETH_ALEN);
984+ } else {
985+ memcpy(addr,skb->mac.ethernet->h_dest,ETH_ALEN);
986+ }
987+ }
988+ #endif
989+ } else {
990+ memcpy(addr,&ipaddr,4);
991+ }
992+ }
993+ } else {
994+ // All other traffic is dropped - this ensures that packets
995+ // we consider probably have valid addresses so we don't
996+ // get to many strange addresses into our table. And that we
997+ // don't use bandwidth on strange packets..
998+ return -1;
999+ }
1000+
1001+ return lookup_mac(head,addr);
1002+}
1003+
1004+//-----------------------------------------------------------------------------
1005+// The qdisc itself
1006+
1007+// Pr-class information.
1008+struct wrrc_sched_data {
1009+ struct Qdisc* que; // The queue for this class
1010+ struct tc_wrr_class_modf class_modf; // Information about the class.
1011+
1012+ // For classes in the heap this is the priority value priosum
1013+ // was updated with for this class:
1014+ u64 priosum_val;
1015+};
1016+
1017+// Pr-qdisc information:
1018+struct wrr_sched_data
1019+{
1020+ // A heap containing all the bands that will send something
1021+ struct heap h;
1022+ struct heap_element* poll; // bandc elements
1023+
1024+ // The sum of the prioities of the elements in the heap where
1025+ // a priority of 1 is saved as 2^32
1026+ u64 priosum;
1027+
1028+ // A class for each band
1029+ struct wrrc_sched_data* bands; // bandc elements
1030+
1031+ // Information maintained by the proxydict module of 0 if we
1032+ // have no proxy remapping
1033+ void* proxydict;
1034+
1035+ // Always incrementning counters, we always have that any value of
1036+ // counter_low_penal < any value of counter_high_penal.
1037+ penalty_base_t counter_low_penal;
1038+ penalty_base_t counter_high_penal;
1039+
1040+ // Penalty updating:
1041+ struct tc_wrr_qdisc_modf qdisc_modf;
1042+
1043+ // Statistics:
1044+ int packets_requed;
1045+
1046+ // The filter:
1047+ struct mac_head filter;
1048+ int bandc; // Number of bands
1049+};
1050+
1051+// Priority handling.
1052+// weight is in interval [0..2^32]
1053+// priosum has whole numbers in the upper and fragments in the lower 32 bits.
1054+static void weight_transmit(struct tc_wrr_class_weight* p,
1055+ struct tc_wrr_qdisc_weight q,
1056+ unsigned heapsize,
1057+ u64 priosum, u64 weight,
1058+ unsigned size) {
1059+
1060+ unsigned long now=jiffies/HZ;
1061+
1062+ // Penalty for transmitting:
1063+ u64 change,old;
1064+ u32 divisor;
1065+
1066+ change=0;
1067+ switch(q.weight_mode) {
1068+ case 1: change=p->decr*size; break;
1069+ case 2: change=p->decr*size*heapsize; break;
1070+ case 3: // Note: 64 bit division is not always available..
1071+ divisor=(u32)(weight>>16);
1072+ if(divisor<=0) divisor=1;
1073+ change=p->decr*size*(((u32)(priosum>>16))/divisor); break;
1074+ }
1075+ old=p->val;
1076+ p->val-=change;
1077+ if(p->val>old || p->val<p->min) p->val=p->min;
1078+
1079+ // Credit for time went:
1080+ change=(now-p->tim)*p->incr;
1081+ p->tim=now;
1082+ old=p->val;
1083+ p->val+=change;
1084+ if(p->val<old || p->val>p->max) p->val=p->max;
1085+}
1086+
1087+static void weight_setdefault(struct tc_wrr_class_weight* p) {
1088+ p->val=(u64)-1;
1089+ p->decr=0;
1090+ p->incr=0;
1091+ p->min=(u64)-1;
1092+ p->max=(u64)-1;
1093+ p->tim=jiffies/HZ;
1094+}
1095+
1096+static void weight_setvalue(struct tc_wrr_class_weight* dst,
1097+ struct tc_wrr_class_weight* src) {
1098+ if(src->val!=0) {
1099+ dst->val=src->val;
1100+ dst->tim=jiffies/HZ;
1101+ }
1102+ if(src->min!=0) dst->min=src->min;
1103+ if(src->max!=0) dst->max=src->max;
1104+ if(src->decr!=((u64)-1)) dst->decr=src->decr;
1105+ if(src->incr!=((u64)-1)) dst->incr=src->incr;
1106+ if(dst->val<dst->min) dst->val=dst->min;
1107+ if(dst->val>dst->max) dst->val=dst->max;
1108+}
1109+
1110+static void wrr_destroy(struct Qdisc *sch)
1111+{
c9d1c54c 1112+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1113+ int i;
1114+
1115+ // Destroy our filter:
1116+ mac_done(&q->filter);
1117+
1118+ // Destroy all our childre ques:
1119+ for(i=0; i<q->bandc; i++)
1120+ qdisc_destroy(q->bands[i].que);
1121+
1122+ // And free memory:
1123+ my_free(q->bands);
1124+ my_free(q->poll);
1125+ if(q->proxydict) my_free(q->proxydict);
df26885d 1126+}
1127+
1128+static int wrr_init(struct Qdisc *sch, struct rtattr *opt)
1129+{
c9d1c54c 1130+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1131+ int i,maciniterr;
1132+ char crterr;
1133+ struct tc_wrr_qdisc_crt *qopt;
1134+
1135+ // Parse options:
1136+ if (!opt) return -EINVAL; // Options must be specified
1137+ if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) return -EINVAL;
1138+ qopt = RTA_DATA(opt);
1139+
1140+ if(qopt->bands_max>8192 || qopt->bands_max<2) {
1141+ // More than 8192 queues or less than 2? That cannot be true - it must be
1142+ // an error...
1143+ return -EINVAL;
1144+ }
1145+
1146+ if(qopt->proxy_maxconn<0 || qopt->proxy_maxconn>20000) {
1147+ // More than this number of maximal concurrent connections is unrealistic
1148+ return -EINVAL;
1149+ }
1150+
1151+#ifndef MASQ_SUPPORT
1152+ if(qopt->usemasq) {
1153+ return -ENOSYS;
1154+ }
1155+#endif
1156+
1157+#ifndef KERNEL22
1158+ if(qopt->usemac) { // Not supported - please fix this!
1159+ return -ENOSYS;
1160+ }
1161+#endif
1162+
1163+ q->bandc=qopt->bands_max;
1164+ q->qdisc_modf=qopt->qdisc_modf;
1165+
1166+ // Create structures:
1167+ q->poll=(struct heap_element*)
1168+ my_malloc( sizeof(struct heap_element)*q->bandc);
1169+ q->bands=(struct wrrc_sched_data*)
1170+ my_malloc( sizeof(struct wrrc_sched_data)*q->bandc);
1171+
1172+ if(qopt->proxy_maxconn>0) {
1173+ q->proxydict=my_malloc(proxyGetMemSize(qopt->proxy_maxconn));
1174+ } else {
1175+ q->proxydict=0;
1176+ }
1177+
1178+ // Init mac module:
1179+ maciniterr=mac_init(&q->filter,qopt->bands_max,qopt->srcaddr,
1180+ qopt->usemac,qopt->usemasq,q->proxydict);
1181+
1182+ // See if we got the memory we wanted:
1183+ if(!q->poll || !q->bands ||
1184+ (qopt->proxy_maxconn>0 && !q->proxydict) || maciniterr<0) {
1185+ if(q->poll) my_free(q->poll);
1186+ if(q->bands) my_free(q->bands);
1187+ if(q->proxydict) my_free(q->proxydict);
1188+ if(maciniterr>=0) mac_done(&q->filter);
1189+ return -ENOMEM;
1190+ }
1191+
1192+ // Initialize proxy:
1193+ if(q->proxydict) {
1194+ proxyInitMem(q->proxydict,qopt->proxy_maxconn);
1195+ }
1196+
1197+ // Initialize values:
1198+ q->counter_low_penal=0;
1199+ q->counter_high_penal=penalty_base_t_max>>1;
1200+ q->packets_requed=0;
1201+
1202+ // Initialize empty heap:
1203+ heap_init(&q->h,q->bandc,q->poll);
1204+ q->priosum=0;
1205+
1206+ // Initialize each band:
1207+ crterr=0;
1208+ for (i=0; i<q->bandc; i++) {
1209+ weight_setdefault(&q->bands[i].class_modf.weight1);
1210+ weight_setdefault(&q->bands[i].class_modf.weight2);
1211+ if(!crterr) {
1212+ struct Qdisc *child=qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
1213+ if(child)
1214+ q->bands[i].que = child;
1215+ else {
1216+ // Queue couldn't be created :-(
1217+ crterr=1;
1218+ }
1219+ }
1220+ if(crterr) q->bands[i].que = &noop_qdisc;
1221+ }
1222+
df26885d 1223+ if(crterr) {
1224+ // Destroy again:
1225+ wrr_destroy(sch);
1226+ return -ENOMEM;
1227+ }
1228+
1229+ return 0;
1230+}
1231+
1232+static void wrr_reset(struct Qdisc* sch)
1233+{
c9d1c54c 1234+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1235+ int i;
1236+
1237+ // Reset own values:
1238+ q->counter_low_penal=0;
1239+ q->counter_high_penal=penalty_base_t_max>>1;
1240+ q->packets_requed=0;
1241+
1242+ // Reset filter:
1243+ mac_reset(&q->filter);
1244+
1245+ // Reinitialize heap:
1246+ heap_init(&q->h,q->bandc,q->poll);
1247+ q->priosum=0;
1248+
1249+ // Reset all bands:
1250+ for (i=0; i<q->bandc; i++) {
1251+ weight_setdefault(&q->bands[i].class_modf.weight1);
1252+ weight_setdefault(&q->bands[i].class_modf.weight2);
1253+ qdisc_reset(q->bands[i].que);
1254+ }
1255+
1256+ // Reset proxy remapping information:
1257+ if(q->proxydict)
1258+ proxyInitMem(q->proxydict,proxyGetMaxConn(q->proxydict));
1259+}
1260+
1261+static int wrr_enqueue(struct sk_buff *skb, struct Qdisc* sch)
1262+{
c9d1c54c 1263+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1264+ int retvalue=ENQUEUE_FAIL;
1265+
1266+ // The packet is in skb.
1267+ int band=mac_classify(&q->filter,skb);
1268+
1269+ if(band>=0) {
1270+ // Enque packet for this band:
1271+ struct Qdisc* qdisc = q->bands[band].que;
1272+
1273+ if ((retvalue=qdisc->enqueue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1274+ // Successfull
1275+ sch->stats.bytes += skb->len;
1276+ sch->stats.packets++;
1277+ sch->q.qlen++;
1278+
1279+ // Insert band into heap if not already there:
1280+ if(!heap_contains(&q->h,band)) {
1281+ penalty_t p;
1282+ if(!heap_empty(&q->h))
1283+ p.ms=heap_get_penalty(&q->h,heap_root(&q->h)).ms;
1284+ else
1285+ p.ms=0;
1286+ p.ls=q->counter_low_penal++;
1287+ heap_insert(&q->h,band,p);
1288+ q->bands[band].priosum_val=
1289+ ((q->bands[band].class_modf.weight1.val>>48)+1)*
1290+ ((q->bands[band].class_modf.weight2.val>>48)+1);
1291+ q->priosum+=q->bands[band].priosum_val;
1292+ }
1293+ }
1294+ } else {
1295+ // If we decide not to enque it seems like we also need to free the packet:
1296+ kfree_skb(skb);
1297+ }
1298+
1299+ if(retvalue!=ENQUEUE_SUCCESS) {
1300+ // Packet not enqued:
1301+ sch->stats.drops++;
1302+ }
1303+
1304+ return retvalue;
1305+}
1306+
1307+static struct sk_buff *wrr_dequeue(struct Qdisc* sch)
1308+{
c9d1c54c 1309+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1310+ struct sk_buff* skb;
1311+ int band;
1312+ u64 weight,priosum;
1313+ struct wrrc_sched_data* b;
1314+
1315+ // Return if heap is empty:
1316+ if(heap_empty(&q->h)) return 0;
1317+
1318+ // Find root element:
1319+ band=heap_root(&q->h);
1320+
1321+ // Find priority of this element in interval [1;2^32]
1322+ b=&q->bands[band];
1323+ weight=((b->class_modf.weight1.val>>48)+1)*
1324+ ((b->class_modf.weight2.val>>48)+1); //weight is in interval [1;2^32]
1325+ priosum=q->priosum;
1326+ q->priosum-=q->bands[band].priosum_val;
1327+
1328+ // Deque the packet from the root:
1329+ skb=q->bands[band].que->dequeue(q->bands[band].que);
1330+
1331+ if(skb) {
1332+ // There was a packet in this que.
1333+ unsigned adjlen;
1334+ penalty_t p;
1335+
1336+ // Find length of packet adjusted with priority:
1337+ adjlen=(u32)(weight>>(32-16));
1338+ if(adjlen==0) adjlen=1;
1339+ adjlen=(skb->len<<16)/adjlen;
1340+
1341+ // Update penalty information for this class:
1342+ weight_transmit(&b->class_modf.weight1,q->qdisc_modf.weight1,q->h.elements,priosum,weight,skb->len);
1343+ weight_transmit(&b->class_modf.weight2,q->qdisc_modf.weight2,q->h.elements,priosum,weight,skb->len);
1344+ q->bands[band].priosum_val=((b->class_modf.weight1.val>>48)+1)*
1345+ ((b->class_modf.weight2.val>>48)+1);
1346+ q->priosum+=q->bands[band].priosum_val;
1347+
1348+ // And update the class in the heap
1349+ p=heap_get_penalty(&q->h,band);
1350+ p.ms+=adjlen;
1351+ p.ls=q->counter_high_penal++;
1352+ heap_set_penalty(&q->h,band,p);
1353+
1354+ // Return packet:
1355+ sch->q.qlen--;
1356+ return skb;
1357+ }
1358+
1359+ // No packet - so machine should be removed from heap:
1360+ heap_remove(&q->h,band);
1361+
1362+ // And try again:
1363+ return wrr_dequeue(sch);
1364+}
1365+
1366+static int wrr_requeue(struct sk_buff *skb, struct Qdisc* sch)
1367+{
c9d1c54c 1368+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1369+ struct Qdisc* qdisc;
1370+ int ret;
1371+
1372+ // Find band we took it from:
1373+ int band=mac_classify(&q->filter,skb);
1374+ if(band<0) {
1375+ // Who should now free the pakcet?
1376+ printk(KERN_DEBUG "sch_wrr: Oops - packet requed could never have been queued.\n");
1377+ sch->stats.drops++;
1378+ return ENQUEUE_FAIL;
1379+ }
1380+
1381+ q->packets_requed++;
1382+
1383+ // Try to requeue it on that machine:
1384+ qdisc=q->bands[band].que;
1385+
1386+ if((ret=qdisc->ops->requeue(skb,qdisc))==ENQUEUE_SUCCESS) {
1387+ // On success:
1388+ sch->q.qlen++;
1389+
1390+ // We should restore priority information - but we don't
1391+ //
1392+ // p=heap_get_penalty(&q->h,band);
1393+ // ...
1394+ // heap_set_penalty(&q->h,band,p);
1395+
1396+ return ENQUEUE_SUCCESS;
1397+ } else {
1398+ sch->stats.drops++;
1399+ return ret;
1400+ }
1401+}
1402+
1403+static unsigned wrr_drop(struct Qdisc* sch)
1404+{
c9d1c54c 1405+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1406+
1407+ // Ugly... Drop button up in heap:
1408+ int i;
1409+
1410+ for(i=q->h.elements; i>=1; i--) {
1411+ int band=q->h.root_1[i].id;
1412+ if(q->bands[band].que->ops->drop(q->bands[band].que)) {
1413+ // On success
1414+ sch->q.qlen--;
1415+ return 1;
1416+ }
1417+ }
1418+
1419+ return 0;
1420+}
1421+
1422+static int wrr_dump(struct Qdisc *sch, struct sk_buff *skb)
1423+{
c9d1c54c 1424+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1425+ unsigned char *b = skb->tail;
1426+ struct tc_wrr_qdisc_stats opt;
1427+
1428+ opt.qdisc_crt.qdisc_modf=q->qdisc_modf;
1429+ opt.qdisc_crt.srcaddr=q->filter.srcaddr;
1430+ opt.qdisc_crt.usemac=q->filter.usemac;
1431+ opt.qdisc_crt.usemasq=q->filter.usemasq;
1432+ opt.qdisc_crt.bands_max=q->filter.mac_max;
1433+ opt.nodes_in_heap=q->h.elements;
1434+ opt.bands_cur=q->filter.mac_cur;
1435+ opt.bands_reused=q->filter.mac_reused;
1436+ opt.packets_requed=q->packets_requed;
1437+ opt.priosum=q->priosum;
1438+
1439+ if(q->proxydict) {
1440+ opt.qdisc_crt.proxy_maxconn=proxyGetMaxConn(q->proxydict);
1441+ opt.proxy_curconn=proxyGetCurConn(q->proxydict);
1442+ } else {
1443+ opt.qdisc_crt.proxy_maxconn=0;
1444+ opt.proxy_curconn=0;
1445+ }
1446+
1447+ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1448+ return skb->len;
1449+
1450+rtattr_failure: // seems like RTA_PUT jump to this label..
1451+ skb_trim(skb, b - skb->data);
1452+ return -1;
1453+}
1454+
1455+static int wrr_tune_std(struct Qdisc *sch, struct rtattr *opt)
1456+{
c9d1c54c 1457+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1458+ struct tc_wrr_qdisc_modf_std *qopt = RTA_DATA(opt);
1459+
1460+ if(opt->rta_len < RTA_LENGTH(sizeof(*qopt))) return -EINVAL;
1461+
1462+ LOCK_START
1463+
1464+ if(qopt->change_class) {
1465+ int idx=lookup_mac(&q->filter,qopt->addr);
1466+ weight_setvalue
1467+ (&q->bands[idx].class_modf.weight1,&qopt->class_modf.weight1);
1468+ weight_setvalue
1469+ (&q->bands[idx].class_modf.weight2,&qopt->class_modf.weight2);
1470+ } else {
1471+ if(qopt->qdisc_modf.weight1.weight_mode!=-1)
1472+ q->qdisc_modf.weight1.weight_mode=qopt->qdisc_modf.weight1.weight_mode;
1473+ if(qopt->qdisc_modf.weight2.weight_mode!=-1)
1474+ q->qdisc_modf.weight2.weight_mode=qopt->qdisc_modf.weight2.weight_mode;
1475+ }
1476+
1477+ LOCK_END
1478+
1479+ return 0;
1480+}
1481+
1482+static int wrr_tune_proxy(struct Qdisc *sch, struct rtattr *opt)
1483+{
c9d1c54c 1484+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1485+ struct tc_wrr_qdisc_modf_proxy *qopt = RTA_DATA(opt);
1486+ int i;
1487+
1488+ // Return if we are not configured with proxy support:
1489+ if(!q->proxydict) return -ENOSYS;
1490+
1491+ // Return if not enough data given:
1492+ if(opt->rta_len<RTA_LENGTH(sizeof(*qopt)) ||
1493+ opt->rta_len<
1494+ RTA_LENGTH(sizeof(*qopt)+sizeof(ProxyRemapBlock)*qopt->changec))
1495+ return -EINVAL;
1496+
1497+ LOCK_START;
1498+
1499+ if(qopt->reset) {
1500+ proxyInitMem(q->proxydict,proxyGetMaxConn(q->proxydict));
1501+ }
1502+
1503+ // Do all the changes:
1504+ for(i=0; i<qopt->changec; i++) {
1505+ proxyConsumeBlock(q->proxydict,&((ProxyRemapBlock*)&qopt->changes)[i]);
1506+ }
1507+
1508+ LOCK_END;
1509+
1510+ return 0;
1511+}
1512+
1513+static int wrr_tune(struct Qdisc *sch, struct rtattr *opt) {
1514+ if(((struct tc_wrr_qdisc_modf_std*)RTA_DATA(opt))->proxy) {
1515+ return wrr_tune_proxy(sch,opt);
1516+ } else {
1517+ return wrr_tune_std(sch,opt);
1518+ }
1519+}
1520+
1521+//-----------------------------------------------------------------------------
1522+// Classes.
1523+// External and internal IDs are equal. They are the band number plus 1.
1524+
1525+// Replace a class with another:
1526+static int wrr_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1527+ struct Qdisc **old)
1528+{
c9d1c54c 1529+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1530+ if(arg>q->bandc || arg==0) return -EINVAL;
1531+ arg--;
1532+
1533+ if (new == NULL)
1534+ new = &noop_qdisc;
1535+
1536+#ifdef KERNEL22
1537+ *old = xchg(&q->bands[arg].que, new);
1538+#else
1539+ LOCK_START
1540+ *old = q->bands[arg].que;
1541+ q->bands[arg].que = new;
1542+ qdisc_reset(*old);
1543+ LOCK_END
1544+#endif
1545+
1546+ return 0;
1547+}
1548+
1549+// Returns the qdisc for a class:
1550+static struct Qdisc * wrr_leaf(struct Qdisc *sch, unsigned long arg)
1551+{
c9d1c54c 1552+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1553+ if(arg>q->bandc || arg==0) return NULL;
1554+ arg--;
1555+ return q->bands[arg].que;
1556+}
1557+
1558+static unsigned long wrr_get(struct Qdisc *sch, u32 classid)
1559+{
c9d1c54c 1560+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1561+ unsigned long band = TC_H_MIN(classid);
1562+ if(band>q->bandc || band==0) return 0;
1563+ return band;
1564+}
1565+
1566+static void wrr_put(struct Qdisc *q, unsigned long cl)
1567+{
1568+ return;
1569+}
1570+
1571+static int wrr_delete(struct Qdisc *sch, unsigned long cl)
1572+{
c9d1c54c 1573+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1574+ if(cl==0 || cl>q->bandc) return -ENOENT;
1575+ cl--;
1576+ return 0;
1577+}
1578+
1579+static int wrr_dump_class(struct Qdisc *sch, unsigned long cl,
1580+ struct sk_buff *skb, struct tcmsg *tcm)
1581+{
c9d1c54c 1582+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1583+ unsigned char *b = skb->tail;
1584+ struct tc_wrr_class_stats opt;
1585+
1586+ // Handle of this class:
1587+ tcm->tcm_handle = sch->handle|cl;
1588+
1589+ if(cl==0 || cl>q->bandc)
1590+ goto rtattr_failure;
1591+ cl--;
1592+
1593+ if(cl>=q->filter.mac_cur) {
1594+ // Band is unused:
1595+ memset(&opt,0,sizeof(opt));
1596+ opt.used=0;
1597+ } else {
1598+ opt.used=1;
1599+ opt.class_modf.weight1=q->bands[cl].class_modf.weight1;
1600+ opt.class_modf.weight2=q->bands[cl].class_modf.weight2;
1601+ weight_transmit(&opt.class_modf.weight1,q->qdisc_modf.weight1,0,0,0,0);
1602+ weight_transmit(&opt.class_modf.weight2,q->qdisc_modf.weight2,0,0,0,0);
1603+ memcpy(opt.addr,q->filter.cls2mac+cl*ETH_ALEN,ETH_ALEN);
1604+ opt.usemac=q->filter.usemac;
1605+ opt.heappos=q->h.root_1[cl+1].id2idx;
1606+ if(opt.heappos!=0) { // Is in heap
1607+ opt.penal_ls=heap_get_penalty(&q->h,cl).ls;
1608+ opt.penal_ms=heap_get_penalty(&q->h,cl).ms;
1609+ } else {
1610+ opt.penal_ls=0;
1611+ opt.penal_ms=0;
1612+ }
1613+ }
1614+
1615+ // Put quing information:
1616+ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1617+ return skb->len;
1618+
1619+rtattr_failure:
1620+ skb_trim(skb, b - skb->data);
1621+ return -1;
1622+}
1623+
1624+static int wrr_change(struct Qdisc *sch, u32 handle, u32 parent,
1625+ struct rtattr **tca, unsigned long *arg)
1626+{
1627+ unsigned long cl = *arg;
c9d1c54c 1628+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1629+ struct rtattr *opt = tca[TCA_OPTIONS-1];
1630+ struct tc_wrr_class_modf *copt = RTA_DATA(opt);
1631+
1632+ if(cl==0 || cl>q->bandc) return -EINVAL;
1633+ cl--;
1634+
1635+ if (opt->rta_len < RTA_LENGTH(sizeof(*copt))) return -EINVAL;
1636+
1637+ LOCK_START;
1638+
1639+ weight_setvalue(&q->bands[cl].class_modf.weight1,&copt->weight1);
1640+ weight_setvalue(&q->bands[cl].class_modf.weight2,&copt->weight2);
1641+
1642+ LOCK_END;
1643+
1644+ return 0;
1645+}
1646+
1647+static void wrr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1648+{
c9d1c54c 1649+ struct wrr_sched_data *q = qdisc_priv(sch);
df26885d 1650+ int prio;
1651+
1652+ if (arg->stop) return;
1653+
1654+ for (prio = 1; prio <= q->bandc; prio++) {
1655+ if (arg->count < arg->skip) {
1656+ arg->count++;
1657+ continue;
1658+ }
1659+ if (arg->fn(sch, prio, arg) < 0) {
1660+ arg->stop = 1;
1661+ break;
1662+ }
1663+ arg->count++;
1664+ }
1665+}
1666+
1667+static struct tcf_proto ** wrr_find_tcf(struct Qdisc *sch, unsigned long cl)
1668+{
1669+ return NULL;
1670+}
1671+
1672+static unsigned long wrr_bind(struct Qdisc *sch,
1673+ unsigned long parent, u32 classid)
1674+{
1675+ return wrr_get(sch, classid);
1676+}
1677+
1678+//-----------------------------------------------------------------------------
1679+// Generel
1680+
1681+static struct Qdisc_class_ops wrr_class_ops =
1682+{
1683+ wrr_graft,
1684+ wrr_leaf,
1685+
1686+ wrr_get,
1687+ wrr_put,
1688+ wrr_change,
1689+ wrr_delete,
1690+ wrr_walk,
1691+
1692+ wrr_find_tcf,
1693+ wrr_bind,
1694+ wrr_put,
1695+
1696+#if !defined(KERNEL22) || defined(CONFIG_RTNETLINK)
1697+ wrr_dump_class,
1698+#endif
1699+};
1700+
1701+struct Qdisc_ops wrr_qdisc_ops =
1702+{
1703+ NULL,
1704+ &wrr_class_ops,
1705+ "wrr",
1706+ sizeof(struct wrr_sched_data),
1707+
1708+ wrr_enqueue,
1709+ wrr_dequeue,
1710+ wrr_requeue,
1711+ wrr_drop,
1712+
1713+ wrr_init,
1714+ wrr_reset,
1715+ wrr_destroy,
1716+ wrr_tune,
1717+
1718+#if !defined(KERNEL22) || defined(CONFIG_RTNETLINK)
1719+ wrr_dump,
1720+#endif
1721+};
1722+
1723+#ifdef MODULE
1724+
1725+int init_module(void)
1726+{
1727+ return register_qdisc(&wrr_qdisc_ops);
1728+}
1729+
1730+void cleanup_module(void)
1731+{
1732+ unregister_qdisc(&wrr_qdisc_ops);
1733+}
1734+
1735+#ifndef KERNEL22
1736+ MODULE_LICENSE("GPL");
1737+#endif
1738+
1739+#endif
1740+
This page took 0.829756 seconds and 4 git commands to generate.