]> git.pld-linux.org Git - packages/kernel.git/blame - wrr-linux-2.4.9.patch
- CONFIG_AGP_ATI=y
[packages/kernel.git] / wrr-linux-2.4.9.patch
CommitLineData
9d12e591 1diff -uNrbB v24-org/include/linux/pkt_sched.h v24-new/include/linux/pkt_sched.h
2--- v24-org/include/linux/pkt_sched.h Tue Apr 28 20:10:10 1998
3+++ v24-new/include/linux/pkt_sched.h Sun Sep 9 14:34:14 2001
4@@ -274,4 +274,116 @@
5
6 #define TCA_CBQ_MAX TCA_CBQ_POLICE
7
8+/* WRR section */
9+
10+/* Other includes */
11+#include <linux/if_ether.h>
12+
13+// One sub-weight of a class.
14+// All numbers are represented as parts of (2^64-1).
15+struct tc_wrr_class_weight {
16+ __u64 val; // Current value (0 is not valid)
17+ __u64 decr; // Value per byte (2^64-1 is not valid)
18+ __u64 incr; // Value per second (2^64-1 is not valid)
19+ __u64 min; // Minimal value (0 is not valid)
20+ __u64 max; // Maximal value (0 is not valid)
21+
22+ // The time at which the above information was correct:
23+ time_t tim;
24+};
25+
26+// Packet sent when modifying a class:
27+struct tc_wrr_class_modf {
28+ // Not-valid values are ignored.
29+ struct tc_wrr_class_weight weight1;
30+ struct tc_wrr_class_weight weight2;
31+};
32+
33+// Packet returned when querying a class:
34+struct tc_wrr_class_stats {
35+ char used; // If this is false the information below is invalid
36+
37+ struct tc_wrr_class_modf class_modf;
38+
39+ unsigned char addr[ETH_ALEN];
40+ char usemac; // True if addr is a MAC address, else it is an IP address
41+ // (this value is only for convenience, it is always the same
42+ // value as in the qdisc)
43+ int heappos; // Current heap position or 0 if not in heap
44+ __u64 penal_ls; // Penalty value in heap (ls half of the penalty)
45+ __u64 penal_ms; // Penalty value in heap (ms half of the penalty)
46+};
47+
48+// Qdisc-wide penalty information (boolean values - 2 not valid)
49+struct tc_wrr_qdisc_weight {
50+ char weight_mode; // 0=No automatic change to weight
51+ // 1=Decrease normally
52+ // 2=Also multiply with number of machines
53+ // 3=Instead multiply with priority divided
54+ // with priority of the other.
55+ // -1=no change (presumably the "not valid" value that is ignored on modify - TODO confirm)
56+};
57+
58+// Packet sent when modifying a qdisc:
59+struct tc_wrr_qdisc_modf {
60+ // Not-valid values are ignored:
61+ struct tc_wrr_qdisc_weight weight1;
62+ struct tc_wrr_qdisc_weight weight2;
63+};
64+
65+// Packet sent when creating a qdisc:
66+struct tc_wrr_qdisc_crt {
67+ struct tc_wrr_qdisc_modf qdisc_modf;
68+
69+ char srcaddr; // 1=lookup source, 0=lookup destination
70+ char usemac; // 1=Classify on MAC addresses, 0=classify on IP
71+ char usemasq; // 1=Classify based on masquerading - only valid
72+ // if usemac is zero
73+ int bands_max; // Maximal number of bands (i.e.: classes)
74+ int proxy_maxconn; // If different from 0 then we support proxy remapping
75+ // of packets, and this is the maximal number of
76+ // concurrent proxy connections.
77+};
78+
79+// Packet returned when querying a qdisc:
80+struct tc_wrr_qdisc_stats {
81+ struct tc_wrr_qdisc_crt qdisc_crt;
82+ int proxy_curconn; // Presumably the current number of proxy connections - TODO confirm
83+ int nodes_in_heap; // Current number of bands wanting to send something
84+ int bands_cur; // Current number of bands used (i.e.: MAC/IP addresses seen)
85+ int bands_reused; // Number of times a band has been reused.
86+ int packets_requed; // Number of times packets have been requeued.
87+ __u64 priosum; // Sum of priorities in heap where 1 is 2^32
88+};
89+
90+struct tc_wrr_qdisc_modf_std {
91+ // This indicates which of the tc_wrr_qdisc_modf structures this is:
92+ char proxy; // 0=This struct
93+
94+ // Should we also change a class?
95+ char change_class;
96+
97+ // Only valid if change_class is false
98+ struct tc_wrr_qdisc_modf qdisc_modf;
99+
100+ // Only valid if change_class is true:
101+ unsigned char addr[ETH_ALEN]; // Class to change (unused bytes should be 0)
102+ struct tc_wrr_class_modf class_modf; // The change
103+};
104+
105+// Used for proxy remapping:
106+struct tc_wrr_qdisc_modf_proxy {
107+ // This indicates which of the tc_wrr_qdisc_modf structures this is:
108+ char proxy; // 1=This struct
109+
110+ // This is 1 if the proxyremap information should be reset
111+ char reset;
112+
113+ // changec is the number of elements in changes.
114+ int changec;
115+
116+ // This is an array of type ProxyRemapBlock (declared as long here; NOTE(review): presumably to avoid including proxyremap.h - confirm):
117+ long changes[0];
118+};
119+
120 #endif
121diff -uNrbB v24-org/net/sched/Config.in v24-new/net/sched/Config.in
122--- v24-org/net/sched/Config.in Sat Jan 15 04:18:53 2000
123+++ v24-new/net/sched/Config.in Wed Jan 2 09:39:30 2002
124@@ -11,6 +11,7 @@
125 bool ' ATM pseudo-scheduler' CONFIG_NET_SCH_ATM
126 fi
127 tristate ' The simplest PRIO pseudoscheduler' CONFIG_NET_SCH_PRIO
128+tristate ' WRR packet scheduler' CONFIG_NET_SCH_WRR
129 tristate ' RED queue' CONFIG_NET_SCH_RED
130 tristate ' SFQ queue' CONFIG_NET_SCH_SFQ
131 tristate ' TEQL queue' CONFIG_NET_SCH_TEQL
132diff -uNrbB v24-org/net/sched/Makefile v24-new/net/sched/Makefile
133--- v24-org/net/sched/Makefile Wed Mar 7 07:44:15 2001
134+++ v24-new/net/sched/Makefile Wed May 30 13:28:41 2001
135@@ -13,6 +13,7 @@
136 obj-$(CONFIG_NET_CLS_POLICE) += police.o
137 obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
138 obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
139+obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o
140 obj-$(CONFIG_NET_SCH_CSZ) += sch_csz.o
141 obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o
142 obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
143diff -uNrbB v24-org/net/sched/proxydict.c v24-new/net/sched/proxydict.c
144--- v24-org/net/sched/proxydict.c Thu Jan 1 01:00:00 1970
145+++ v24-new/net/sched/proxydict.c Thu Mar 1 20:33:02 2001
146@@ -0,0 +1,153 @@
147+#ifndef __KERNEL__
148+#include <string.h>
149+#include <netinet/in.h>
150+#endif
151+
152+#include "proxyremap.h"
153+#include "proxydict.h"
154+
155+
156+/*--------------------------------------------------------------------------
157+Implementation.
158+*/
159+
160+// Hash function: maps (proto, server, port) to a bucket index modulo m->hash_size
161+#define hash_fnc(m,server,port,proto) \
162+ (((proto)*7+(server)*13+(port)*5)%(m)->hash_size)
163+
164+// Size of hash table given maximal number of connections:
165+#define hash_size_max_con(max_con) (2*(max_con))
166+
167+// Header of the memory area we maintain:
168+typedef struct {
169+ int hash_size; // Number of entries in hash_table
170+ int max_con; // Capacity of next[] and info[]
171+ int cur_con; // Number of connections currently stored
172+
173+ int free_first; // Index of the first free entry in next[]/info[]
174+
175+ // Directly after this header the area contains:
176+ // int hash_table[hash_size];
177+ // int next[max_con];
178+ // ProxyRemapBlock info[max_con];
179+ //
180+ // The idea is the following:
181+ // Given a connection we map it by hash_fnc into hash_table. This gives an
182+ // index in next which starts a -1 terminated linked list of connections
183+ // mapping to that hash value.
184+ //
185+ // The entries in next that are not allocated are also kept in a linked list
186+ // whose first free index is free_first.
187+} memory;
188+
189+#define Memory(m) ((memory*)(m)) /* header at the start of the area */
190+#define Hash_table(m) ((int*)(((char*)(m))+sizeof(memory))) /* follows the header */
191+#define Next(m) ((int*)(((char*)(m))+sizeof(memory)+ \
192+ sizeof(int)*((memory*)(m))->hash_size))
193+#define Info(m) ((ProxyRemapBlock*)(((char*)(m))+ \
194+ sizeof(memory)+ \
195+ sizeof(int)*((memory*)(m))->hash_size+\
196+ sizeof(int)*((memory*)(m))->max_con \
197+ ))
198+
199+int proxyGetMemSize(int max_con) { // Bytes needed for an area holding up to max_con connections
200+ return sizeof(memory)+ // header
201+ sizeof(int)*hash_size_max_con(max_con)+ // hash_table[]
202+ sizeof(int)*max_con+ // next[]
203+ sizeof(ProxyRemapBlock)*max_con; // info[]; NOTE(review): sum is truncated to int, no overflow check
204+}
205+
206+void proxyInitMem(void* data, int max_con) {
207+ // Init the header (data must be at least proxyGetMemSize(max_con) bytes):
208+ memory* m=Memory(data);
209+ m->max_con=max_con;
210+ m->cur_con=0;
211+ m->hash_size=hash_size_max_con(max_con);
212+
213+ {
214+ // Get pointers (valid only now that hash_size is set in the header):
215+ int* hash_table=Hash_table(data);
216+ int* next=Next(data);
217+ int i;
218+
219+ // Init the hash table: every bucket starts as an empty (-1) list
220+ for(i=0; i<m->hash_size; i++) hash_table[i]=-1;
221+
222+ // Init the free-list: 0 -> 1 -> ... ; the last entry links to max_con rather than -1
223+ for(i=0; i<m->max_con; i++) next[i]=i+1;
224+ m->free_first=0;
225+ }
226+}
227+
228+int proxyGetCurConn(void* data) { // Returns the number of connections currently stored
229+ return Memory(data)->cur_con;
230+}
231+
232+int proxyGetMaxConn(void* data) { // Returns the capacity given to proxyInitMem
233+ return Memory(data)->max_con;
234+}
235+
236+ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto) {
237+ memory* m=Memory(data);
238+ int* hash_table=Hash_table(m);
239+ int* next=Next(m);
240+ ProxyRemapBlock* info=Info(m);
241+ int i;
242+ // Returns the block for (ipaddr,port,proto) or 0. proto is cast to unsigned char to match the stored field, so values >=128 hash and compare correctly where char is signed.
243+ for(i=hash_table[hash_fnc(m,ipaddr,port,(unsigned char)proto)]; i!=-1; i=next[i]) {
244+ if(info[i].proto==(unsigned char)proto &&
245+ info[i].sport==port &&
246+ info[i].saddr==ipaddr) return &info[i];
247+ }
248+
249+ return 0;
250+}
251+
252+int proxyConsumeBlock(void* data, ProxyRemapBlock* blk) { // Opens (blk->open!=0) or closes a connection; returns 0, or -1 on failure
253+ memory* m=Memory(data);
254+ int* hash_table=Hash_table(m);
255+ int* next=Next(m);
256+ ProxyRemapBlock* info=Info(m);
257+ int hash=hash_fnc(m,blk->saddr,blk->sport,blk->proto);
258+ int foo; // Scratch index: saved free-list successor (open) or the removed entry (close)
259+
260+ if(blk->open) {
261+ if(m->cur_con == m->max_con) return -1; // Table full
262+
263+ // Insert the block at a free entry:
264+ info[m->free_first]=*blk;
265+ m->cur_con++;
266+
267+ foo=next[m->free_first]; // Remember the rest of the free list
268+
269+ // And insert it at the head of its chain in the hash table:
270+ next[m->free_first]=hash_table[hash];
271+ hash_table[hash]=m->free_first;
272+ m->free_first=foo;
273+ } else {
274+ int* toupdate; // Points at the link (bucket head or next[] slot) that refers to the current entry
275+
276+ // Find the block
277+ for(toupdate=&hash_table[hash];
278+ *toupdate!=-1;
279+ toupdate=&next[*toupdate]) {
280+ if(info[*toupdate].proto==blk->proto &&
281+ info[*toupdate].sport==blk->sport &&
282+ info[*toupdate].saddr==blk->saddr) break;
283+ }
284+ if(*toupdate==-1) return -1; // Closing a connection we do not know
285+
286+ foo=*toupdate;
287+
288+ // Delete it from the hashing list:
289+ *toupdate=next[*toupdate];
290+
291+ // And put it on the free list:
292+ next[foo]=m->free_first;
293+ m->free_first=foo;
294+
295+ m->cur_con--;
296+ }
297+
298+ return 0;
299+}
300diff -uNrbB v24-org/net/sched/proxydict.h v24-new/net/sched/proxydict.h
301--- v24-org/net/sched/proxydict.h Thu Jan 1 01:00:00 1970
302+++ v24-new/net/sched/proxydict.h Tue Feb 13 22:47:00 2001
303@@ -0,0 +1,32 @@
304+#ifdef __cplusplus
305+extern "C" {
306+#endif
307+
308+/*--------------------------------------------------------------------------
309+This is common code for handling the tables containing information about
310+which proxyserver connections are associated with which machines.
311+*/
312+
313+// Returns the number of bytes that should be available in the area
314+// maintained by this module given the maximal number of concurrent
315+// connections.
316+int proxyGetMemSize(int max_connections);
317+
318+// Initializes a memory area to use. There must be as many bytes
319+// available as returned by proxyGetMemSize.
320+void proxyInitMem(void* data, int max_connections);
321+
322+// Queries:
323+int proxyGetCurConn(void* data); // Returns current number of connections
324+int proxyGetMaxConn(void* data); // Returns maximal number of connections (name matches the definition in proxydict.c)
325+
326+// This is called to open and close connections. Returns -1 if
327+// a protocol error occurs (i.e.: if it is discovered), otherwise 0.
328+int proxyConsumeBlock(void* data, ProxyRemapBlock*);
329+
330+// Returns the RemapBlock associated with this connection or 0:
331+ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto);
332+
333+#ifdef __cplusplus
334+}
335+#endif
336diff -uNrbB v24-org/net/sched/proxyremap.h v24-new/net/sched/proxyremap.h
337--- v24-org/net/sched/proxyremap.h Thu Jan 1 01:00:00 1970
338+++ v24-new/net/sched/proxyremap.h Thu May 17 10:54:11 2001
339@@ -0,0 +1,33 @@
340+#ifndef PROXYREMAP_H
341+#define PROXYREMAP_H
342+
343+// This describes the information that is written in proxyremap.log and which
344+// are used in the communication between proxyremapserver and proxyremapclient.
345+// Everything is in network order.
346+
347+// First this header is sent:
348+#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n"
349+
350+// Then this block is sent every time a connection is opened or closed.
351+// Note how it is aligned to keep space usage small - arrays of this
352+// structure are saved in many places.
353+typedef struct {
354+ // Server endpoint of connection:
355+ unsigned saddr;
356+ unsigned short sport;
357+
358+ // IP protocol for this connection (typically udp or tcp):
359+ unsigned char proto;
360+
361+ // Is the connection opened or closed?
362+ unsigned char open;
363+
364+ // Client the packets should be accounted to:
365+ unsigned caddr;
366+ unsigned char macaddr[6]; // Might be 0.
367+
368+ // An informal two-character code from the proxyserver. Used for debugging.
369+ char proxyinfo[2];
370+} ProxyRemapBlock;
371+
372+#endif
373diff -uNrbB v24-org/net/sched/sch_wrr.c v24-new/net/sched/sch_wrr.c
374--- v24-org/net/sched/sch_wrr.c Thu Jan 1 01:00:00 1970
375+++ v24-new/net/sched/sch_wrr.c Mon Apr 1 17:07:51 2002
376@@ -0,0 +1,1357 @@
377+/*-----------------------------------------------------------------------------
378+Weighted Round Robin scheduler.
379+
380+Written by Christian Worm Mortensen, cworm@it-c.dk.
381+
382+Introduction
383+============
384+This module implements a weighted round robin queue with built-in classifier.
385+The classifier currently maps each MAC or IP address (configurable either MAC
386+or IP and either source or destination) to different classes. Each such class
387+is called a band. When using MAC addresses, only bridged packets can be
388+classified; other packets go to a default MAC address.
389+
390+Each band has a weight value, where 0<weight<=1. The bandwidth each band
391+get is proportional to the weight as can be deduced from the next section.
392+
393+
394+The queue
395+=========
396+Each band has a penalty value. Bands having something to send are kept in
397+a heap according to this value. The band with the lowest penalty value
398+is in the root of the heap. The penalty value is a 128 bit number. Initially
399+no bands are in the heap.
400+
401+Two global 64 bit values counter_low_penal and counter_high_penal are initialized
402+to 0 and to 2^63 respectively.
403+
404+Enqueuing:
405+ The packet is inserted in the queue for the band it belongs to. If the band
406+ is not in the heap it is inserted into it. In this case, the upper 64 bits
407+ of its penalty value is set to the same as for the root-band of the heap.
408+ If the heap is empty 0 is used. The lower 64 bit is set to counter_low_penal
409+ and counter_low_penal is incremented by 1.
410+
411+Dequeuing:
412+ If the heap is empty we have nothing to send.
413+
414+ If the root band has a non-empty queue a packet is dequeued from that.
415+ The upper 64 bit of the penalty value of the band is incremented by the
416+ packet size divided with the weight of the band. The lower 64 bit is set to
417+ couter_high_penal and couter_high_penal is incremented by 1.
418+
419+ If the root element for some reason has an empty queue it is removed from
420+ the heap and we try to dequeue again.
421+
422+The effect of the heap and the upper 64 bit of the penalty values is to
423+implement a weighted round robin queue. The effect of counter_low_penal,
424+counter_high_penal and the lower 64 bit of the penalty value is primarily to
425+stabilize the queue and to give better quality of service to machines only
426+sending a packet now and then. For example machines which have a single
427+interactive connection such as telnet or simple text chatting.
428+
429+
430+Setting weight
431+==============
432+The weight value can be changed dynamically by the queue itself. The weight
433+value and how it is changed is described by the two members weight1 and
434+weight2 which has type tc_wrr_class_weight and which are in each class. And
435+by the two integer value members of the qdisc called penalfact1 and penalfact2.
436+The structure is defined as:
437+
438+ struct tc_wrr_class_weight {
439+ // All are represented as parts of (2^64-1).
440+ __u64 val; // Current value (0 is not valid)
441+ __u64 decr; // Value pr bytes (2^64-1 is not valid)
442+ __u64 incr; // Value pr seconds (2^64-1 is not valid)
443+ __u64 min; // Minimal value (0 is not valid)
444+ __u64 max; // Maximal value (0 is not valid)
445+
446+ // The time where the above information was correct:
447+ time_t tim;
448+ };
449+
450+The weight value used by the dequeue operations is calculated as
451+weight1.val*weight2.val. weight1 and weight2 are handled independently and in the
452+same way as will be described now.
453+
454+Every second, the val parameter is incremented by incr.
455+
456+Every time a packet is transmitted the value is decremented by decr times
457+the packet size. Depending on the value of the weight_mode parameter it
458+is also multiplied with other numbers. This makes it possible to give
459+penalty to machines transferring much data.
460+
461+-----------------------------------------------------------------------------*/
462+
463+#include <linux/config.h>
464+#include <linux/module.h>
465+#include <asm/uaccess.h>
466+#include <asm/system.h>
467+#include <asm/bitops.h>
468+#include <linux/types.h>
469+#include <linux/kernel.h>
470+#include <linux/sched.h>
471+#include <linux/string.h>
472+#include <linux/mm.h>
473+#include <linux/socket.h>
474+#include <linux/sockios.h>
475+#include <linux/in.h>
476+#include <linux/errno.h>
477+#include <linux/interrupt.h>
478+#include <linux/if_ether.h>
479+#include <linux/inet.h>
480+#include <linux/netdevice.h>
481+#include <linux/etherdevice.h>
482+#include <linux/notifier.h>
483+#include <net/ip.h>
484+#include <net/route.h>
485+#include <linux/skbuff.h>
486+#include <net/sock.h>
487+#include <net/pkt_sched.h>
488+
489+#include <linux/if_arp.h>
490+#include <linux/version.h>
491+
492+// Kernel depend stuff:
493+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
494+ #define KERNEL22
495+#endif
496+
497+#ifdef KERNEL22
498+ #define LOCK_START start_bh_atomic();
499+ #define LOCK_END end_bh_atomic();
500+ #define ENQUEUE_SUCCESS 1
501+ #define ENQUEUE_FAIL 0
502+ #ifdef CONFIG_IP_MASQUERADE
503+ #include <net/ip_masq.h>
504+ #define MASQ_SUPPORT
505+ #endif
506+#else
507+ #define LOCK_START sch_tree_lock(sch);
508+ #define LOCK_END sch_tree_unlock(sch);
509+ #define ENQUEUE_SUCCESS 0
510+ #define ENQUEUE_FAIL NET_XMIT_DROP
511+ #ifdef CONFIG_NETFILTER
512+ #include <linux/netfilter_ipv4/ip_conntrack.h>
513+ #define MASQ_SUPPORT
514+ #endif
515+#endif
516+
517+#include "proxydict.c"
518+
519+// The penalty (priority) type: a 128 bit value split in a most (ms) and least (ls) significant half:
520+typedef u64 penalty_base_t;
521+#define penalty_base_t_max ((penalty_base_t)-1)
522+typedef struct penalty_t {
523+ penalty_base_t ms;
524+ penalty_base_t ls;
525+} penalty_t;
526+#define penalty_leq(a,b) ((a).ms<(b).ms || ((a).ms==(b).ms && (a).ls<=(b).ls)) /* a<=b */
527+#define penalty_le(a,b) ((a).ms<(b).ms || ((a).ms==(b).ms && (a).ls<(b).ls)) /* a<b */
528+static penalty_t penalty_max={penalty_base_t_max,penalty_base_t_max};
529+
530+//-----------------------------------------------------------------------------
531+// A general heap.
532+
533+struct heap;
534+struct heap_element;
535+
536+// Initializes an empty heap:
537+// he: A pointer to an uninitialized heap structure identifying the heap
538+// size: Maximal number of elements the heap can contain
539+// poll: An array of size "size" used by the heap.
540+static void heap_init(struct heap* he,int size, struct heap_element* poll);
541+
542+// Each element in the heap is identified by a user-assigned id which
543+// should be a non negative integer less than the size argument
544+// given to heap_init.
545+static void heap_insert(struct heap*, int id, penalty_t);
546+static void heap_remove(struct heap*, int id);
547+static void heap_set_penalty(struct heap*, int id, penalty_t);
548+
549+// Retrieving information:
550+static char heap_empty(struct heap*); // Heap empty?
551+static char heap_contains(struct heap*, int id); // Does heap contain
552+ // the given id?
553+static int heap_root(struct heap*); // Returns the id of the root
554+static penalty_t heap_get_penalty(struct heap*, int id); // Returns penalty
555+ // of the element with the given id
556+
557+//--------------------
558+// Heap implementation
559+
560+struct heap_element {
561+ penalty_t penalty; // Penalty of the element stored at this heap index
562+ int id; // The user-assigned id of the element stored at this heap index
563+ int id2idx; // Entry id+1 of root_1 holds the heap index of user id "id", or 0 if that id is not in the heap
564+};
565+
566+struct heap {
567+ struct heap_element* root_1; // poll-1, so that heap indices are 1-based (root is root_1[1])
568+ int elements; // Number of elements currently in the heap
569+};
570+
571+// Heap implementation. heap_init makes h an empty heap backed by poll[0..size-1]:
572+static void heap_init(struct heap* h, int size, struct heap_element* poll) {
573+ int i;
574+
575+ h->elements=0;
576+ h->root_1=poll-1; // 1-based indexing: root_1[1] is poll[0]
577+
578+ for(i=0; i<size; i++) poll[i].id2idx=0; // 0 means "id not in heap"
579+}
580+
581+static char heap_empty(struct heap* h) { // True if the heap holds no elements
582+ return h->elements==0;
583+}
584+
585+static char heap_contains(struct heap* h, int id) { // True if id currently has a heap index
586+ return h->root_1[id+1].id2idx!=0;
587+}
588+
589+static int heap_root(struct heap* h) { // Id stored at heap index 1; does not check that the heap is non-empty
590+ return h->root_1[1].id;
591+}
592+
593+static penalty_t heap_get_penalty(struct heap* h, int id) { // Penalty of the element with the given id
594+ return h->root_1[ h->root_1[id+1].id2idx ].penalty;
595+}
596+
597+static void heap_penalty_changed_internal(struct heap* h,int idx);
598+ // Changes the penalty of the element with the given id and restores heap order:
599+static void heap_set_penalty(struct heap* h, int id, penalty_t p) {
600+ int idx=h->root_1[id+1].id2idx;
601+ h->root_1[idx].penalty=p;
602+ heap_penalty_changed_internal(h,idx);
603+}
604+
605+static void heap_insert(struct heap* h, int id, penalty_t p) {
606+ // Insert at the end of the heap (no check against the size given to heap_init):
607+ h->elements++;
608+ h->root_1[h->elements].id=id;
609+ h->root_1[h->elements].penalty=p;
610+ h->root_1[id+1].id2idx=h->elements;
611+
612+ // And put it in the right position:
613+ heap_penalty_changed_internal(h,h->elements);
614+}
615+
616+static void heap_remove(struct heap* h, int id) {
617+ int idx=h->root_1[id+1].id2idx;
618+ int mvid; // Id of the last heap element, which is moved into the hole
619+ h->root_1[id+1].id2idx=0; // Mark id as no longer in the heap
620+ // If the removed element is the last one there is nothing to move:
621+ if(h->elements==idx) { h->elements--; return; }
622+ // Otherwise move the last element into position idx:
623+ mvid=h->root_1[h->elements].id;
624+ h->root_1[idx].id=mvid;
625+ h->root_1[idx].penalty=h->root_1[h->elements].penalty;
626+ h->root_1[mvid+1].id2idx=idx;
627+ // Shrink the heap and restore heap order around idx:
628+ h->elements--;
629+ heap_penalty_changed_internal(h,idx);
630+}
631+
632+static void heap_swap(struct heap* h, int idx0, int idx1) { // Swaps the elements at two heap indices
633+ penalty_t tmp_p;
634+ int tmp_id;
635+ int id0,id1;
636+
637+ // Simple content (penalty and id; id2idx is not swapped since it is indexed by id):
638+ tmp_p=h->root_1[idx0].penalty;
639+ tmp_id=h->root_1[idx0].id;
640+ h->root_1[idx0].penalty=h->root_1[idx1].penalty;
641+ h->root_1[idx0].id=h->root_1[idx1].id;
642+ h->root_1[idx1].penalty=tmp_p;
643+ h->root_1[idx1].id=tmp_id;
644+
645+ // Update reverse pointers:
646+ id0=h->root_1[idx0].id;
647+ id1=h->root_1[idx1].id;
648+ h->root_1[id0+1].id2idx=idx0;
649+ h->root_1[id1+1].id2idx=idx1;
650+}
651+// Restores heap order after the penalty at heap index cur changed (moves the element down or up):
652+static void heap_penalty_changed_internal(struct heap* h,int cur) {
653+ if(cur==1 || penalty_leq(h->root_1[cur>>1].penalty,h->root_1[cur].penalty)) {
654+ // We are in heap order upwards - so we should move the element down
655+ for(;;) {
656+ int nxt0=cur<<1;
657+ int nxt1=nxt0+1;
658+ penalty_t pen_c=h->root_1[cur].penalty;
659+ penalty_t pen_0=nxt0<=h->elements ? h->root_1[nxt0].penalty : penalty_max;
660+ penalty_t pen_1=nxt1<=h->elements ? h->root_1[nxt1].penalty : penalty_max;
661+ // A missing child counts as penalty_max, so it is never strictly smaller than pen_c
662+ if(penalty_le(pen_0,pen_c) && penalty_leq(pen_0,pen_1)) {
663+ // Swap with child 0:
664+ heap_swap(h,cur,nxt0);
665+ cur=nxt0;
666+ } else if(penalty_le(pen_1,pen_c)) {
667+ // Swap with child 1:
668+ heap_swap(h,cur,nxt1);
669+ cur=nxt1;
670+ } else {
671+ // Heap in heap order:
672+ return;
673+ }
674+ }
675+ } else {
676+ // We are not in heap order upwards (and thus we must be it downwards).
677+ // We move up:
678+ while(cur!=1) { // While not root
679+ int nxt=cur>>1;
680+ if(penalty_leq(h->root_1[nxt].penalty,h->root_1[cur].penalty)) return;
681+ heap_swap(h,cur,nxt);
682+ cur=nxt;
683+ }
684+ }
685+}
686+
687+//-----------------------------------------------------------------------------
688+// Classification based on MAC or IP addresses. Note that for historical reasons
689+// these are prefixed with mac_ since originally only MAC based classification
690+// was supported.
691+//
692+// This code should be in a separate filter module - but it isn't.
693+
694+// Interface:
695+
696+struct mac_head;
697+
698+// Initializes/destroys the structure we maintain.
699+// mac_init returns -1 on error
700+static int mac_init(struct mac_head*, int max_macs, char srcaddr,
701+ char usemac, char usemasq, void* proxyremap);
702+static void mac_done(struct mac_head*);
703+static void mac_reset(struct mac_head*);
704+
705+// Classify a packet. Returns a number n where 0<=n<max_macs. Or -1 if
706+// the packet should be dropped.
707+static int mac_classify(struct mac_head*, struct sk_buff *skb);
708+
709+//-------------
710+// Implementation:
711+
712+struct mac_addr {
713+ unsigned char addr[ETH_ALEN]; // Address of this band (last two are 0 on IP); must stay the first member since mac_compare is applied to the struct pointer
714+ unsigned long lastused; // Value of the incr_time counter when a packet was last encountered
715+ int class; // Classid of this band (0<=classid<max_macs)
716+};
717+
718+static int mac_compare(const void* a, const void* b) { // Orders two addresses by their first ETH_ALEN bytes
719+ return memcmp(a,b,ETH_ALEN);
720+}
721+
722+struct mac_head {
723+ int mac_max; // Maximal number of MAC addresses/classes allowed
724+ int mac_cur; // Current number of MAC addresses/classes
725+ int mac_reused; // Number of times we have reused a class with a new
726+ // address.
727+ u64 incr_time; // Counter incremented on every lookup; used as the "lastused" timestamp
728+ char srcaddr; // True if we classify on the source address of packets,
729+ // else we use destination address.
730+ char usemac; // If true we use mac, else we use IP
731+ char usemasq; // If true we try to demasquerade
732+ struct mac_addr* macs; // Allocated mac_max elements, used mac_cur; kept sorted by address
733+ char* cls2mac; // Mapping from class numbers to addresses -
734+ // there are 6 bytes in each entry
735+
736+ void* proxyremap; // Information on proxy remapping of data or 0
737+};
738+
739+// This works like the standard C library function with the same name (recursive binary search over a sorted array):
740+static const void* bsearch(const void* key, const void* base, int nmemb,
741+ size_t size,
742+ int (*compare)(const void*, const void*)) {
743+ int m_idx;
744+ const void* m_ptr;
745+ int i;
746+
747+ if(nmemb<=0) return 0; // Empty range: not found
748+
749+ m_idx=nmemb>>1; // Middle element
750+ m_ptr=((const char*)base)+m_idx*size;
751+
752+ i=compare(key,m_ptr);
753+ if(i<0) // key is less: search the m_idx elements before the middle
754+ return bsearch(key,base,m_idx,size,compare);
755+ else if(i>0) // key is greater: search the elements after the middle
756+ return bsearch(key,((const char*)m_ptr)+size,nmemb-m_idx-1,size,compare);
757+
758+ return m_ptr;
759+}
760+// Initializes h with room for max_macs addresses. Returns 0, or -1 if allocation fails (then both pointers are 0).
761+static int mac_init(struct mac_head* h, int max_macs, char srcaddr,
762+ char usemac, char usemasq,void* proxyremap) {
763+ h->mac_cur=0;
764+ h->mac_reused=0;
765+ h->incr_time=0;
766+ h->srcaddr=srcaddr;
767+ h->usemac=usemac;
768+ h->usemasq=usemasq;
769+ h->mac_max=max_macs;
770+ h->proxyremap=proxyremap;
771+ // cls2mac holds ETH_ALEN bytes per class, matching the indexing in lookup_mac:
772+ h->macs=(struct mac_addr*)
773+ kmalloc( sizeof(struct mac_addr)*max_macs, GFP_KERNEL);
774+ h->cls2mac=(char*)kmalloc( ETH_ALEN*max_macs, GFP_KERNEL);
775+ if(!h->macs || !h->cls2mac) {
776+ if(h->macs) { kfree(h->macs); h->macs=0; } // Do not leave dangling pointers behind
777+ if(h->cls2mac) { kfree(h->cls2mac); h->cls2mac=0; }
778+ return -1;
779+ }
780+ return 0;
781+}
782+
783+static void mac_done(struct mac_head* h) { // Frees the two arrays allocated by mac_init
784+ kfree(h->macs);
785+ kfree(h->cls2mac);
786+}
787+
788+static void mac_reset(struct mac_head* h) { // Forgets all addresses and counters; allocations and settings are kept
789+ h->mac_cur=0;
790+ h->mac_reused=0;
791+ h->incr_time=0;
792+}
793+
794+static int lookup_mac(struct mac_head* h, unsigned char* addr) { // Returns the class for addr, assigning (or recycling) one if addr is new
795+ int i;
796+ int class;
797+
798+ // First try to find the address in the table:
799+ struct mac_addr* m=(struct mac_addr*)
800+ bsearch(addr,h->macs,h->mac_cur,sizeof(struct mac_addr),mac_compare);
801+ if(m) {
802+ // Found:
803+ m->lastused=h->incr_time++;
804+ return m->class;
805+ }
806+
807+ // Okay - the MAC address was not in table
808+ if(h->mac_cur==h->mac_max) {
809+ // And the table is full - delete the oldest entry:
810+ // NOTE(review): this path assumes mac_max>0; with mac_max==0 it would read macs[0] - confirm callers reject that
811+ // Find the oldest entry:
812+ int lowidx=0;
813+ int i; // Shadows the outer i
814+ for(i=1; i<h->mac_cur; i++)
815+ if(h->macs[i].lastused < h->macs[lowidx].lastused) lowidx=i;
816+
817+ class=h->macs[lowidx].class; // The new address takes over the class of the evicted one
818+
819+ // And delete it:
820+ memmove(&h->macs[lowidx],&h->macs[lowidx+1],
821+ (h->mac_cur-lowidx-1)*sizeof(struct mac_addr));
822+ h->mac_reused++;
823+ h->mac_cur--;
824+ } else {
825+ class=h->mac_cur; // Next unused class number
826+ }
827+
828+ // The table is now not full - find the position we should put the address in (keeps macs sorted):
829+ for(i=0; i<h->mac_cur; i++) if(mac_compare(addr,&h->macs[i])<0) break;
830+
831+ // We should insert at position i:
832+ memmove(&h->macs[i+1],&h->macs[i],(h->mac_cur-i)*sizeof(struct mac_addr));
833+ m=&h->macs[i];
834+ memcpy(m->addr,addr,ETH_ALEN);
835+ m->lastused=h->incr_time++;
836+ m->class=class;
837+ h->mac_cur++;
838+
839+ // Finally update the cls2mac variable:
840+ memcpy(h->cls2mac+ETH_ALEN*class,addr,ETH_ALEN);
841+
842+ return m->class;
843+}
840+ memcpy(h->cls2mac+ETH_ALEN*class,addr,ETH_ALEN);
841+
842+ return m->class;
843+}
844+// Returns non-zero if the IP header at ip (size bytes available) has a sane length and a correct header checksum.
845+int valid_ip_checksum(struct iphdr* ip, int size) {
846+ __u16 header_len=ip->ihl<<2;
847+ __u16 c=0;
848+ __u16* ipu=(__u16*)ip;
849+ int a;
850+
851+ // Reject header lengths below the minimum (ihl<5) and require 4 bytes after the full header, since the caller reads the port numbers there:
852+ if(header_len<sizeof(struct iphdr) || size<header_len+4) return 0;
853+
854+ for(a=0; a<(header_len>>1); a++, ipu++) {
855+ if(a!=5) { // If not the checksum field
856+ __u16 oldc=c;
857+ c+=(*ipu);
858+ if(c<oldc) c++; // End-around carry of the one's complement sum
859+ }
860+ }
861+
862+ return ip->check==(__u16)~c;
863+}
864+
865+static int mac_classify(struct mac_head* head, struct sk_buff *skb)
866+{
867+ // We set this to the address we map to. In case we map to an IP
868+ // address the last two entries are set to 0.
869+ unsigned char addr[ETH_ALEN];
870+
871+
872+ // This is the size of the network part of the packet, I think:
873+ int size=((char*)skb->data+skb->len)-((char*)skb->nh.iph);
874+
875+ // Set a default value for the address:
876+ memset(addr,0,ETH_ALEN);
877+
878+ // Accept IP-ARP traffic (NOTE(review): no packet length check is made before reading ar_pro):
879+ if(ntohs(skb->protocol)==ETH_P_ARP &&
880+ ntohs(skb->nh.arph->ar_pro)==ETH_P_IP) {
881+ // Map all ARP traffic to a default address to make sure
882+ // it goes through
883+ } else if ((ntohs(skb->protocol)==ETH_P_IP) &&
884+ valid_ip_checksum(skb->nh.iph,size)) {
885+ // Accept IP packets which have correct checksum.
886+
887+ // This is the IP header:
888+ struct iphdr* iph=skb->nh.iph;
889+
890+ // And this is the port numbers (the 4 bytes following the IP header, read for every IP protocol):
891+ const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]);
892+ __u16 sport=portp[0];
893+ __u16 dport=portp[1];
894+
895+ // We will set this to the IP address the packet should be
896+ // accounted to:
897+ unsigned ipaddr;
898+
899+ // Used below:
900+ ProxyRemapBlock* prm;
901+
902+ // Set ipaddr:
903+ if(head->srcaddr)
904+ ipaddr=iph->saddr;
905+ else
906+ ipaddr=iph->daddr;
907+
908+#ifdef MASQ_SUPPORT
909+ // Update ipaddr if packet is masqueraded:
910+ if(head->usemasq) {
911+ #ifdef KERNEL22
912+ struct ip_masq* src;
913+
914+ // HACK!:
915+ // ip_masq_in_get must be called for packets coming from the outside
916+ // to the firewall. We have a packet which is coming from the
917+ // firewall to the outside - so we switch the parameters:
918+ if((src=ip_masq_in_get(
919+ iph->protocol,
920+ iph->daddr,dport,
921+ iph->saddr,sport))) {
922+ // Use masqueraded address:
923+ ipaddr=src->saddr;
924+
925+ // It seems like we must put it back:
926+ ip_masq_put(src);
927+ }
928+ #else
929+ // Thanks to Rusty Russell for help with the following code:
930+ enum ip_conntrack_info ctinfo;
931+ struct ip_conntrack *ct;
932+ ct = ip_conntrack_get(skb, &ctinfo);
933+ if (ct) {
934+ if(head->srcaddr)
935+ ipaddr=ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.src.ip;
936+ else
937+ ipaddr=ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.dst.ip;
938+ }
939+ #endif
940+ }
941+#endif
942+
943+ // Set prm based on ipaddr:
944+ prm=0;
945+ if(head->proxyremap) {
946+ if(head->srcaddr) {
947+ prm=proxyLookup(head->proxyremap,ipaddr,sport,skb->nh.iph->protocol);
948+ } else {
949+ prm=proxyLookup(head->proxyremap,ipaddr,dport,skb->nh.iph->protocol);
950+ }
951+ }
952+
953+ // And finally set addr to the address:
954+ memset(addr,0,ETH_ALEN);
955+ if(prm) {
956+ // This packet should be remapped:
957+ if(head->usemac)
958+ memcpy(addr,prm->macaddr,ETH_ALEN);
959+ else {
960+ memcpy(addr,&prm->caddr,sizeof(unsigned));
961+ }
962+ } else {
963+ // This packet should not be remapped:
964+ if(head->usemac) {
965+ // We should find MAC address of packet.
966+ // Unfortunately, this is not always available.
967+ // On bridged packets it always is, however..
968+ #ifdef KERNEL22
969+ if(skb->pkt_bridged) {
970+ if(head->srcaddr) {
971+ memcpy(addr,skb->mac.ethernet->h_source,ETH_ALEN);
972+ } else {
973+ memcpy(addr,skb->mac.ethernet->h_dest,ETH_ALEN);
974+ }
975+ }
976+ #endif
977+ } else {
978+ memcpy(addr,&ipaddr,4);
979+ }
980+ }
981+ } else {
982+ // All other traffic is dropped - this ensures that packets
983+ // we consider probably have valid addresses so we don't
984+ // get too many strange addresses into our table. And that we
985+ // don't use bandwidth on strange packets..
986+ return -1;
987+ }
988+
989+ return lookup_mac(head,addr);
990+}
991+
992+//-----------------------------------------------------------------------------
993+// The qdisc itself
994+
995+ // Per-class (per-band) state; wrr_sched_data.bands holds one of these per band.
996+ struct wrrc_sched_data {
997+ struct Qdisc* que; // The child qdisc that queues packets of this class
998+ struct tc_wrr_class_modf class_modf; // weight1/weight2 state of the class.
999+
1000+ // For classes in the heap this is the priority value priosum
1001+ // was last updated with for this class (so it can be subtracted again):
1002+ u64 priosum_val;
1003+};
1004+
1005+ // Per-qdisc information:
1006+ struct wrr_sched_data
1007+ {
1008+ // A heap containing all the bands that will send something
1009+ struct heap h;
1010+ struct heap_element* poll; // bandc elements, handed to heap_init()
1011+
1012+ // The sum of the priorities of the elements in the heap where
1013+ // a priority of 1 is saved as 2^32
1014+ u64 priosum;
1015+
1016+ // A class for each band
1017+ struct wrrc_sched_data* bands; // bandc elements
1018+
1019+ // Information maintained by the proxydict module, or 0 if we
1020+ // have no proxy remapping
1021+ void* proxydict;
1022+
1023+ // Always incrementing counters; any value of counter_low_penal is
1024+ // smaller than any value of counter_high_penal. The low counter is used
1025+ penalty_base_t counter_low_penal; // ... when a band enters the heap,
1026+ penalty_base_t counter_high_penal; // ... the high one after a band has sent.
1027+
1028+ // Penalty updating (qdisc-wide weight modes):
1029+ struct tc_wrr_qdisc_modf qdisc_modf;
1030+
1031+ // Statistics: number of packets handed back through wrr_requeue
1032+ int packets_requed;
1033+
1034+ // The filter that maps packets to bands:
1035+ struct mac_head filter;
1036+ int bandc; // Number of bands
1037+};
1038+
1039+ // Priority handling: charge class weight p for sending size bytes and credit
1040+ // it for the seconds elapsed since p->tim. weight is in interval [0..2^32]
1041+ // priosum has whole numbers in the upper and fragments in the lower 32 bits.
1042+ static void weight_transmit(struct tc_wrr_class_weight* p,
1043+ struct tc_wrr_qdisc_weight q,
1044+ unsigned heapsize,
1045+ u64 priosum, u64 weight,
1046+ unsigned size) {
1047+ // Current time in whole seconds:
1048+ unsigned long now=jiffies/HZ;
1049+
1050+ // Penalty for transmitting (stays 0 unless q.weight_mode is 1, 2 or 3):
1051+ u64 change,old;
1052+ u32 divisor;
1053+ // NOTE(review): the products below are plain u64 arithmetic and may wrap; only the final subtract/add is checked - confirm the value ranges.
1054+ change=0;
1055+ switch(q.weight_mode) {
1056+ case 1: change=p->decr*size; break;
1057+ case 2: change=p->decr*size*heapsize; break;
1058+ case 3: // Note: 64 bit division is not always available..
1059+ divisor=(u32)(weight>>16);
1060+ if(divisor<=0) divisor=1;
1061+ change=p->decr*size*(((u32)(priosum>>16))/divisor); break;
1062+ }
1063+ old=p->val;
1064+ p->val-=change;
1065+ if(p->val>old || p->val<p->min) p->val=p->min;
1066+ // (p->val>old above means the subtraction wrapped; p->val<old below means the addition wrapped.)
1067+ // Credit for the time that has passed:
1068+ change=(now-p->tim)*p->incr;
1069+ p->tim=now;
1070+ old=p->val;
1071+ p->val+=change;
1072+ if(p->val<old || p->val>p->max) p->val=p->max;
1073+}
1074+ // Reset a class weight: value, minimum and maximum at 2^64-1, no decrease or increase, timestamp now (in seconds).
1075+ static void weight_setdefault(struct tc_wrr_class_weight* p) {
1076+ p->tim=jiffies/HZ;
1077+ p->incr=0;
1078+ p->decr=0;
1079+ p->max=~(u64)0;
1080+ p->min=~(u64)0;
1081+ p->val=~(u64)0;
1082+}
1083+ // Copy the valid fields of src into dst (0 is "not given" for val/min/max, 2^64-1 for decr/incr), then clamp dst->val into [min,max].
1084+ static void weight_setvalue(struct tc_wrr_class_weight* dst,
1085+ struct tc_wrr_class_weight* src) {
1086+ if(src->incr!=~(u64)0) dst->incr=src->incr;
1087+ if(src->decr!=~(u64)0) dst->decr=src->decr;
1088+ if(src->max) dst->max=src->max;
1089+ if(src->min) dst->min=src->min;
1090+ if(src->val) {
1091+ dst->tim=jiffies/HZ; // A new value also restarts the time credit
1092+ dst->val=src->val;
1093+ }
1094+ if(dst->val<dst->min) dst->val=dst->min;
1095+ if(dst->val>dst->max) dst->val=dst->max;
1096+}
1097+ // Qdisc destroy hook: release the filter, every child qdisc and the arrays allocated by wrr_init.
1098+ static void wrr_destroy(struct Qdisc *sch)
1099+ {
1100+ struct wrr_sched_data *wrr=(struct wrr_sched_data *)sch->data;
1101+ int band=0;
1102+
1103+ // Tear down the classifier state:
1104+ mac_done(&wrr->filter);
1105+
1106+ // Release the child qdisc of every band:
1107+ while(band<wrr->bandc)
1108+ qdisc_destroy(wrr->bands[band++].que);
1109+
1110+ // Free the arrays (the proxy dictionary only if there is one):
1111+ if(wrr->proxydict) kfree(wrr->proxydict);
1112+ kfree(wrr->poll);
1113+ kfree(wrr->bands);
1114+
1115+ MOD_DEC_USE_COUNT;
1116+}
1117+ // Qdisc init hook: validate the tc_wrr_qdisc_crt options, allocate the per-band state and create one pfifo child per band. Returns 0 or a negative errno.
1118+ static int wrr_init(struct Qdisc *sch, struct rtattr *opt)
1119+ {
1120+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1121+ int i,maciniterr;
1122+ char crterr;
1123+ struct tc_wrr_qdisc_crt *qopt;
1124+
1125+ // Parse options:
1126+ if (!opt) return -EINVAL; // Options must be specified
1127+ if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) return -EINVAL;
1128+ qopt = RTA_DATA(opt);
1129+
1130+ if(qopt->bands_max>2048|| qopt->bands_max<2) {
1131+ // More than 2048 queues or fewer than 2? That cannot be true - it must be
1132+ // an error...
1133+ return -EINVAL;
1134+ }
1135+
1136+ if(qopt->proxy_maxconn<0 || qopt->proxy_maxconn>20000) {
1137+ // More than this number of maximal concurrent connections is unrealistic
1138+ return -EINVAL;
1139+ }
1140+
1141+ #ifndef MASQ_SUPPORT
1142+ if(qopt->usemasq) {
1143+ return -ENOSYS;
1144+ }
1145+ #endif
1146+
1147+ #ifndef KERNEL22
1148+ if(qopt->usemac) { // Not supported - please fix this!
1149+ return -ENOSYS;
1150+ }
1151+ #endif
1152+
1153+ q->bandc=qopt->bands_max;
1154+ q->qdisc_modf=qopt->qdisc_modf;
1155+
1156+ // Create structures (results are checked together further down):
1157+ q->poll=(struct heap_element*)
1158+ kmalloc( sizeof(struct heap_element)*q->bandc, GFP_KERNEL);
1159+ q->bands=(struct wrrc_sched_data*)
1160+ kmalloc( sizeof(struct wrrc_sched_data)*q->bandc, GFP_KERNEL);
1161+
1162+ if(qopt->proxy_maxconn>0) {
1163+ q->proxydict=kmalloc(proxyGetMemSize(qopt->proxy_maxconn),GFP_KERNEL);
1164+ } else {
1165+ q->proxydict=0;
1166+ }
1167+
1168+ // Init mac module (note: this runs before the allocations above are checked):
1169+ maciniterr=mac_init(&q->filter,qopt->bands_max,qopt->srcaddr,
1170+ qopt->usemac,qopt->usemasq,q->proxydict);
1171+
1172+ // See if we got the memory we wanted; otherwise undo whatever succeeded:
1173+ if(!q->poll || !q->bands ||
1174+ (qopt->proxy_maxconn>0 && !q->proxydict) || maciniterr<0) {
1175+ if(q->poll) kfree(q->poll);
1176+ if(q->bands) kfree(q->bands);
1177+ if(q->proxydict) kfree(q->proxydict);
1178+ if(maciniterr>=0) mac_done(&q->filter);
1179+ return -ENOSPC;
1180+ }
1181+ // NOTE(review): the failure above is reported as -ENOSPC while the child-creation failure below uses -ENOMEM.
1182+ // Initialize proxy:
1183+ if(q->proxydict) {
1184+ proxyInitMem(q->proxydict,qopt->proxy_maxconn);
1185+ }
1186+
1187+ // Initialize values (the high counter starts at half the penalty range):
1188+ q->counter_low_penal=0;
1189+ q->counter_high_penal=penalty_base_t_max>>1;
1190+ q->packets_requed=0;
1191+
1192+ // Initialize empty heap:
1193+ heap_init(&q->h,q->bandc,q->poll);
1194+ q->priosum=0;
1195+
1196+ // Initialize each band; after the first failed child creation the
1197+ crterr=0; // remaining bands get &noop_qdisc so every que pointer is set.
1198+ for (i=0; i<q->bandc; i++) {
1199+ weight_setdefault(&q->bands[i].class_modf.weight1);
1200+ weight_setdefault(&q->bands[i].class_modf.weight2);
1201+ if(!crterr) {
1202+ struct Qdisc *child=qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops);
1203+ if(child)
1204+ q->bands[i].que = child;
1205+ else {
1206+ // Queue couldn't be created :-(
1207+ crterr=1;
1208+ }
1209+ }
1210+ if(crterr) q->bands[i].que = &noop_qdisc;
1211+ }
1212+ // The use count is taken before the error check because wrr_destroy drops it again:
1213+ MOD_INC_USE_COUNT;
1214+
1215+ if(crterr) {
1216+ // Destroy again:
1217+ wrr_destroy(sch);
1218+ return -ENOMEM;
1219+ }
1220+
1221+ return 0;
1222+}
1223+ // Qdisc reset hook: return counters, filter, heap, band weights, child queues and proxy table to their initial state.
1224+ static void wrr_reset(struct Qdisc* sch)
1225+ {
1226+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1227+ int i;
1228+
1229+ // Reset own values (same starting values as in wrr_init):
1230+ q->counter_low_penal=0;
1231+ q->counter_high_penal=penalty_base_t_max>>1;
1232+ q->packets_requed=0;
1233+
1234+ // Reset filter:
1235+ mac_reset(&q->filter);
1236+
1237+ // Reinitialize heap:
1238+ heap_init(&q->h,q->bandc,q->poll);
1239+ q->priosum=0;
1240+
1241+ // Reset all bands (default weights, child qdisc reset):
1242+ for (i=0; i<q->bandc; i++) {
1243+ weight_setdefault(&q->bands[i].class_modf.weight1);
1244+ weight_setdefault(&q->bands[i].class_modf.weight2);
1245+ qdisc_reset(q->bands[i].que);
1246+ }
1247+
1248+ // Reset proxy remapping information, keeping the configured maximum:
1249+ if(q->proxydict)
1250+ proxyInitMem(q->proxydict,proxyGetMaxConn(q->proxydict));
1251+}
1252+ // Qdisc enqueue hook: classify skb into a band, queue it on that band's child and make sure the band is in the heap. Returns the child's result, or ENQUEUE_FAIL if the packet could not be classified.
1253+ static int wrr_enqueue(struct sk_buff *skb, struct Qdisc* sch)
1254+ {
1255+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1256+ int retvalue=ENQUEUE_FAIL;
1257+
1258+ // The packet is in skb; a negative band means "do not queue":
1259+ int band=mac_classify(&q->filter,skb);
1260+
1261+ if(band>=0) {
1262+ // Enqueue packet for this band:
1263+ struct Qdisc* qdisc = q->bands[band].que;
1264+
1265+ if ((retvalue=qdisc->enqueue(skb, qdisc)) == ENQUEUE_SUCCESS) {
1266+ // Successful
1267+ sch->stats.bytes += skb->len;
1268+ sch->stats.packets++;
1269+ sch->q.qlen++;
1270+
1271+ // Insert band into heap if not already there. It starts with the
1272+ if(!heap_contains(&q->h,band)) { // current root's ms penalty (0 if the heap is empty):
1273+ penalty_t p;
1274+ if(!heap_empty(&q->h))
1275+ p.ms=heap_get_penalty(&q->h,heap_root(&q->h)).ms;
1276+ else
1277+ p.ms=0;
1278+ p.ls=q->counter_low_penal++;
1279+ heap_insert(&q->h,band,p);
1280+ q->bands[band].priosum_val=
1281+ ((q->bands[band].class_modf.weight1.val>>48)+1)*
1282+ ((q->bands[band].class_modf.weight2.val>>48)+1);
1283+ q->priosum+=q->bands[band].priosum_val;
1284+ }
1285+ }
1286+ } else {
1287+ // If we decide not to enqueue it seems like we also need to free the packet:
1288+ kfree_skb(skb);
1289+ }
1290+
1291+ if(retvalue!=ENQUEUE_SUCCESS) {
1292+ // Packet not enqueued:
1293+ sch->stats.drops++;
1294+ }
1295+
1296+ return retvalue;
1297+}
1298+ // Qdisc dequeue hook: take a packet from the band at the heap root, charge that band, and return the packet; returns 0 when no band has a packet.
1299+ static struct sk_buff *wrr_dequeue(struct Qdisc* sch)
1300+ {
1301+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1302+ struct sk_buff* skb;
1303+ int band;
1304+ u64 weight,priosum;
1305+ struct wrrc_sched_data* b;
1306+
1307+ // Loop instead of recursing, so a long run of empty bands cannot eat the kernel stack:
1308+ while(!heap_empty(&q->h)) {
1309+
1310+ // Find root element:
1311+ band=heap_root(&q->h);
1312+
1313+ // Find priority of this element in interval [1;2^32]
1314+ b=&q->bands[band];
1315+ weight=((b->class_modf.weight1.val>>48)+1)*
1316+ ((b->class_modf.weight2.val>>48)+1); //weight is in interval [1;2^32]
1317+ priosum=q->priosum;
1318+ q->priosum-=q->bands[band].priosum_val;
1319+
1320+ // Dequeue the packet from the root:
1321+ skb=q->bands[band].que->dequeue(q->bands[band].que);
1322+
1323+ if(skb) {
1324+ // There was a packet in this queue.
1325+ unsigned adjlen;
1326+ penalty_t p;
1327+
1328+ // Find length of packet adjusted with priority:
1329+ adjlen=(u32)(weight>>(32-16));
1330+ if(adjlen==0) adjlen=1;
1331+ adjlen=(skb->len<<16)/adjlen;
1332+
1333+ // Update penalty information for this class and re-add its priority to priosum:
1334+ weight_transmit(&b->class_modf.weight1,q->qdisc_modf.weight1,q->h.elements,priosum,weight,skb->len);
1335+ weight_transmit(&b->class_modf.weight2,q->qdisc_modf.weight2,q->h.elements,priosum,weight,skb->len);
1336+ q->bands[band].priosum_val=((b->class_modf.weight1.val>>48)+1)*
1337+ ((b->class_modf.weight2.val>>48)+1);
1338+ q->priosum+=q->bands[band].priosum_val;
1339+
1340+ // And update the class in the heap
1341+ p=heap_get_penalty(&q->h,band);
1342+ p.ms+=adjlen;
1343+ p.ls=q->counter_high_penal++;
1344+ heap_set_penalty(&q->h,band,p);
1345+
1346+ // Return packet:
1347+ sch->q.qlen--;
1348+ return skb;
1349+ }
1350+
1351+ // No packet - so machine should be removed from heap:
1352+ heap_remove(&q->h,band);
1353+ // ... and the next iteration tries the new root.
1354+ }
1355+ return 0;
1356+}
1357+
1358+static int wrr_requeue(struct sk_buff *skb, struct Qdisc* sch)
1359+{
1360+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1361+ struct Qdisc* qdisc;
1362+ int ret;
1363+
1364+ // Find band we took it from:
1365+ int band=mac_classify(&q->filter,skb);
1366+ if(band<0) {
1367+ // Who should now free the pakcet?
1368+ printk(KERN_DEBUG "sch_wrr: Oops - packet requed could never have been queued.\n");
1369+ sch->stats.drops++;
1370+ return ENQUEUE_FAIL;
1371+ }
1372+
1373+ q->packets_requed++;
1374+
1375+ // Try to requeue it on that machine:
1376+ qdisc=q->bands[band].que;
1377+
1378+ if((ret=qdisc->ops->requeue(skb,qdisc))==ENQUEUE_SUCCESS) {
1379+ // On success:
1380+ sch->q.qlen++;
1381+
1382+ // We should restore priority information - but we don't
1383+ //
1384+ // p=heap_get_penalty(&q->h,band);
1385+ // ...
1386+ // heap_set_penalty(&q->h,band,p);
1387+
1388+ return ENQUEUE_SUCCESS;
1389+ } else {
1390+ sch->stats.drops++;
1391+ return ret;
1392+ }
1393+}
1394+ // Qdisc drop hook: ask the bands in the heap, last heap slot first, to drop one packet. Returns 1 if a packet was dropped, else 0.
1395+ static int wrr_drop(struct Qdisc* sch)
1396+ {
1397+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1398+
1399+ // Ugly... Drop bottom up in heap:
1400+ int i;
1401+
1402+ for(i=q->h.elements; i>=1; i--) {
1403+ struct Qdisc* child=q->bands[q->h.root_1[i].id].que;
1404+ if(child->ops->drop && child->ops->drop(child)) { // a child may have no drop op
1405+ // On success
1406+ sch->q.qlen--;
1407+ return 1;
1408+ }
1409+ }
1410+
1411+ return 0;
1412+}
1413+
1414+#ifdef CONFIG_RTNETLINK
1415+ static int wrr_dump(struct Qdisc *sch, struct sk_buff *skb)
1416+ { // Netlink dump hook: append a tc_wrr_qdisc_stats as TCA_OPTIONS; returns skb->len, or -1 after trimming skb back.
1417+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1418+ unsigned char *b = skb->tail;
1419+ struct tc_wrr_qdisc_stats opt;
1420+ memset(&opt,0,sizeof(opt)); // Zero padding and unset fields so no stale stack bytes are copied out
1421+ opt.qdisc_crt.qdisc_modf=q->qdisc_modf;
1422+ opt.qdisc_crt.srcaddr=q->filter.srcaddr;
1423+ opt.qdisc_crt.usemac=q->filter.usemac;
1424+ opt.qdisc_crt.usemasq=q->filter.usemasq;
1425+ opt.qdisc_crt.bands_max=q->filter.mac_max;
1426+ opt.nodes_in_heap=q->h.elements;
1427+ opt.bands_cur=q->filter.mac_cur;
1428+ opt.bands_reused=q->filter.mac_reused;
1429+ opt.packets_requed=q->packets_requed;
1430+ opt.priosum=q->priosum;
1431+
1432+ if(q->proxydict) {
1433+ opt.qdisc_crt.proxy_maxconn=proxyGetMaxConn(q->proxydict);
1434+ opt.proxy_curconn=proxyGetCurConn(q->proxydict);
1435+ } else {
1436+ opt.qdisc_crt.proxy_maxconn=0;
1437+ opt.proxy_curconn=0;
1438+ }
1439+
1440+ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1441+ return skb->len;
1442+
1443+rtattr_failure: // seems like RTA_PUT jump to this label..
1444+ skb_trim(skb, b - skb->data);
1445+ return -1;
1446+}
1447+#endif
1448+ // Standard variant of the qdisc change hook: update either one class's weights or the qdisc-wide weight modes. Returns 0 or -EINVAL.
1449+ static int wrr_tune_std(struct Qdisc *sch, struct rtattr *opt)
1450+ {
1451+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1452+ struct tc_wrr_qdisc_modf_std *qopt = RTA_DATA(opt);
1453+
1454+ if(opt->rta_len < RTA_LENGTH(sizeof(*qopt))) return -EINVAL;
1455+
1456+ LOCK_START
1457+ // NOTE(review): idx from lookup_mac is used as an array index without a range check - confirm it can never be negative.
1458+ if(qopt->change_class) {
1459+ int idx=lookup_mac(&q->filter,qopt->addr);
1460+ weight_setvalue
1461+ (&q->bands[idx].class_modf.weight1,&qopt->class_modf.weight1);
1462+ weight_setvalue
1463+ (&q->bands[idx].class_modf.weight2,&qopt->class_modf.weight2);
1464+ } else {
1465+ if(qopt->qdisc_modf.weight1.weight_mode!=-1)
1466+ q->qdisc_modf.weight1.weight_mode=qopt->qdisc_modf.weight1.weight_mode;
1467+ if(qopt->qdisc_modf.weight2.weight_mode!=-1)
1468+ q->qdisc_modf.weight2.weight_mode=qopt->qdisc_modf.weight2.weight_mode;
1469+ }
1470+ // (A weight_mode of -1 above means "leave this mode unchanged".)
1471+ LOCK_END
1472+
1473+ return 0;
1474+}
1475+ // Proxy variant of the qdisc change hook: optionally reset the proxy table, then feed it qopt->changec remap blocks. Returns 0, -ENOSYS without proxy support, or -EINVAL on short data.
1476+ static int wrr_tune_proxy(struct Qdisc *sch, struct rtattr *opt)
1477+ {
1478+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1479+ struct tc_wrr_qdisc_modf_proxy *qopt = RTA_DATA(opt);
1480+ int i;
1481+
1482+ // Return if we are not configured with proxy support:
1483+ if(!q->proxydict) return -ENOSYS;
1484+
1485+ // Return if not enough data given. changec is compared with the number of blocks that fit behind the header, so a huge count cannot wrap the size computation:
1486+ if(opt->rta_len<RTA_LENGTH(sizeof(*qopt)) ||
1487+ qopt->changec>
1488+ (opt->rta_len-RTA_LENGTH(sizeof(*qopt)))/sizeof(ProxyRemapBlock))
1489+ return -EINVAL;
1490+
1491+ LOCK_START;
1492+
1493+ if(qopt->reset) {
1494+ proxyInitMem(q->proxydict,proxyGetMaxConn(q->proxydict));
1495+ }
1496+
1497+ // Do all the changes:
1498+ for(i=0; i<qopt->changec; i++) {
1499+ proxyConsumeBlock(q->proxydict,&((ProxyRemapBlock*)&qopt->changes)[i]);
1500+ }
1501+
1502+ LOCK_END;
1503+
1504+ return 0;
1505+}
1506+ // Qdisc change hook: dispatch to the proxy or the standard handler depending on the proxy flag of the request.
1507+ static int wrr_tune(struct Qdisc *sch, struct rtattr *opt) {
1508+ struct tc_wrr_qdisc_modf_std* hdr=RTA_DATA(opt);
1509+
1510+ // NOTE(review): the flag is read before either handler has validated opt->rta_len - confirm short attributes cannot reach this point.
1511+ if(!hdr->proxy) return wrr_tune_std(sch,opt);
1512+ return wrr_tune_proxy(sch,opt);
1513+}
1514+
1515+//-----------------------------------------------------------------------------
1516+// Classes.
1517+// External and internal IDs are equal. They are the band number plus 1.
1518+
1519+ // Replace the child qdisc of class arg (band+1) with new and hand the previous one back in *old. Returns 0, or -EINVAL for an invalid class.
1520+ static int wrr_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1521+ struct Qdisc **old)
1522+ {
1523+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1524+ if(arg>q->bandc || arg==0) return -EINVAL;
1525+ arg--;
1526+ // No replacement given: install the noop qdisc instead.
1527+ if (new == NULL)
1528+ new = &noop_qdisc;
1529+ // NOTE(review): sch->q.qlen is not reduced by the packets still held in the old child - confirm whether the counter needs adjusting here.
1530+ #ifdef KERNEL22
1531+ *old = xchg(&q->bands[arg].que, new);
1532+ #else
1533+ LOCK_START
1534+ *old = q->bands[arg].que;
1535+ q->bands[arg].que = new;
1536+ qdisc_reset(*old);
1537+ LOCK_END
1538+ #endif
1539+
1540+ return 0;
1541+}
1542+
1543+ // Look up the child qdisc of class arg (band+1); NULL if arg is out of range.
1544+ static struct Qdisc * wrr_leaf(struct Qdisc *sch, unsigned long arg)
1545+ {
1546+ struct wrr_sched_data *wrr = (struct wrr_sched_data *)sch->data;
1547+ // Valid class ids are 1..bandc:
1548+ if(arg>=1 && arg<=wrr->bandc) return wrr->bands[arg-1].que;
1549+ return NULL;
1550+}
1551+ // Translate the minor number of classid into an internal class id (band+1); 0 if it names no band.
1552+ static unsigned long wrr_get(struct Qdisc *sch, u32 classid)
1553+ {
1554+ struct wrr_sched_data *wrr = (struct wrr_sched_data *)sch->data;
1555+ unsigned long minor = TC_H_MIN(classid);
1556+ // A minor of 0 is returned as 0 anyway, so only the upper bound is tested:
1557+ return (minor<=wrr->bandc) ? minor : 0;
1558+}
1559+ // Counterpart of wrr_get; wrr_get takes no reference, so there is nothing to release.
1560+ static void wrr_put(struct Qdisc *q, unsigned long cl)
1561+ {
1562+ // Intentionally empty.
1563+}
1564+ // Class delete hook: nothing is removed; returns 0 for a valid class id and -ENOENT otherwise.
1565+ static int wrr_delete(struct Qdisc *sch, unsigned long cl)
1566+ {
1567+ struct wrr_sched_data *wrr = (struct wrr_sched_data *)sch->data;
1568+ // Valid class ids are 1..bandc:
1569+ if(cl>=1 && cl<=wrr->bandc) return 0;
1570+ return -ENOENT;
1571+}
1572+
1573+
1574+#ifdef CONFIG_RTNETLINK
1575+ static int wrr_dump_class(struct Qdisc *sch, unsigned long cl,
1576+ struct sk_buff *skb, struct tcmsg *tcm)
1577+ { // Netlink class dump hook: append a tc_wrr_class_stats for class cl (band+1) as TCA_OPTIONS; returns skb->len, or -1 after trimming skb back.
1578+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1579+ unsigned char *b = skb->tail;
1580+ struct tc_wrr_class_stats opt;
1581+ memset(&opt,0,sizeof(opt)); // Zero padding and unset fields on every path so no stale stack bytes are copied out
1582+ // Handle of this class:
1583+ tcm->tcm_handle = sch->handle|cl;
1584+
1585+ if(cl==0 || cl>q->bandc)
1586+ goto rtattr_failure;
1587+ cl--;
1588+
1589+ if(cl>=q->filter.mac_cur) {
1590+ // Band is unused:
1591+ // (opt has already been zeroed above.)
1592+ opt.used=0;
1593+ } else {
1594+ opt.used=1;
1595+ opt.class_modf.weight1=q->bands[cl].class_modf.weight1;
1596+ opt.class_modf.weight2=q->bands[cl].class_modf.weight2;
1597+ weight_transmit(&opt.class_modf.weight1,q->qdisc_modf.weight1,0,0,0,0); // size 0: only brings the
1598+ weight_transmit(&opt.class_modf.weight2,q->qdisc_modf.weight2,0,0,0,0); // reported copy up to date
1599+ memcpy(opt.addr,q->filter.cls2mac+cl*ETH_ALEN,ETH_ALEN);
1600+ opt.usemac=q->filter.usemac;
1601+ opt.heappos=q->h.root_1[cl+1].id2idx;
1602+ if(opt.heappos!=0) { // Is in heap
1603+ opt.penal_ls=heap_get_penalty(&q->h,cl).ls;
1604+ opt.penal_ms=heap_get_penalty(&q->h,cl).ms;
1605+ } else {
1606+ opt.penal_ls=0;
1607+ opt.penal_ms=0;
1608+ }
1609+ }
1610+
1611+ // Put queueing information:
1612+ RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
1613+ return skb->len;
1614+
1615+rtattr_failure:
1616+ skb_trim(skb, b - skb->data);
1617+ return -1;
1618+}
1619+#endif
1620+ // Class change hook: apply the tc_wrr_class_modf weights from TCA_OPTIONS to class *arg (band+1). Returns 0, or -EINVAL for an invalid class or missing/short options.
1621+ static int wrr_change(struct Qdisc *sch, u32 handle, u32 parent,
1622+ struct rtattr **tca, unsigned long *arg)
1623+ {
1624+ unsigned long cl = *arg;
1625+ struct wrr_sched_data *q = (struct wrr_sched_data *)sch->data;
1626+ struct rtattr *opt = tca[TCA_OPTIONS-1];
1627+ struct tc_wrr_class_modf *copt;
1628+
1629+ if(cl==0 || cl>q->bandc) return -EINVAL;
1630+ cl--;
1631+ // The request may carry no options attribute at all, so check before dereferencing:
1632+ if (opt==NULL || opt->rta_len < RTA_LENGTH(sizeof(*copt))) return -EINVAL;
1633+ copt = RTA_DATA(opt);
1634+ LOCK_START;
1635+
1636+ weight_setvalue(&q->bands[cl].class_modf.weight1,&copt->weight1);
1637+ weight_setvalue(&q->bands[cl].class_modf.weight2,&copt->weight2);
1638+
1639+ LOCK_END;
1640+
1641+ return 0;
1642+}
1643+ // Class walk hook: call arg->fn for every class id 1..bandc, honouring arg->skip and maintaining arg->count.
1644+ static void wrr_walk(struct Qdisc *sch, struct qdisc_walker *arg)
1645+ {
1646+ struct wrr_sched_data *wrr = (struct wrr_sched_data *)sch->data;
1647+ int cls;
1648+
1649+ // Nothing to do if a stop has already been requested:
1650+ if (arg->stop) return;
1651+
1652+ // Class ids run from 1 to bandc:
1653+ for (cls = 1; cls <= wrr->bandc; cls++) {
1654+ // The first arg->skip classes are only counted; the others go to the
1655+ // callback, whose negative result stops the walk without counting:
1656+ if (arg->count >= arg->skip && arg->fn(sch, cls, arg) < 0) {
1657+ arg->stop = 1;
1658+ return;
1659+ }
1660+ arg->count++;
1661+ }
1662+}
1663+ // Filter chain lookup hook: always returns NULL, for every class.
1664+ static struct tcf_proto ** wrr_find_tcf(struct Qdisc *sch, unsigned long cl)
1665+ {
1666+ return NULL;
1667+}
1668+ // Bind hook: resolves classid exactly like wrr_get; parent is not used.
1669+ static unsigned long wrr_bind(struct Qdisc *sch,
1670+ unsigned long parent, u32 classid)
1671+ {
1672+ return wrr_get(sch, classid);
1673+}
1674+
1675+//-----------------------------------------------------------------------------
1676+ // General
1677+ // Class operations of the wrr qdisc. This is a positional initializer, so the entries must stay in the member order of struct Qdisc_class_ops.
1678+ static struct Qdisc_class_ops wrr_class_ops =
1679+ {
1680+ wrr_graft,
1681+ wrr_leaf,
1682+
1683+ wrr_get,
1684+ wrr_put,
1685+ wrr_change,
1686+ wrr_delete,
1687+ wrr_walk,
1688+ // wrr_put is used a second time in the group below (presumably as the unbind hook - confirm against struct Qdisc_class_ops):
1689+ wrr_find_tcf,
1690+ wrr_bind,
1691+ wrr_put,
1692+
1693+ #ifdef CONFIG_RTNETLINK
1694+ wrr_dump_class,
1695+ #endif
1696+};
1697+ // The qdisc operations registered by init_module. This is a positional initializer, so the entries must stay in the member order of struct Qdisc_ops.
1698+ struct Qdisc_ops wrr_qdisc_ops =
1699+ {
1700+ NULL,
1701+ &wrr_class_ops,
1702+ "wrr",
1703+ sizeof(struct wrr_sched_data),
1704+ // Packet path:
1705+ wrr_enqueue,
1706+ wrr_dequeue,
1707+ wrr_requeue,
1708+ wrr_drop,
1709+ // Setup, teardown and reconfiguration:
1710+ wrr_init,
1711+ wrr_reset,
1712+ wrr_destroy,
1713+ wrr_tune,
1714+
1715+ #ifdef CONFIG_RTNETLINK
1716+ wrr_dump,
1717+ #endif
1718+};
1719+
1720+#ifdef MODULE
1721+ // Module entry point: registers wrr_qdisc_ops and returns the result of register_qdisc.
1722+ int init_module(void)
1723+ {
1724+ return register_qdisc(&wrr_qdisc_ops);
1725+}
1726+ // Module exit point: unregisters wrr_qdisc_ops again.
1727+ void cleanup_module(void)
1728+ {
1729+ unregister_qdisc(&wrr_qdisc_ops);
1730+}
1731+
1732+#endif
1733+
This page took 0.288598 seconds and 4 git commands to generate.