]>
Commit | Line | Data |
---|---|---|
df26885d | 1 | --- linux-2.6.4/include/linux/pkt_sched.h.orig 2004-04-02 23:46:35.869438752 +0200 |
2 | +++ linux-2.6.4/include/linux/pkt_sched.h 2004-04-02 23:59:09.936803088 +0200 | |
3 | @@ -442,4 +442,116 @@ | |
4 | ||
5 | #define TCA_ATM_MAX TCA_ATM_STATE | |
6 | ||
7 | +/* WRR section */ | |
8 | + | |
9 | +/* Other includes */ | |
10 | +#include <linux/if_ether.h> | |
11 | + | |
12 | +// A sub weight and of a class | |
13 | +// All numbers are represented as parts of (2^64-1). | |
14 | +struct tc_wrr_class_weight { | |
15 | + __u64 val; // Current value (0 is not valid) | |
16 | + __u64 decr; // Value pr bytes (2^64-1 is not valid) | |
17 | + __u64 incr; // Value pr seconds (2^64-1 is not valid) | |
18 | + __u64 min; // Minimal value (0 is not valid) | |
19 | + __u64 max; // Minimal value (0 is not valid) | |
20 | + | |
21 | + // The time where the above information was correct: | |
22 | + time_t tim; | |
23 | +}; | |
24 | + | |
25 | +// Pakcet send when modifying a class: | |
26 | +struct tc_wrr_class_modf { | |
27 | + // Not-valid values are ignored. | |
28 | + struct tc_wrr_class_weight weight1; | |
29 | + struct tc_wrr_class_weight weight2; | |
30 | +}; | |
31 | + | |
32 | +// Packet returned when quering a class: | |
33 | +struct tc_wrr_class_stats { | |
34 | + char used; // If this is false the information below is invalid | |
35 | + | |
36 | + struct tc_wrr_class_modf class_modf; | |
37 | + | |
38 | + unsigned char addr[ETH_ALEN]; | |
39 | + char usemac; // True if addr is a MAC address, else it is an IP address | |
40 | + // (this value is only for convience, it is always the same | |
41 | + // value as in the qdisc) | |
42 | + int heappos; // Current heap position or 0 if not in heap | |
43 | + __u64 penal_ls; // Penalty value in heap (ls) | |
44 | + __u64 penal_ms; // Penalty value in heap (ms) | |
45 | +}; | |
46 | + | |
47 | +// Qdisc-wide penalty information (boolean values - 2 not valid) | |
48 | +struct tc_wrr_qdisc_weight { | |
49 | + char weight_mode; // 0=No automatic change to weight | |
50 | + // 1=Decrease normally | |
51 | + // 2=Also multiply with number of machines | |
52 | + // 3=Instead multiply with priority divided | |
53 | + // with priority of the other. | |
54 | + // -1=no change | |
55 | +}; | |
56 | + | |
57 | +// Packet send when modifing a qdisc: | |
58 | +struct tc_wrr_qdisc_modf { | |
59 | + // Not-valid values are ignored: | |
60 | + struct tc_wrr_qdisc_weight weight1; | |
61 | + struct tc_wrr_qdisc_weight weight2; | |
62 | +}; | |
63 | + | |
64 | +// Packet send when creating a qdisc: | |
65 | +struct tc_wrr_qdisc_crt { | |
66 | + struct tc_wrr_qdisc_modf qdisc_modf; | |
67 | + | |
68 | + char srcaddr; // 1=lookup source, 0=lookup destination | |
69 | + char usemac; // 1=Classify on MAC addresses, 0=classify on IP | |
70 | + char usemasq; // 1=Classify based on masqgrading - only valid | |
71 | + // if usemac is zero | |
72 | + int bands_max; // Maximal number of bands (i.e.: classes) | |
73 | + int proxy_maxconn; // If differnt from 0 then we support proxy remapping | |
74 | + // of packets. And this is the number of maximal | |
75 | + // concurrent proxy connections. | |
76 | +}; | |
77 | + | |
78 | +// Packet returned when quering a qdisc: | |
79 | +struct tc_wrr_qdisc_stats { | |
80 | + struct tc_wrr_qdisc_crt qdisc_crt; | |
81 | + int proxy_curconn; | |
82 | + int nodes_in_heap; // Current number of bands wanting to send something | |
83 | + int bands_cur; // Current number of bands used (i.e.: MAC/IP addresses seen) | |
84 | + int bands_reused; // Number of times this band has been reused. | |
85 | + int packets_requed; // Number of times packets have been requeued. | |
86 | + __u64 priosum; // Sum of priorities in heap where 1 is 2^32 | |
87 | +}; | |
88 | + | |
89 | +struct tc_wrr_qdisc_modf_std { | |
90 | + // This indicates which of the tc_wrr_qdisc_modf structers this is: | |
91 | + char proxy; // 0=This struct | |
92 | + | |
93 | + // Should we also change a class? | |
94 | + char change_class; | |
95 | + | |
96 | + // Only valid if change_class is false | |
97 | + struct tc_wrr_qdisc_modf qdisc_modf; | |
98 | + | |
99 | + // Only valid if change_class is true: | |
100 | + unsigned char addr[ETH_ALEN]; // Class to change (non-used bytes should be 0) | |
101 | + struct tc_wrr_class_modf class_modf; // The change | |
102 | +}; | |
103 | + | |
104 | +// Used for proxyrempping: | |
105 | +struct tc_wrr_qdisc_modf_proxy { | |
106 | + // This indicates which of the tc_wrr_qdisc_modf structers this is: | |
107 | + char proxy; // 1=This struct | |
108 | + | |
109 | + // This is 1 if the proxyremap information should be reset | |
110 | + char reset; | |
111 | + | |
112 | + // changec is the number of elements in changes. | |
113 | + int changec; | |
114 | + | |
115 | + // This is an array of type ProxyRemapBlock: | |
116 | + long changes[0]; | |
117 | +}; | |
118 | + | |
c9d1c54c AM |
119 | /* Network emulator */ |
120 | struct tc_netem_qopt | |
df26885d | 121 | { |
122 | diff -uNr linux-2.6.4/net/sched.orig/Kconfig linux-2.6.4/net/sched/Kconfig | |
123 | --- linux-2.6.4/net/sched.orig/Kconfig 2004-04-02 23:46:35.000000000 +0200 | |
124 | +++ linux-2.6.4/net/sched/Kconfig 2004-04-03 00:06:53.438340144 +0200 | |
125 | @@ -39,6 +39,10 @@ | |
126 | To compile this code as a module, choose M here: the | |
127 | module will be called sch_htb. | |
128 | ||
129 | +config NET_SCH_WRR | |
130 | + tristate "WRR packet scheduler" | |
131 | + depends on NET_SCHED | |
132 | + | |
133 | config NET_SCH_HFSC | |
134 | tristate "HFSC packet scheduler" | |
135 | depends on NET_SCHED | |
136 | diff -uNr linux-2.6.4/net/sched.orig/Makefile linux-2.6.4/net/sched/Makefile | |
137 | --- linux-2.6.4/net/sched.orig/Makefile 2004-04-02 23:46:35.000000000 +0200 | |
138 | +++ linux-2.6.4/net/sched/Makefile 2004-04-03 00:05:52.359625520 +0200 | |
139 | @@ -10,6 +10,7 @@ | |
140 | obj-$(CONFIG_NET_CLS_POLICE) += police.o | |
141 | obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o | |
142 | obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o | |
143 | +obj-$(CONFIG_NET_SCH_WRR) += sch_wrr.o | |
df26885d | 144 | obj-$(CONFIG_NET_SCH_HPFQ) += sch_hpfq.o |
145 | obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o | |
c9d1c54c | 146 | obj-$(CONFIG_NET_SCH_RED) += sch_red.o |
df26885d | 147 | diff -uNr linux-2.6.4/net/sched.orig/proxydict.c linux-2.6.4/net/sched/proxydict.c |
148 | --- linux-2.6.4/net/sched.orig/proxydict.c 1970-01-01 01:00:00.000000000 +0100 | |
149 | +++ linux-2.6.4/net/sched/proxydict.c 2004-04-03 00:02:21.565671072 +0200 | |
150 | @@ -0,0 +1,153 @@ | |
151 | +#ifndef __KERNEL__ | |
152 | +#include <string.h> | |
153 | +#include <netinet/in.h> | |
154 | +#endif | |
155 | + | |
156 | +#include "proxyremap.h" | |
157 | +#include "proxydict.h" | |
158 | + | |
159 | + | |
160 | +/*-------------------------------------------------------------------------- | |
161 | +Implementation. | |
162 | +*/ | |
163 | + | |
164 | +// Hash function | |
165 | +#define hash_fnc(m,server,port,proto) \ | |
166 | + (((proto)*7+(server)*13+(port)*5)%m->hash_size) | |
167 | + | |
168 | +// Size of hash table given maximal number of connections: | |
169 | +#define hash_size_max_con(max_con) (2*(max_con)) | |
170 | + | |
c9d1c54c | 171 | +// The __proxydict_memory area we maintain: |
df26885d | 172 | +typedef struct { |
173 | + int hash_size; | |
174 | + int max_con; | |
175 | + int cur_con; | |
176 | + | |
177 | + int free_first; | |
178 | + | |
179 | + // Then we have: | |
180 | + // int hash_table[hash_size]; | |
181 | + // int next[max_con]; | |
182 | + // ProxyRemapBlock info[max_con]; | |
183 | + // | |
184 | + // The idea is the following: | |
185 | + // Given a connection we map it by hash_fnc into hash_table. This gives an | |
186 | + // index in next which contains a -1 terminated linked list of connections | |
187 | + // mapping to that hash value. | |
188 | + // | |
189 | + // The entries in next not allocated is also in linked list where | |
190 | + // the first free index is free_first. | |
c9d1c54c | 191 | +} __proxydict_memory; |
df26885d | 192 | + |
c9d1c54c AM |
193 | +#define Memory(m) ((__proxydict_memory*)m) |
194 | +#define Hash_table(m) ((int*)(((char*)m)+sizeof(__proxydict_memory))) | |
195 | +#define Next(m) ((int*)(((char*)m)+sizeof(__proxydict_memory)+ \ | |
196 | + sizeof(int)*((__proxydict_memory*)m)->hash_size)) | |
df26885d | 197 | +#define Info(m) ((ProxyRemapBlock*)(((char*)m)+ \ |
c9d1c54c AM |
198 | + sizeof(__proxydict_memory)+ \ |
199 | + sizeof(int)*((__proxydict_memory*)m)->hash_size+\ | |
200 | + sizeof(int)*((__proxydict_memory*)m)->max_con \ | |
df26885d | 201 | + )) |
202 | + | |
203 | +int proxyGetMemSize(int max_con) { | |
c9d1c54c | 204 | + return sizeof(__proxydict_memory)+ |
df26885d | 205 | + sizeof(int)*hash_size_max_con(max_con)+ |
206 | + sizeof(int)*max_con+ | |
207 | + sizeof(ProxyRemapBlock)*max_con; | |
208 | +} | |
209 | + | |
210 | +void proxyInitMem(void* data, int max_con) { | |
211 | + // Init m: | |
c9d1c54c | 212 | + __proxydict_memory* m=Memory(data); |
df26885d | 213 | + m->max_con=max_con; |
214 | + m->cur_con=0; | |
215 | + m->hash_size=hash_size_max_con(max_con); | |
216 | + | |
217 | + { | |
218 | + // Get pointers: | |
219 | + int* hash_table=Hash_table(data); | |
220 | + int* next=Next(data); | |
221 | + int i; | |
222 | + | |
223 | + // Init the hash table: | |
224 | + for(i=0; i<m->hash_size; i++) hash_table[i]=-1; | |
225 | + | |
226 | + // Init the free-list | |
227 | + for(i=0; i<m->max_con; i++) next[i]=i+1; | |
228 | + m->free_first=0; | |
229 | + } | |
230 | +} | |
231 | + | |
232 | +int proxyGetCurConn(void* data) { | |
233 | + return Memory(data)->cur_con; | |
234 | +} | |
235 | + | |
236 | +int proxyGetMaxConn(void* data) { | |
237 | + return Memory(data)->max_con; | |
238 | +} | |
239 | + | |
240 | +ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto) { | |
c9d1c54c | 241 | + __proxydict_memory* m=Memory(data); |
df26885d | 242 | + int* hash_table=Hash_table(m); |
243 | + int* next=Next(m); | |
244 | + ProxyRemapBlock* info=Info(m); | |
245 | + int i; | |
246 | + | |
247 | + for(i=hash_table[hash_fnc(m,ipaddr,port,proto)]; i!=-1; i=next[i]) { | |
248 | + if(info[i].proto==proto && | |
249 | + info[i].sport==port && | |
250 | + info[i].saddr==ipaddr) return &info[i]; | |
251 | + } | |
252 | + | |
253 | + return 0; | |
254 | +} | |
255 | + | |
256 | +int proxyConsumeBlock(void* data, ProxyRemapBlock* blk) { | |
c9d1c54c | 257 | + __proxydict_memory* m=Memory(data); |
df26885d | 258 | + int* hash_table=Hash_table(m); |
259 | + int* next=Next(m); | |
260 | + ProxyRemapBlock* info=Info(m); | |
261 | + int hash=hash_fnc(m,blk->saddr,blk->sport,blk->proto); | |
262 | + int foo; | |
263 | + | |
264 | + if(blk->open) { | |
265 | + if(m->cur_con == m->max_con) return -1; | |
266 | + | |
267 | + // Insert the block at a free entry: | |
268 | + info[m->free_first]=*blk; | |
269 | + m->cur_con++; | |
270 | + | |
271 | + foo=next[m->free_first]; | |
272 | + | |
273 | + // And insert it in the hash tabel: | |
274 | + next[m->free_first]=hash_table[hash]; | |
275 | + hash_table[hash]=m->free_first; | |
276 | + m->free_first=foo; | |
277 | + } else { | |
278 | + int* toupdate; | |
279 | + | |
280 | + // Find the block | |
281 | + for(toupdate=&hash_table[hash]; | |
282 | + *toupdate!=-1; | |
283 | + toupdate=&next[*toupdate]) { | |
284 | + if(info[*toupdate].proto==blk->proto && | |
285 | + info[*toupdate].sport==blk->sport && | |
286 | + info[*toupdate].saddr==blk->saddr) break; | |
287 | + } | |
288 | + if(*toupdate==-1) return -1; | |
289 | + | |
290 | + foo=*toupdate; | |
291 | + | |
292 | + // Delete it from the hashing list: | |
293 | + *toupdate=next[*toupdate]; | |
294 | + | |
295 | + // And put it on the free list: | |
296 | + next[foo]=m->free_first; | |
297 | + m->free_first=foo; | |
298 | + | |
299 | + m->cur_con--; | |
300 | + } | |
301 | + | |
302 | + return 0; | |
303 | +} | |
304 | diff -uNr linux-2.6.4/net/sched.orig/proxydict.h linux-2.6.4/net/sched/proxydict.h | |
305 | --- linux-2.6.4/net/sched.orig/proxydict.h 1970-01-01 01:00:00.000000000 +0100 | |
306 | +++ linux-2.6.4/net/sched/proxydict.h 2004-04-03 00:02:21.567670768 +0200 | |
307 | @@ -0,0 +1,32 @@ | |
308 | +#ifdef __cplusplus | |
309 | +extern "C" { | |
310 | +#endif | |
311 | + | |
312 | +/*-------------------------------------------------------------------------- | |
313 | +This is common code for for handling the tabels containing information about | |
314 | +which proxyserver connections are associated with which machines.. | |
315 | +*/ | |
316 | + | |
317 | +// Returns the number of bytes that should be available in the area | |
318 | +// maintained by this module given the maximal number of concurrent | |
319 | +// connections. | |
320 | +int proxyGetMemSize(int max_connections); | |
321 | + | |
322 | +// Initializes a memory area to use. There must be as many bytes | |
323 | +// available as returned by getMemSize. | |
324 | +void proxyInitMem(void* data, int max_connections); | |
325 | + | |
326 | +// Queries: | |
327 | +int proxyGetCurConn(void* data); // Returns current number of connections | |
328 | +int proxyMaxCurConn(void* data); // Returns maximal number of connections | |
329 | + | |
330 | +// This is called to open and close conenctions. Returns -1 if | |
331 | +// a protocol error occores (i.e.: If it is discovered) | |
332 | +int proxyConsumeBlock(void* data, ProxyRemapBlock*); | |
333 | + | |
334 | +// Returns the RemapBlock associated with this connection or 0: | |
335 | +ProxyRemapBlock* proxyLookup(void* data, unsigned ipaddr, unsigned short port, char proto); | |
336 | + | |
337 | +#ifdef __cplusplus | |
338 | +} | |
339 | +#endif | |
340 | diff -uNr linux-2.6.4/net/sched.orig/proxyremap.h linux-2.6.4/net/sched/proxyremap.h | |
341 | --- linux-2.6.4/net/sched.orig/proxyremap.h 1970-01-01 01:00:00.000000000 +0100 | |
342 | +++ linux-2.6.4/net/sched/proxyremap.h 2004-04-03 00:02:21.568670616 +0200 | |
343 | @@ -0,0 +1,33 @@ | |
344 | +#ifndef PROXYREMAP_H | |
345 | +#define PROXYREMAP_H | |
346 | + | |
347 | +// This describes the information that is written in proxyremap.log and which | |
348 | +// are used in the communication between proxyremapserver and proxyremapclient. | |
349 | +// Everything is in network order. | |
350 | + | |
351 | +// First this header is send: | |
352 | +#define PROXY_WELCOME_LINE "ProxyRemap 1.02. This is a binary protocol.\r\n" | |
353 | + | |
354 | +// Then this block is send every time a connection is opened or closed. | |
355 | +// Note how it is alligned to use small space usage - arrays of this | |
356 | +// structure are saved in many places. | |
357 | +typedef struct { | |
358 | + // Server endpoint of connection: | |
359 | + unsigned saddr; | |
360 | + unsigned short sport; | |
361 | + | |
362 | + // IP protocol for this connection (typically udp or tcp): | |
363 | + unsigned char proto; | |
364 | + | |
365 | + // Is the connection opened or closed? | |
366 | + unsigned char open; | |
367 | + | |
368 | + // Client the packets should be accounted to: | |
369 | + unsigned caddr; | |
370 | + unsigned char macaddr[6]; // Might be 0. | |
371 | + | |
372 | + // An informal two-charecter code from the proxyserver. Used for debugging. | |
373 | + char proxyinfo[2]; | |
374 | +} ProxyRemapBlock; | |
375 | + | |
376 | +#endif | |
377 | diff -uNr linux-2.6.4/net/sched.orig/sch_wrr.c linux-2.6.4/net/sched/sch_wrr.c | |
378 | --- linux-2.6.4/net/sched.orig/sch_wrr.c 1970-01-01 01:00:00.000000000 +0100 | |
379 | +++ linux-2.6.4/net/sched/sch_wrr.c 2004-04-03 00:02:21.574669704 +0200 | |
c9d1c54c | 380 | @@ -0,0 +1,1360 @@ |
df26885d | 381 | +/*----------------------------------------------------------------------------- |
382 | +Weighted Round Robin scheduler. | |
383 | + | |
384 | +Written by Christian Worm Mortensen, cworm@it-c.dk. | |
385 | + | |
386 | +Introduction | |
387 | +============ | |
388 | +This module implements a weighted round robin queue with build-in classifier. | |
389 | +The classifier currently map each MAC or IP address (configurable either MAC | |
390 | +or IP and either source or destination) to different classes. Each such class | |
391 | +is called a band. Whan using MAC addresses only bridged packets can be | |
392 | +classified other packets go to a default MAC address. | |
393 | + | |
394 | +Each band has a weight value, where 0<weight<=1. The bandwidth each band | |
395 | +get is proportional to the weight as can be deduced from the next section. | |
396 | + | |
397 | + | |
398 | +The queue | |
399 | +========= | |
400 | +Each band has a penalty value. Bands having something to sent are kept in | |
401 | +a heap according to this value. The band with the lowest penalty value | |
402 | +is in the root of the heap. The penalty value is a 128 bit number. Initially | |
403 | +no bands are in the heap. | |
404 | + | |
405 | +Two global 64 bit values counter_low_penal and couter_high_penal are initialized | |
406 | +to 0 and to 2^63 respectively. | |
407 | + | |
408 | +Enqueing: | |
409 | + The packet is inserted in the queue for the band it belongs to. If the band | |
410 | + is not in the heap it is inserted into it. In this case, the upper 64 bits | |
411 | + of its penalty value is set to the same as for the root-band of the heap. | |
412 | + If the heap is empty 0 is used. The lower 64 bit is set to couter_low_penal | |
413 | + and couter_low_penal is incremented by 1. | |
414 | + | |
415 | +Dequing: | |
416 | + If the heap is empty we have nothing to send. | |
417 | + | |
418 | + If the root band has a non-empty queue a packet is dequeued from that. | |
419 | + The upper 64 bit of the penalty value of the band is incremented by the | |
420 | + packet size divided with the weight of the band. The lower 64 bit is set to | |
421 | + couter_high_penal and couter_high_penal is incremented by 1. | |
422 | + | |
423 | + If the root element for some reason has an empty queue it is removed from | |
424 | + the heap and we try to dequeue again. | |
425 | + | |
426 | +The effect of the heap and the upper 64 bit of the penalty values is to | |
427 | +implement a weighted round robin queue. The effect of counter_low_penal, | |
428 | +counter_high_penal and the lower 64 bit of the penalty value is primarily to | |
429 | +stabilize the queue and to give better quality of service to machines only | |
430 | +sending a packet now and then. For example machines which have a single | |
431 | +interactive connection such as telnet or simple text chatting. | |
432 | + | |
433 | + | |
434 | +Setting weight | |
435 | +============== | |
436 | +The weight value can be changed dynamically by the queue itself. The weight | |
437 | +value and how it is changed is described by the two members weight1 and | |
438 | +weight2 which has type tc_wrr_class_weight and which are in each class. And | |
439 | +by the two integer value members of the qdisc called penalfact1 and penalfact2. | |
440 | +The structure is defined as: | |
441 | + | |
442 | + struct tc_wrr_class_weight { | |
443 | + // All are represented as parts of (2^64-1). | |
444 | + __u64 val; // Current value (0 is not valid) | |
445 | + __u64 decr; // Value pr bytes (2^64-1 is not valid) | |
446 | + __u64 incr; // Value pr seconds (2^64-1 is not valid) | |
447 | + __u64 min; // Minimal value (0 is not valid) | |
448 | + __u64 max; // Minimal value (0 is not valid) | |
449 | + | |
450 | + // The time where the above information was correct: | |
451 | + time_t tim; | |
452 | + }; | |
453 | + | |
454 | +The weight value used by the dequeue operations is calculated as | |
455 | +weight1.val*weight2.val. weight1 and weight2 and handled independently and in the | |
456 | +same way as will be described now. | |
457 | + | |
458 | +Every second, the val parameter is incremented by incr. | |
459 | + | |
460 | +Every time a packet is transmitted the value is increment by decr times | |
461 | +the packet size. Depending on the value of the weight_mode parameter it | |
462 | +is also mulitplied with other numbers. This makes it possible to give | |
463 | +penalty to machines transferring much data. | |
464 | + | |
465 | +-----------------------------------------------------------------------------*/ | |
466 | + | |
467 | +#include <linux/config.h> | |
468 | +#include <linux/module.h> | |
469 | +#include <asm/uaccess.h> | |
470 | +#include <asm/system.h> | |
471 | +#include <asm/bitops.h> | |
472 | +#include <linux/types.h> | |
473 | +#include <linux/kernel.h> | |
474 | +#include <linux/vmalloc.h> | |
475 | +#include <linux/sched.h> | |
476 | +#include <linux/string.h> | |
477 | +#include <linux/mm.h> | |
478 | +#include <linux/socket.h> | |
479 | +#include <linux/sockios.h> | |
480 | +#include <linux/in.h> | |
481 | +#include <linux/errno.h> | |
482 | +#include <linux/interrupt.h> | |
483 | +#include <linux/if_ether.h> | |
484 | +#include <linux/inet.h> | |
485 | +#include <linux/netdevice.h> | |
486 | +#include <linux/etherdevice.h> | |
487 | +#include <linux/notifier.h> | |
488 | +#include <net/ip.h> | |
489 | +#include <net/route.h> | |
490 | +#include <linux/skbuff.h> | |
491 | +#include <net/sock.h> | |
492 | +#include <net/pkt_sched.h> | |
493 | + | |
494 | +#include <linux/if_arp.h> | |
495 | +#include <linux/version.h> | |
496 | + | |
497 | +// There seems to be problems when calling functions from userspace when | |
498 | +// using vmalloc and vfree. | |
499 | +//#define my_malloc(size) vmalloc(size) | |
500 | +//#define my_free(ptr) vfree(ptr) | |
501 | +#define my_malloc(size) kmalloc(size,GFP_KERNEL) | |
502 | +#define my_free(ptr) kfree(ptr) | |
503 | + | |
504 | +// Kernel depend stuff: | |
505 | +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0) | |
506 | + #define KERNEL22 | |
507 | +#endif | |
508 | + | |
509 | +#ifdef KERNEL22 | |
510 | + #define LOCK_START start_bh_atomic(); | |
511 | + #define LOCK_END end_bh_atomic(); | |
512 | + #define ENQUEUE_SUCCESS 1 | |
513 | + #define ENQUEUE_FAIL 0 | |
514 | + #ifdef CONFIG_IP_MASQUERADE | |
515 | + #include <net/ip_masq.h> | |
516 | + #define MASQ_SUPPORT | |
517 | + #endif | |
518 | +#else | |
519 | + #define LOCK_START sch_tree_lock(sch); | |
520 | + #define LOCK_END sch_tree_unlock(sch); | |
521 | + #define ENQUEUE_SUCCESS 0 | |
522 | + #define ENQUEUE_FAIL NET_XMIT_DROP | |
523 | + #ifdef CONFIG_NETFILTER | |
524 | + #include <linux/netfilter_ipv4/ip_conntrack.h> | |
525 | + #define MASQ_SUPPORT | |
526 | + #endif | |
527 | +#endif | |
528 | + | |
529 | +#include "proxydict.c" | |
530 | + | |
531 | +// The penalty (priority) type: | |
532 | +typedef u64 penalty_base_t; | |
533 | +#define penalty_base_t_max ((penalty_base_t)-1) | |
534 | +typedef struct penalty_t { | |
535 | + penalty_base_t ms; | |
536 | + penalty_base_t ls; | |
537 | +} penalty_t; | |
538 | +#define penalty_leq(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<=b.ls)) | |
539 | +#define penalty_le(a,b) (a.ms<b.ms || (a.ms==b.ms && a.ls<b.ls)) | |
540 | +static penalty_t penalty_max={penalty_base_t_max,penalty_base_t_max}; | |
541 | + | |
542 | +//----------------------------------------------------------------------------- | |
543 | +// A generel heap. | |
544 | + | |
545 | +struct heap; | |
546 | +struct heap_element; | |
547 | + | |
548 | +// Initializes an empty heap: | |
549 | +// he: A pointer to an unintialized heap structure identifying the heap | |
550 | +// size: Maximal number of elements the heap can contain | |
551 | +// poll: An array of size "size" used by the heap. | |
552 | +static void heap_init(struct heap* he,int size, struct heap_element* poll); | |
553 | + | |
554 | +// Each element in the heap is identified by a user-assigned id which | |
555 | +// should be a non negative integer less than the size argument | |
556 | +// given to heap_init. | |
557 | +static void heap_insert(struct heap*, int id, penalty_t); | |
558 | +static void heap_remove(struct heap*, int id); | |
559 | +static void heap_set_penalty(struct heap*, int id, penalty_t); | |
560 | + | |
561 | +// Retreviewing information: | |
562 | +static char heap_empty(struct heap*); // Heap empty? | |
563 | +static char heap_contains(struct heap*, int id); // Does heap contain | |
564 | + // the given id? | |
565 | +static int heap_root(struct heap*); // Returns the id of the root | |
566 | +static penalty_t heap_get_penalty(struct heap*, int id); // Returns penaly | |
567 | + // of root node | |
568 | + | |
569 | +//-------------------- | |
570 | +// Heap implementation | |
571 | + | |
572 | +struct heap_element { | |
573 | + penalty_t penalty; | |
574 | + int id; // The user-assigned id of this element | |
575 | + int id2idx; // Maps from user-assigned ids to indices in root_1 | |
576 | +}; | |
577 | + | |
578 | +struct heap { | |
579 | + struct heap_element* root_1; | |
580 | + int elements; | |
581 | +}; | |
582 | + | |
583 | +// Heap implementation: | |
584 | +static void heap_init(struct heap* h, int size, struct heap_element* poll) { | |
585 | + int i; | |
586 | + | |
587 | + h->elements=0; | |
588 | + h->root_1=poll-1; | |
589 | + | |
590 | + for(i=0; i<size; i++) poll[i].id2idx=0; | |
591 | +}; | |
592 | + | |
593 | +static char heap_empty(struct heap* h) { | |
594 | + return h->elements==0; | |
595 | +} | |
596 | + | |
597 | +static char heap_contains(struct heap* h, int id) { | |
598 | + return h->root_1[id+1].id2idx!=0; | |
599 | +} | |
600 | + | |
601 | +static int heap_root(struct heap* h) { | |
602 | + return h->root_1[1].id; | |
603 | +} | |
604 | + | |
605 | +static penalty_t heap_get_penalty(struct heap* h, int id) { | |
606 | + return h->root_1[ h->root_1[id+1].id2idx ].penalty; | |
607 | +} | |
608 | + | |
609 | +static void heap_penalty_changed_internal(struct heap* h,int idx); | |
610 | + | |
611 | +static void heap_set_penalty(struct heap* h, int id, penalty_t p) { | |
612 | + int idx=h->root_1[id+1].id2idx; | |
613 | + h->root_1[idx].penalty=p; | |
614 | + heap_penalty_changed_internal(h,idx); | |
615 | +} | |
616 | + | |
617 | +static void heap_insert(struct heap* h, int id, penalty_t p) { | |
618 | + // Insert at the end of the heap: | |
619 | + h->elements++; | |
620 | + h->root_1[h->elements].id=id; | |
621 | + h->root_1[h->elements].penalty=p; | |
622 | + h->root_1[id+1].id2idx=h->elements; | |
623 | + | |
624 | + // And put it in the right position: | |
625 | + heap_penalty_changed_internal(h,h->elements); | |
626 | +} | |
627 | + | |
628 | +static void heap_remove(struct heap* h, int id) { | |
629 | + int idx=h->root_1[id+1].id2idx; | |
630 | + int mvid; | |
631 | + h->root_1[id+1].id2idx=0; | |
632 | + | |
633 | + if(h->elements==idx) { h->elements--; return; } | |
634 | + | |
635 | + mvid=h->root_1[h->elements].id; | |
636 | + h->root_1[idx].id=mvid; | |
637 | + h->root_1[idx].penalty=h->root_1[h->elements].penalty; | |
638 | + h->root_1[mvid+1].id2idx=idx; | |
639 | + | |
640 | + h->elements--; | |
641 | + heap_penalty_changed_internal(h,idx); | |
642 | +} | |
643 | + | |
644 | +static void heap_swap(struct heap* h, int idx0, int idx1) { | |
645 | + penalty_t tmp_p; | |
646 | + int tmp_id; | |
647 | + int id0,id1; | |
648 | + | |
649 | + // Simple content: | |
650 | + tmp_p=h->root_1[idx0].penalty; | |
651 | + tmp_id=h->root_1[idx0].id; | |
652 | + h->root_1[idx0].penalty=h->root_1[idx1].penalty; | |
653 | + h->root_1[idx0].id=h->root_1[idx1].id; | |
654 | + h->root_1[idx1].penalty=tmp_p; | |
655 | + h->root_1[idx1].id=tmp_id; | |
656 | + | |
657 | + // Update reverse pointers: | |
658 | + id0=h->root_1[idx0].id; | |
659 | + id1=h->root_1[idx1].id; | |
660 | + h->root_1[id0+1].id2idx=idx0; | |
661 | + h->root_1[id1+1].id2idx=idx1; | |
662 | +} | |
663 | + | |
664 | +static void heap_penalty_changed_internal(struct heap* h,int cur) { | |
665 | + if(cur==1 || penalty_leq(h->root_1[cur>>1].penalty,h->root_1[cur].penalty)) { | |
666 | + // We are in heap order upwards - so we should move the element down | |
667 | + for(;;) { | |
668 | + int nxt0=cur<<1; | |
669 | + int nxt1=nxt0+1; | |
670 | + penalty_t pen_c=h->root_1[cur].penalty; | |
671 | + penalty_t pen_0=nxt0<=h->elements ? h->root_1[nxt0].penalty : penalty_max; | |
672 | + penalty_t pen_1=nxt1<=h->elements ? h->root_1[nxt1].penalty : penalty_max; | |
673 | + | |
674 | + if(penalty_le(pen_0,pen_c) && penalty_leq(pen_0,pen_1)) { | |
675 | + // Swap with child 0: | |
676 | + heap_swap(h,cur,nxt0); | |
677 | + cur=nxt0; | |
678 | + } else if(penalty_le(pen_1,pen_c)) { | |
679 | + // Swap with child 1: | |
680 | + heap_swap(h,cur,nxt1); | |
681 | + cur=nxt1; | |
682 | + } else { | |
683 | + // Heap in heap order: | |
684 | + return; | |
685 | + } | |
686 | + } | |
687 | + } else { | |
688 | + // We are not in heap order upwards (and thus we must be it downwards). | |
689 | + // We move up: | |
690 | + while(cur!=1) { // While not root | |
691 | + int nxt=cur>>1; | |
692 | + if(penalty_leq(h->root_1[nxt].penalty,h->root_1[cur].penalty)) return; | |
693 | + heap_swap(h,cur,nxt); | |
694 | + cur=nxt; | |
695 | + } | |
696 | + } | |
697 | +}; | |
698 | + | |
699 | +//----------------------------------------------------------------------------- | |
700 | +// Classification based on MAC or IP adresses. Note that of historical reason | |
701 | +// these are prefixed with mac_ since originally only MAC bases classification | |
702 | +// was supported. | |
703 | +// | |
704 | +// This code should be in a separate filter module - but it isn't. | |
705 | + | |
706 | +// Interface: | |
707 | + | |
708 | +struct mac_head; | |
709 | + | |
710 | +// Initialices/destroys the structure we maintain. | |
711 | +// Returns -1 on error | |
712 | +static int mac_init(struct mac_head*, int max_macs, char srcaddr, | |
713 | + char usemac, char usemasq, void* proxyremap); | |
714 | +static void mac_done(struct mac_head*); | |
715 | +static void mac_reset(struct mac_head*); | |
716 | + | |
717 | +// Classify a packet. Returns a number n where 0<=n<max_macs. Or -1 if | |
718 | +// the packet should be dropped. | |
719 | +static int mac_classify(struct mac_head*, struct sk_buff *skb); | |
720 | + | |
721 | +//------------- | |
722 | +// Implementation: | |
723 | + | |
724 | +struct mac_addr { | |
725 | + unsigned char addr[ETH_ALEN]; // Address of this band (last two are 0 on IP) | |
726 | + unsigned long lastused; // Last time a packet was encountered | |
727 | + int class; // Classid of this band (0<=classid<max_macs) | |
728 | +}; | |
729 | + | |
730 | +static int mac_compare(const void* a, const void* b) { | |
731 | + return memcmp(a,b,ETH_ALEN); | |
732 | +} | |
733 | + | |
734 | +struct mac_head { | |
735 | + int mac_max; // Maximal number of MAC addresses/classes allowed | |
736 | + int mac_cur; // Current number of MAC addresses/classes | |
737 | + int mac_reused; // Number of times we have reused a class with a new | |
738 | + // address. | |
739 | + u64 incr_time; | |
740 | + char srcaddr; // True if we classify on the source address of packets, | |
741 | + // else we use destination address. | |
742 | + char usemac; // If true we use mac, else we use IP | |
743 | + char usemasq; // If true we try to demasqgrade | |
744 | + struct mac_addr* macs; // Allocated mac_max elements, used max_cur | |
745 | + char* cls2mac; // Mapping from classnumbers to addresses - | |
746 | + // there is 6 bytes in each entry | |
747 | + | |
748 | + void* proxyremap; // Information on proxy remapping of data or 0 | |
749 | +}; | |
750 | + | |
751 | +// This is as the standard C library function with the same name: | |
752 | +static const void* bsearch(const void* key, const void* base, int nmemb, | |
753 | + size_t size, | |
754 | + int (*compare)(const void*, const void*)) { | |
755 | + int m_idx; | |
756 | + const void* m_ptr; | |
757 | + int i; | |
758 | + | |
759 | + if(nmemb<=0) return 0; | |
760 | + | |
761 | + m_idx=nmemb>>1; | |
762 | + m_ptr=((const char*)base)+m_idx*size; | |
763 | + | |
764 | + i=compare(key,m_ptr); | |
765 | + if(i<0) // key is less | |
766 | + return bsearch(key,base,m_idx,size,compare); | |
767 | + else if(i>0) | |
768 | + return bsearch(key,((const char*)m_ptr)+size,nmemb-m_idx-1,size,compare); | |
769 | + | |
770 | + return m_ptr; | |
771 | +} | |
772 | + | |
773 | +static int mac_init(struct mac_head* h, int max_macs, char srcaddr, | |
774 | + char usemac, char usemasq,void* proxyremap) { | |
775 | + h->mac_cur=0; | |
776 | + h->mac_reused=0; | |
777 | + h->incr_time=0; | |
778 | + h->srcaddr=srcaddr; | |
779 | + h->usemac=usemac; | |
780 | + h->usemasq=usemasq; | |
781 | + h->mac_max=max_macs; | |
782 | + h->proxyremap=proxyremap; | |
783 | + | |
784 | + h->macs=(struct mac_addr*) | |
785 | + my_malloc( sizeof(struct mac_addr)*max_macs); | |
786 | + h->cls2mac=(char*)my_malloc( 6*max_macs); | |
787 | + if(!h->macs || !h->cls2mac) { | |
788 | + if(h->macs) my_free(h->macs); | |
789 | + if(h->cls2mac) my_free(h->cls2mac); | |
790 | + return -1; | |
791 | + } | |
792 | + return 0; | |
793 | +} | |
794 | + | |
795 | +static void mac_done(struct mac_head* h) { | |
796 | + my_free(h->macs); | |
797 | + my_free(h->cls2mac); | |
798 | +} | |
799 | + | |
800 | +static void mac_reset(struct mac_head* h) { | |
801 | + h->mac_cur=0; | |
802 | + h->mac_reused=0; | |
803 | + h->incr_time=0; | |
804 | +} | |
805 | + | |
806 | +static int lookup_mac(struct mac_head* h, unsigned char* addr) { | |
807 | + int i; | |
808 | + int class; | |
809 | + | |
810 | + // First try to find the address in the table: | |
811 | + struct mac_addr* m=(struct mac_addr*) | |
812 | + bsearch(addr,h->macs,h->mac_cur,sizeof(struct mac_addr),mac_compare); | |
813 | + if(m) { | |
814 | + // Found: | |
815 | + m->lastused=h->incr_time++; | |
816 | + return m->class; | |
817 | + } | |
818 | + | |
819 | + // Okay - the MAC adress was not in table | |
820 | + if(h->mac_cur==h->mac_max) { | |
821 | + // And the table is full - delete the oldest entry: | |
822 | + | |
823 | + // Find the oldest entry: | |
824 | + int lowidx=0; | |
825 | + int i; | |
826 | + for(i=1; i<h->mac_cur; i++) | |
827 | + if(h->macs[i].lastused < h->macs[lowidx].lastused) lowidx=i; | |
828 | + | |
829 | + class=h->macs[lowidx].class; | |
830 | + | |
831 | + // And delete it: | |
832 | + memmove(&h->macs[lowidx],&h->macs[lowidx+1], | |
833 | + (h->mac_cur-lowidx-1)*sizeof(struct mac_addr)); | |
834 | + h->mac_reused++; | |
835 | + h->mac_cur--; | |
836 | + } else { | |
837 | + class=h->mac_cur; | |
838 | + } | |
839 | + | |
840 | + // The table is now not full - find the position we should put the address in: | |
841 | + for(i=0; i<h->mac_cur; i++) if(mac_compare(addr,&h->macs[i])<0) break; | |
842 | + | |
843 | + // We should insert at position i: | |
844 | + memmove(&h->macs[i+1],&h->macs[i],(h->mac_cur-i)*sizeof(struct mac_addr)); | |
845 | + m=&h->macs[i]; | |
846 | + memcpy(m->addr,addr,ETH_ALEN); | |
847 | + m->lastused=h->incr_time++; | |
848 | + m->class=class; | |
849 | + h->mac_cur++; | |
850 | + | |
851 | + // Finally update the cls2mac variabel: | |
852 | + memcpy(h->cls2mac+ETH_ALEN*class,addr,ETH_ALEN); | |
853 | + | |
854 | + return m->class; | |
855 | +} | |
856 | + | |
857 | +int valid_ip_checksum(struct iphdr* ip, int size) { | |
858 | + __u16 header_len=ip->ihl<<2; | |
859 | + __u16 c=0; | |
860 | + __u16* ipu=(u16*)ip; | |
861 | + int a; | |
862 | + | |
863 | + // We require 4 bytes in the packet since we access the port numbers: | |
864 | + if((size<header_len) || size<sizeof(struct iphdr)+4) return 0; | |
865 | + | |
866 | + for(a=0; a<(header_len>>1); a++, ipu++) { | |
867 | + if(a!=5) { // If not the checksum field | |
868 | + __u16 oldc=c; | |
869 | + c+=(*ipu); | |
870 | + if(c<oldc) c++; | |
871 | + } | |
872 | + } | |
873 | + | |
874 | + return ip->check==(__u16)~c; | |
875 | +} | |
876 | + | |
877 | +static int mac_classify(struct mac_head* head, struct sk_buff *skb) | |
878 | +{ | |
879 | + // We set this to the address we map to. In case we map to an IP | |
880 | + // address the last two entries are set to 0. | |
881 | + unsigned char addr[ETH_ALEN]; | |
882 | + | |
883 | + | |
884 | + // This is the size of the network part of the packet, I think: | |
885 | + int size=((char*)skb->data+skb->len)-((char*)skb->nh.iph); | |
886 | + | |
887 | + // Set a default value for the address: | |
888 | + memset(addr,0,ETH_ALEN); | |
889 | + | |
890 | + // Accept IP-ARP traffic with big-enough packets: | |
891 | + if(ntohs(skb->protocol)==ETH_P_ARP && | |
892 | + ntohs(skb->nh.arph->ar_pro)==ETH_P_IP) { | |
893 | + // Map all ARP trafic to a default adress to make sure | |
894 | + // it goes through | |
895 | + } else if ((ntohs(skb->protocol)==ETH_P_IP) && | |
896 | + valid_ip_checksum(skb->nh.iph,size)) { | |
897 | + // Accept IP packets which have correct checksum. | |
898 | + | |
899 | + // This is the IP header: | |
900 | + struct iphdr* iph=skb->nh.iph; | |
901 | + | |
902 | + // And this is the port numbers: | |
903 | + const __u16 *portp = (__u16 *)&(((char *)iph)[iph->ihl*4]); | |
904 | + __u16 sport=portp[0]; | |
905 | + __u16 dport=portp[1]; | |
906 | + | |
907 | + // We will set this to the IP address of the packet that should be | |
908 | + // accounted to: | |
909 | + unsigned ipaddr; | |
910 | + | |
911 | + // Used below: | |
912 | + ProxyRemapBlock* prm; | |
913 | + | |
914 | + // Set ipaddr: | |
915 | + if(head->srcaddr) | |
916 | + ipaddr=iph->saddr; | |
917 | + else | |
918 | + ipaddr=iph->daddr; | |
919 | + | |
920 | +#ifdef MASQ_SUPPORT | |
921 | + // Update ipaddr if packet is masqgraded: | |
922 | + if(head->usemasq) { | |
923 | + #ifdef KERNEL22 | |
924 | + struct ip_masq* src; | |
925 | + | |
926 | + // HACK!: | |
927 | + // ip_masq_in_get must be called for packets comming from the outside | |
928 | + // to the firewall. We have a a packet which is comming from the | |
929 | + // firewall to the outside - so we switch the parameters: | |
930 | + if((src=ip_masq_in_get( | |
931 | + iph->protocol, | |
932 | + iph->daddr,dport, | |
933 | + iph->saddr,sport))) { | |
934 | + // Use masqgraded address: | |
935 | + ipaddr=src->saddr; | |
936 | + | |
937 | + // It seems like we must put it back: | |
938 | + ip_masq_put(src); | |
939 | + } | |
940 | + #else | |
941 | + // Thanks to Rusty Russell for help with the following code: | |
942 | + enum ip_conntrack_info ctinfo; | |
943 | + struct ip_conntrack *ct; | |
944 | + ct = ip_conntrack_get(skb, &ctinfo); | |
945 | + if (ct) { | |
946 | + if(head->srcaddr) | |
947 | + ipaddr=ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.src.ip; | |
948 | + else | |
949 | + ipaddr=ct->tuplehash[CTINFO2DIR(ctinfo)].tuple.dst.ip; | |
950 | + } | |
951 | + #endif | |
952 | + } | |
953 | +#endif | |
954 | + | |
955 | + // Set prm based on ipaddr: | |
956 | + prm=0; | |
957 | + if(head->proxyremap) { | |
958 | + if(head->srcaddr) { | |
959 | + prm=proxyLookup(head->proxyremap,ipaddr,sport,skb->nh.iph->protocol); | |
960 | + } else { | |
961 | + prm=proxyLookup(head->proxyremap,ipaddr,dport,skb->nh.iph->protocol); | |
962 | + } | |
963 | + } | |
964 | + | |
965 | + // And finally set addr to the address: | |
966 | + memset(addr,0,ETH_ALEN); | |
967 | + if(prm) { | |
968 | + // This package should be remapped: | |
969 | + if(head->usemac) | |
970 | + memcpy(addr,prm->macaddr,ETH_ALEN); | |
971 | + else { | |
972 | + memcpy(addr,&prm->caddr,sizeof(unsigned)); | |
973 | + } | |
974 | + } else { | |
975 | + // This packet should not be remapped: | |
976 | + if(head->usemac) { | |
977 | + // We should find MAC address of packet. | |
978 | + // Unfortunatly, this is not always available. | |
979 | + // On bridged packets it always is, however.. | |
980 | + #ifdef KERNEL22 | |
981 | + if(skb->pkt_bridged) { | |
982 | + if(head->srcaddr) { | |
983 | + memcpy(addr,skb->mac.ethernet->h_source,ETH_ALEN); | |
984 | + } else { | |
985 | + memcpy(addr,skb->mac.ethernet->h_dest,ETH_ALEN); | |
986 | + } | |
987 | + } | |
988 | + #endif | |
989 | + } else { | |
990 | + memcpy(addr,&ipaddr,4); | |
991 | + } | |
992 | + } | |
993 | + } else { | |
994 | + // All other traffic is dropped - this ensures that packets | |
995 | + // we consider probably have valid addresses so we don't | |
996 | + // get to many strange addresses into our table. And that we | |
997 | + // don't use bandwidth on strange packets.. | |
998 | + return -1; | |
999 | + } | |
1000 | + | |
1001 | + return lookup_mac(head,addr); | |
1002 | +} | |
1003 | + | |
1004 | +//----------------------------------------------------------------------------- | |
1005 | +// The qdisc itself | |
1006 | + | |
1007 | +// Pr-class information. | |
1008 | +struct wrrc_sched_data { | |
1009 | + struct Qdisc* que; // The queue for this class | |
1010 | + struct tc_wrr_class_modf class_modf; // Information about the class. | |
1011 | + | |
1012 | + // For classes in the heap this is the priority value priosum | |
1013 | + // was updated with for this class: | |
1014 | + u64 priosum_val; | |
1015 | +}; | |
1016 | + | |
1017 | +// Pr-qdisc information: | |
1018 | +struct wrr_sched_data | |
1019 | +{ | |
1020 | + // A heap containing all the bands that will send something | |
1021 | + struct heap h; | |
1022 | + struct heap_element* poll; // bandc elements | |
1023 | + | |
1024 | + // The sum of the prioities of the elements in the heap where | |
1025 | + // a priority of 1 is saved as 2^32 | |
1026 | + u64 priosum; | |
1027 | + | |
1028 | + // A class for each band | |
1029 | + struct wrrc_sched_data* bands; // bandc elements | |
1030 | + | |
1031 | + // Information maintained by the proxydict module of 0 if we | |
1032 | + // have no proxy remapping | |
1033 | + void* proxydict; | |
1034 | + | |
1035 | + // Always incrementning counters, we always have that any value of | |
1036 | + // counter_low_penal < any value of counter_high_penal. | |
1037 | + penalty_base_t counter_low_penal; | |
1038 | + penalty_base_t counter_high_penal; | |
1039 | + | |
1040 | + // Penalty updating: | |
1041 | + struct tc_wrr_qdisc_modf qdisc_modf; | |
1042 | + | |
1043 | + // Statistics: | |
1044 | + int packets_requed; | |
1045 | + | |
1046 | + // The filter: | |
1047 | + struct mac_head filter; | |
1048 | + int bandc; // Number of bands | |
1049 | +}; | |
1050 | + | |
1051 | +// Priority handling. | |
1052 | +// weight is in interval [0..2^32] | |
1053 | +// priosum has whole numbers in the upper and fragments in the lower 32 bits. | |
1054 | +static void weight_transmit(struct tc_wrr_class_weight* p, | |
1055 | + struct tc_wrr_qdisc_weight q, | |
1056 | + unsigned heapsize, | |
1057 | + u64 priosum, u64 weight, | |
1058 | + unsigned size) { | |
1059 | + | |
1060 | + unsigned long now=jiffies/HZ; | |
1061 | + | |
1062 | + // Penalty for transmitting: | |
1063 | + u64 change,old; | |
1064 | + u32 divisor; | |
1065 | + | |
1066 | + change=0; | |
1067 | + switch(q.weight_mode) { | |
1068 | + case 1: change=p->decr*size; break; | |
1069 | + case 2: change=p->decr*size*heapsize; break; | |
1070 | + case 3: // Note: 64 bit division is not always available.. | |
1071 | + divisor=(u32)(weight>>16); | |
1072 | + if(divisor<=0) divisor=1; | |
1073 | + change=p->decr*size*(((u32)(priosum>>16))/divisor); break; | |
1074 | + } | |
1075 | + old=p->val; | |
1076 | + p->val-=change; | |
1077 | + if(p->val>old || p->val<p->min) p->val=p->min; | |
1078 | + | |
1079 | + // Credit for time went: | |
1080 | + change=(now-p->tim)*p->incr; | |
1081 | + p->tim=now; | |
1082 | + old=p->val; | |
1083 | + p->val+=change; | |
1084 | + if(p->val<old || p->val>p->max) p->val=p->max; | |
1085 | +} | |
1086 | + | |
1087 | +static void weight_setdefault(struct tc_wrr_class_weight* p) { | |
1088 | + p->val=(u64)-1; | |
1089 | + p->decr=0; | |
1090 | + p->incr=0; | |
1091 | + p->min=(u64)-1; | |
1092 | + p->max=(u64)-1; | |
1093 | + p->tim=jiffies/HZ; | |
1094 | +} | |
1095 | + | |
1096 | +static void weight_setvalue(struct tc_wrr_class_weight* dst, | |
1097 | + struct tc_wrr_class_weight* src) { | |
1098 | + if(src->val!=0) { | |
1099 | + dst->val=src->val; | |
1100 | + dst->tim=jiffies/HZ; | |
1101 | + } | |
1102 | + if(src->min!=0) dst->min=src->min; | |
1103 | + if(src->max!=0) dst->max=src->max; | |
1104 | + if(src->decr!=((u64)-1)) dst->decr=src->decr; | |
1105 | + if(src->incr!=((u64)-1)) dst->incr=src->incr; | |
1106 | + if(dst->val<dst->min) dst->val=dst->min; | |
1107 | + if(dst->val>dst->max) dst->val=dst->max; | |
1108 | +} | |
1109 | + | |
1110 | +static void wrr_destroy(struct Qdisc *sch) | |
1111 | +{ | |
c9d1c54c | 1112 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1113 | + int i; |
1114 | + | |
1115 | + // Destroy our filter: | |
1116 | + mac_done(&q->filter); | |
1117 | + | |
1118 | + // Destroy all our childre ques: | |
1119 | + for(i=0; i<q->bandc; i++) | |
1120 | + qdisc_destroy(q->bands[i].que); | |
1121 | + | |
1122 | + // And free memory: | |
1123 | + my_free(q->bands); | |
1124 | + my_free(q->poll); | |
1125 | + if(q->proxydict) my_free(q->proxydict); | |
df26885d | 1126 | +} |
1127 | + | |
1128 | +static int wrr_init(struct Qdisc *sch, struct rtattr *opt) | |
1129 | +{ | |
c9d1c54c | 1130 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1131 | + int i,maciniterr; |
1132 | + char crterr; | |
1133 | + struct tc_wrr_qdisc_crt *qopt; | |
1134 | + | |
1135 | + // Parse options: | |
1136 | + if (!opt) return -EINVAL; // Options must be specified | |
1137 | + if (opt->rta_len < RTA_LENGTH(sizeof(*qopt))) return -EINVAL; | |
1138 | + qopt = RTA_DATA(opt); | |
1139 | + | |
1140 | + if(qopt->bands_max>8192 || qopt->bands_max<2) { | |
1141 | + // More than 8192 queues or less than 2? That cannot be true - it must be | |
1142 | + // an error... | |
1143 | + return -EINVAL; | |
1144 | + } | |
1145 | + | |
1146 | + if(qopt->proxy_maxconn<0 || qopt->proxy_maxconn>20000) { | |
1147 | + // More than this number of maximal concurrent connections is unrealistic | |
1148 | + return -EINVAL; | |
1149 | + } | |
1150 | + | |
1151 | +#ifndef MASQ_SUPPORT | |
1152 | + if(qopt->usemasq) { | |
1153 | + return -ENOSYS; | |
1154 | + } | |
1155 | +#endif | |
1156 | + | |
1157 | +#ifndef KERNEL22 | |
1158 | + if(qopt->usemac) { // Not supported - please fix this! | |
1159 | + return -ENOSYS; | |
1160 | + } | |
1161 | +#endif | |
1162 | + | |
1163 | + q->bandc=qopt->bands_max; | |
1164 | + q->qdisc_modf=qopt->qdisc_modf; | |
1165 | + | |
1166 | + // Create structures: | |
1167 | + q->poll=(struct heap_element*) | |
1168 | + my_malloc( sizeof(struct heap_element)*q->bandc); | |
1169 | + q->bands=(struct wrrc_sched_data*) | |
1170 | + my_malloc( sizeof(struct wrrc_sched_data)*q->bandc); | |
1171 | + | |
1172 | + if(qopt->proxy_maxconn>0) { | |
1173 | + q->proxydict=my_malloc(proxyGetMemSize(qopt->proxy_maxconn)); | |
1174 | + } else { | |
1175 | + q->proxydict=0; | |
1176 | + } | |
1177 | + | |
1178 | + // Init mac module: | |
1179 | + maciniterr=mac_init(&q->filter,qopt->bands_max,qopt->srcaddr, | |
1180 | + qopt->usemac,qopt->usemasq,q->proxydict); | |
1181 | + | |
1182 | + // See if we got the memory we wanted: | |
1183 | + if(!q->poll || !q->bands || | |
1184 | + (qopt->proxy_maxconn>0 && !q->proxydict) || maciniterr<0) { | |
1185 | + if(q->poll) my_free(q->poll); | |
1186 | + if(q->bands) my_free(q->bands); | |
1187 | + if(q->proxydict) my_free(q->proxydict); | |
1188 | + if(maciniterr>=0) mac_done(&q->filter); | |
1189 | + return -ENOMEM; | |
1190 | + } | |
1191 | + | |
1192 | + // Initialize proxy: | |
1193 | + if(q->proxydict) { | |
1194 | + proxyInitMem(q->proxydict,qopt->proxy_maxconn); | |
1195 | + } | |
1196 | + | |
1197 | + // Initialize values: | |
1198 | + q->counter_low_penal=0; | |
1199 | + q->counter_high_penal=penalty_base_t_max>>1; | |
1200 | + q->packets_requed=0; | |
1201 | + | |
1202 | + // Initialize empty heap: | |
1203 | + heap_init(&q->h,q->bandc,q->poll); | |
1204 | + q->priosum=0; | |
1205 | + | |
1206 | + // Initialize each band: | |
1207 | + crterr=0; | |
1208 | + for (i=0; i<q->bandc; i++) { | |
1209 | + weight_setdefault(&q->bands[i].class_modf.weight1); | |
1210 | + weight_setdefault(&q->bands[i].class_modf.weight2); | |
1211 | + if(!crterr) { | |
1212 | + struct Qdisc *child=qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); | |
1213 | + if(child) | |
1214 | + q->bands[i].que = child; | |
1215 | + else { | |
1216 | + // Queue couldn't be created :-( | |
1217 | + crterr=1; | |
1218 | + } | |
1219 | + } | |
1220 | + if(crterr) q->bands[i].que = &noop_qdisc; | |
1221 | + } | |
1222 | + | |
df26885d | 1223 | + if(crterr) { |
1224 | + // Destroy again: | |
1225 | + wrr_destroy(sch); | |
1226 | + return -ENOMEM; | |
1227 | + } | |
1228 | + | |
1229 | + return 0; | |
1230 | +} | |
1231 | + | |
1232 | +static void wrr_reset(struct Qdisc* sch) | |
1233 | +{ | |
c9d1c54c | 1234 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1235 | + int i; |
1236 | + | |
1237 | + // Reset own values: | |
1238 | + q->counter_low_penal=0; | |
1239 | + q->counter_high_penal=penalty_base_t_max>>1; | |
1240 | + q->packets_requed=0; | |
1241 | + | |
1242 | + // Reset filter: | |
1243 | + mac_reset(&q->filter); | |
1244 | + | |
1245 | + // Reinitialize heap: | |
1246 | + heap_init(&q->h,q->bandc,q->poll); | |
1247 | + q->priosum=0; | |
1248 | + | |
1249 | + // Reset all bands: | |
1250 | + for (i=0; i<q->bandc; i++) { | |
1251 | + weight_setdefault(&q->bands[i].class_modf.weight1); | |
1252 | + weight_setdefault(&q->bands[i].class_modf.weight2); | |
1253 | + qdisc_reset(q->bands[i].que); | |
1254 | + } | |
1255 | + | |
1256 | + // Reset proxy remapping information: | |
1257 | + if(q->proxydict) | |
1258 | + proxyInitMem(q->proxydict,proxyGetMaxConn(q->proxydict)); | |
1259 | +} | |
1260 | + | |
1261 | +static int wrr_enqueue(struct sk_buff *skb, struct Qdisc* sch) | |
1262 | +{ | |
c9d1c54c | 1263 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1264 | + int retvalue=ENQUEUE_FAIL; |
1265 | + | |
1266 | + // The packet is in skb. | |
1267 | + int band=mac_classify(&q->filter,skb); | |
1268 | + | |
1269 | + if(band>=0) { | |
1270 | + // Enque packet for this band: | |
1271 | + struct Qdisc* qdisc = q->bands[band].que; | |
1272 | + | |
1273 | + if ((retvalue=qdisc->enqueue(skb, qdisc)) == ENQUEUE_SUCCESS) { | |
1274 | + // Successfull | |
1275 | + sch->stats.bytes += skb->len; | |
1276 | + sch->stats.packets++; | |
1277 | + sch->q.qlen++; | |
1278 | + | |
1279 | + // Insert band into heap if not already there: | |
1280 | + if(!heap_contains(&q->h,band)) { | |
1281 | + penalty_t p; | |
1282 | + if(!heap_empty(&q->h)) | |
1283 | + p.ms=heap_get_penalty(&q->h,heap_root(&q->h)).ms; | |
1284 | + else | |
1285 | + p.ms=0; | |
1286 | + p.ls=q->counter_low_penal++; | |
1287 | + heap_insert(&q->h,band,p); | |
1288 | + q->bands[band].priosum_val= | |
1289 | + ((q->bands[band].class_modf.weight1.val>>48)+1)* | |
1290 | + ((q->bands[band].class_modf.weight2.val>>48)+1); | |
1291 | + q->priosum+=q->bands[band].priosum_val; | |
1292 | + } | |
1293 | + } | |
1294 | + } else { | |
1295 | + // If we decide not to enque it seems like we also need to free the packet: | |
1296 | + kfree_skb(skb); | |
1297 | + } | |
1298 | + | |
1299 | + if(retvalue!=ENQUEUE_SUCCESS) { | |
1300 | + // Packet not enqued: | |
1301 | + sch->stats.drops++; | |
1302 | + } | |
1303 | + | |
1304 | + return retvalue; | |
1305 | +} | |
1306 | + | |
1307 | +static struct sk_buff *wrr_dequeue(struct Qdisc* sch) | |
1308 | +{ | |
c9d1c54c | 1309 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1310 | + struct sk_buff* skb; |
1311 | + int band; | |
1312 | + u64 weight,priosum; | |
1313 | + struct wrrc_sched_data* b; | |
1314 | + | |
1315 | + // Return if heap is empty: | |
1316 | + if(heap_empty(&q->h)) return 0; | |
1317 | + | |
1318 | + // Find root element: | |
1319 | + band=heap_root(&q->h); | |
1320 | + | |
1321 | + // Find priority of this element in interval [1;2^32] | |
1322 | + b=&q->bands[band]; | |
1323 | + weight=((b->class_modf.weight1.val>>48)+1)* | |
1324 | + ((b->class_modf.weight2.val>>48)+1); //weight is in interval [1;2^32] | |
1325 | + priosum=q->priosum; | |
1326 | + q->priosum-=q->bands[band].priosum_val; | |
1327 | + | |
1328 | + // Deque the packet from the root: | |
1329 | + skb=q->bands[band].que->dequeue(q->bands[band].que); | |
1330 | + | |
1331 | + if(skb) { | |
1332 | + // There was a packet in this que. | |
1333 | + unsigned adjlen; | |
1334 | + penalty_t p; | |
1335 | + | |
1336 | + // Find length of packet adjusted with priority: | |
1337 | + adjlen=(u32)(weight>>(32-16)); | |
1338 | + if(adjlen==0) adjlen=1; | |
1339 | + adjlen=(skb->len<<16)/adjlen; | |
1340 | + | |
1341 | + // Update penalty information for this class: | |
1342 | + weight_transmit(&b->class_modf.weight1,q->qdisc_modf.weight1,q->h.elements,priosum,weight,skb->len); | |
1343 | + weight_transmit(&b->class_modf.weight2,q->qdisc_modf.weight2,q->h.elements,priosum,weight,skb->len); | |
1344 | + q->bands[band].priosum_val=((b->class_modf.weight1.val>>48)+1)* | |
1345 | + ((b->class_modf.weight2.val>>48)+1); | |
1346 | + q->priosum+=q->bands[band].priosum_val; | |
1347 | + | |
1348 | + // And update the class in the heap | |
1349 | + p=heap_get_penalty(&q->h,band); | |
1350 | + p.ms+=adjlen; | |
1351 | + p.ls=q->counter_high_penal++; | |
1352 | + heap_set_penalty(&q->h,band,p); | |
1353 | + | |
1354 | + // Return packet: | |
1355 | + sch->q.qlen--; | |
1356 | + return skb; | |
1357 | + } | |
1358 | + | |
1359 | + // No packet - so machine should be removed from heap: | |
1360 | + heap_remove(&q->h,band); | |
1361 | + | |
1362 | + // And try again: | |
1363 | + return wrr_dequeue(sch); | |
1364 | +} | |
1365 | + | |
1366 | +static int wrr_requeue(struct sk_buff *skb, struct Qdisc* sch) | |
1367 | +{ | |
c9d1c54c | 1368 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1369 | + struct Qdisc* qdisc; |
1370 | + int ret; | |
1371 | + | |
1372 | + // Find band we took it from: | |
1373 | + int band=mac_classify(&q->filter,skb); | |
1374 | + if(band<0) { | |
1375 | + // Who should now free the pakcet? | |
1376 | + printk(KERN_DEBUG "sch_wrr: Oops - packet requed could never have been queued.\n"); | |
1377 | + sch->stats.drops++; | |
1378 | + return ENQUEUE_FAIL; | |
1379 | + } | |
1380 | + | |
1381 | + q->packets_requed++; | |
1382 | + | |
1383 | + // Try to requeue it on that machine: | |
1384 | + qdisc=q->bands[band].que; | |
1385 | + | |
1386 | + if((ret=qdisc->ops->requeue(skb,qdisc))==ENQUEUE_SUCCESS) { | |
1387 | + // On success: | |
1388 | + sch->q.qlen++; | |
1389 | + | |
1390 | + // We should restore priority information - but we don't | |
1391 | + // | |
1392 | + // p=heap_get_penalty(&q->h,band); | |
1393 | + // ... | |
1394 | + // heap_set_penalty(&q->h,band,p); | |
1395 | + | |
1396 | + return ENQUEUE_SUCCESS; | |
1397 | + } else { | |
1398 | + sch->stats.drops++; | |
1399 | + return ret; | |
1400 | + } | |
1401 | +} | |
1402 | + | |
1403 | +static unsigned wrr_drop(struct Qdisc* sch) | |
1404 | +{ | |
c9d1c54c | 1405 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1406 | + |
1407 | + // Ugly... Drop button up in heap: | |
1408 | + int i; | |
1409 | + | |
1410 | + for(i=q->h.elements; i>=1; i--) { | |
1411 | + int band=q->h.root_1[i].id; | |
1412 | + if(q->bands[band].que->ops->drop(q->bands[band].que)) { | |
1413 | + // On success | |
1414 | + sch->q.qlen--; | |
1415 | + return 1; | |
1416 | + } | |
1417 | + } | |
1418 | + | |
1419 | + return 0; | |
1420 | +} | |
1421 | + | |
1422 | +static int wrr_dump(struct Qdisc *sch, struct sk_buff *skb) | |
1423 | +{ | |
c9d1c54c | 1424 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1425 | + unsigned char *b = skb->tail; |
1426 | + struct tc_wrr_qdisc_stats opt; | |
1427 | + | |
1428 | + opt.qdisc_crt.qdisc_modf=q->qdisc_modf; | |
1429 | + opt.qdisc_crt.srcaddr=q->filter.srcaddr; | |
1430 | + opt.qdisc_crt.usemac=q->filter.usemac; | |
1431 | + opt.qdisc_crt.usemasq=q->filter.usemasq; | |
1432 | + opt.qdisc_crt.bands_max=q->filter.mac_max; | |
1433 | + opt.nodes_in_heap=q->h.elements; | |
1434 | + opt.bands_cur=q->filter.mac_cur; | |
1435 | + opt.bands_reused=q->filter.mac_reused; | |
1436 | + opt.packets_requed=q->packets_requed; | |
1437 | + opt.priosum=q->priosum; | |
1438 | + | |
1439 | + if(q->proxydict) { | |
1440 | + opt.qdisc_crt.proxy_maxconn=proxyGetMaxConn(q->proxydict); | |
1441 | + opt.proxy_curconn=proxyGetCurConn(q->proxydict); | |
1442 | + } else { | |
1443 | + opt.qdisc_crt.proxy_maxconn=0; | |
1444 | + opt.proxy_curconn=0; | |
1445 | + } | |
1446 | + | |
1447 | + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | |
1448 | + return skb->len; | |
1449 | + | |
1450 | +rtattr_failure: // seems like RTA_PUT jump to this label.. | |
1451 | + skb_trim(skb, b - skb->data); | |
1452 | + return -1; | |
1453 | +} | |
1454 | + | |
1455 | +static int wrr_tune_std(struct Qdisc *sch, struct rtattr *opt) | |
1456 | +{ | |
c9d1c54c | 1457 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1458 | + struct tc_wrr_qdisc_modf_std *qopt = RTA_DATA(opt); |
1459 | + | |
1460 | + if(opt->rta_len < RTA_LENGTH(sizeof(*qopt))) return -EINVAL; | |
1461 | + | |
1462 | + LOCK_START | |
1463 | + | |
1464 | + if(qopt->change_class) { | |
1465 | + int idx=lookup_mac(&q->filter,qopt->addr); | |
1466 | + weight_setvalue | |
1467 | + (&q->bands[idx].class_modf.weight1,&qopt->class_modf.weight1); | |
1468 | + weight_setvalue | |
1469 | + (&q->bands[idx].class_modf.weight2,&qopt->class_modf.weight2); | |
1470 | + } else { | |
1471 | + if(qopt->qdisc_modf.weight1.weight_mode!=-1) | |
1472 | + q->qdisc_modf.weight1.weight_mode=qopt->qdisc_modf.weight1.weight_mode; | |
1473 | + if(qopt->qdisc_modf.weight2.weight_mode!=-1) | |
1474 | + q->qdisc_modf.weight2.weight_mode=qopt->qdisc_modf.weight2.weight_mode; | |
1475 | + } | |
1476 | + | |
1477 | + LOCK_END | |
1478 | + | |
1479 | + return 0; | |
1480 | +} | |
1481 | + | |
1482 | +static int wrr_tune_proxy(struct Qdisc *sch, struct rtattr *opt) | |
1483 | +{ | |
c9d1c54c | 1484 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1485 | + struct tc_wrr_qdisc_modf_proxy *qopt = RTA_DATA(opt); |
1486 | + int i; | |
1487 | + | |
1488 | + // Return if we are not configured with proxy support: | |
1489 | + if(!q->proxydict) return -ENOSYS; | |
1490 | + | |
1491 | + // Return if not enough data given: | |
1492 | + if(opt->rta_len<RTA_LENGTH(sizeof(*qopt)) || | |
1493 | + opt->rta_len< | |
1494 | + RTA_LENGTH(sizeof(*qopt)+sizeof(ProxyRemapBlock)*qopt->changec)) | |
1495 | + return -EINVAL; | |
1496 | + | |
1497 | + LOCK_START; | |
1498 | + | |
1499 | + if(qopt->reset) { | |
1500 | + proxyInitMem(q->proxydict,proxyGetMaxConn(q->proxydict)); | |
1501 | + } | |
1502 | + | |
1503 | + // Do all the changes: | |
1504 | + for(i=0; i<qopt->changec; i++) { | |
1505 | + proxyConsumeBlock(q->proxydict,&((ProxyRemapBlock*)&qopt->changes)[i]); | |
1506 | + } | |
1507 | + | |
1508 | + LOCK_END; | |
1509 | + | |
1510 | + return 0; | |
1511 | +} | |
1512 | + | |
1513 | +static int wrr_tune(struct Qdisc *sch, struct rtattr *opt) { | |
1514 | + if(((struct tc_wrr_qdisc_modf_std*)RTA_DATA(opt))->proxy) { | |
1515 | + return wrr_tune_proxy(sch,opt); | |
1516 | + } else { | |
1517 | + return wrr_tune_std(sch,opt); | |
1518 | + } | |
1519 | +} | |
1520 | + | |
1521 | +//----------------------------------------------------------------------------- | |
1522 | +// Classes. | |
1523 | +// External and internal IDs are equal. They are the band number plus 1. | |
1524 | + | |
1525 | +// Replace a class with another: | |
1526 | +static int wrr_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, | |
1527 | + struct Qdisc **old) | |
1528 | +{ | |
c9d1c54c | 1529 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1530 | + if(arg>q->bandc || arg==0) return -EINVAL; |
1531 | + arg--; | |
1532 | + | |
1533 | + if (new == NULL) | |
1534 | + new = &noop_qdisc; | |
1535 | + | |
1536 | +#ifdef KERNEL22 | |
1537 | + *old = xchg(&q->bands[arg].que, new); | |
1538 | +#else | |
1539 | + LOCK_START | |
1540 | + *old = q->bands[arg].que; | |
1541 | + q->bands[arg].que = new; | |
1542 | + qdisc_reset(*old); | |
1543 | + LOCK_END | |
1544 | +#endif | |
1545 | + | |
1546 | + return 0; | |
1547 | +} | |
1548 | + | |
1549 | +// Returns the qdisc for a class: | |
1550 | +static struct Qdisc * wrr_leaf(struct Qdisc *sch, unsigned long arg) | |
1551 | +{ | |
c9d1c54c | 1552 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1553 | + if(arg>q->bandc || arg==0) return NULL; |
1554 | + arg--; | |
1555 | + return q->bands[arg].que; | |
1556 | +} | |
1557 | + | |
1558 | +static unsigned long wrr_get(struct Qdisc *sch, u32 classid) | |
1559 | +{ | |
c9d1c54c | 1560 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1561 | + unsigned long band = TC_H_MIN(classid); |
1562 | + if(band>q->bandc || band==0) return 0; | |
1563 | + return band; | |
1564 | +} | |
1565 | + | |
1566 | +static void wrr_put(struct Qdisc *q, unsigned long cl) | |
1567 | +{ | |
1568 | + return; | |
1569 | +} | |
1570 | + | |
1571 | +static int wrr_delete(struct Qdisc *sch, unsigned long cl) | |
1572 | +{ | |
c9d1c54c | 1573 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1574 | + if(cl==0 || cl>q->bandc) return -ENOENT; |
1575 | + cl--; | |
1576 | + return 0; | |
1577 | +} | |
1578 | + | |
1579 | +static int wrr_dump_class(struct Qdisc *sch, unsigned long cl, | |
1580 | + struct sk_buff *skb, struct tcmsg *tcm) | |
1581 | +{ | |
c9d1c54c | 1582 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1583 | + unsigned char *b = skb->tail; |
1584 | + struct tc_wrr_class_stats opt; | |
1585 | + | |
1586 | + // Handle of this class: | |
1587 | + tcm->tcm_handle = sch->handle|cl; | |
1588 | + | |
1589 | + if(cl==0 || cl>q->bandc) | |
1590 | + goto rtattr_failure; | |
1591 | + cl--; | |
1592 | + | |
1593 | + if(cl>=q->filter.mac_cur) { | |
1594 | + // Band is unused: | |
1595 | + memset(&opt,0,sizeof(opt)); | |
1596 | + opt.used=0; | |
1597 | + } else { | |
1598 | + opt.used=1; | |
1599 | + opt.class_modf.weight1=q->bands[cl].class_modf.weight1; | |
1600 | + opt.class_modf.weight2=q->bands[cl].class_modf.weight2; | |
1601 | + weight_transmit(&opt.class_modf.weight1,q->qdisc_modf.weight1,0,0,0,0); | |
1602 | + weight_transmit(&opt.class_modf.weight2,q->qdisc_modf.weight2,0,0,0,0); | |
1603 | + memcpy(opt.addr,q->filter.cls2mac+cl*ETH_ALEN,ETH_ALEN); | |
1604 | + opt.usemac=q->filter.usemac; | |
1605 | + opt.heappos=q->h.root_1[cl+1].id2idx; | |
1606 | + if(opt.heappos!=0) { // Is in heap | |
1607 | + opt.penal_ls=heap_get_penalty(&q->h,cl).ls; | |
1608 | + opt.penal_ms=heap_get_penalty(&q->h,cl).ms; | |
1609 | + } else { | |
1610 | + opt.penal_ls=0; | |
1611 | + opt.penal_ms=0; | |
1612 | + } | |
1613 | + } | |
1614 | + | |
1615 | + // Put quing information: | |
1616 | + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); | |
1617 | + return skb->len; | |
1618 | + | |
1619 | +rtattr_failure: | |
1620 | + skb_trim(skb, b - skb->data); | |
1621 | + return -1; | |
1622 | +} | |
1623 | + | |
1624 | +static int wrr_change(struct Qdisc *sch, u32 handle, u32 parent, | |
1625 | + struct rtattr **tca, unsigned long *arg) | |
1626 | +{ | |
1627 | + unsigned long cl = *arg; | |
c9d1c54c | 1628 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1629 | + struct rtattr *opt = tca[TCA_OPTIONS-1]; |
1630 | + struct tc_wrr_class_modf *copt = RTA_DATA(opt); | |
1631 | + | |
1632 | + if(cl==0 || cl>q->bandc) return -EINVAL; | |
1633 | + cl--; | |
1634 | + | |
1635 | + if (opt->rta_len < RTA_LENGTH(sizeof(*copt))) return -EINVAL; | |
1636 | + | |
1637 | + LOCK_START; | |
1638 | + | |
1639 | + weight_setvalue(&q->bands[cl].class_modf.weight1,&copt->weight1); | |
1640 | + weight_setvalue(&q->bands[cl].class_modf.weight2,&copt->weight2); | |
1641 | + | |
1642 | + LOCK_END; | |
1643 | + | |
1644 | + return 0; | |
1645 | +} | |
1646 | + | |
1647 | +static void wrr_walk(struct Qdisc *sch, struct qdisc_walker *arg) | |
1648 | +{ | |
c9d1c54c | 1649 | + struct wrr_sched_data *q = qdisc_priv(sch); |
df26885d | 1650 | + int prio; |
1651 | + | |
1652 | + if (arg->stop) return; | |
1653 | + | |
1654 | + for (prio = 1; prio <= q->bandc; prio++) { | |
1655 | + if (arg->count < arg->skip) { | |
1656 | + arg->count++; | |
1657 | + continue; | |
1658 | + } | |
1659 | + if (arg->fn(sch, prio, arg) < 0) { | |
1660 | + arg->stop = 1; | |
1661 | + break; | |
1662 | + } | |
1663 | + arg->count++; | |
1664 | + } | |
1665 | +} | |
1666 | + | |
1667 | +static struct tcf_proto ** wrr_find_tcf(struct Qdisc *sch, unsigned long cl) | |
1668 | +{ | |
1669 | + return NULL; | |
1670 | +} | |
1671 | + | |
1672 | +static unsigned long wrr_bind(struct Qdisc *sch, | |
1673 | + unsigned long parent, u32 classid) | |
1674 | +{ | |
1675 | + return wrr_get(sch, classid); | |
1676 | +} | |
1677 | + | |
1678 | +//----------------------------------------------------------------------------- | |
1679 | +// Generel | |
1680 | + | |
1681 | +static struct Qdisc_class_ops wrr_class_ops = | |
1682 | +{ | |
1683 | + wrr_graft, | |
1684 | + wrr_leaf, | |
1685 | + | |
1686 | + wrr_get, | |
1687 | + wrr_put, | |
1688 | + wrr_change, | |
1689 | + wrr_delete, | |
1690 | + wrr_walk, | |
1691 | + | |
1692 | + wrr_find_tcf, | |
1693 | + wrr_bind, | |
1694 | + wrr_put, | |
1695 | + | |
1696 | +#if !defined(KERNEL22) || defined(CONFIG_RTNETLINK) | |
1697 | + wrr_dump_class, | |
1698 | +#endif | |
1699 | +}; | |
1700 | + | |
1701 | +struct Qdisc_ops wrr_qdisc_ops = | |
1702 | +{ | |
1703 | + NULL, | |
1704 | + &wrr_class_ops, | |
1705 | + "wrr", | |
1706 | + sizeof(struct wrr_sched_data), | |
1707 | + | |
1708 | + wrr_enqueue, | |
1709 | + wrr_dequeue, | |
1710 | + wrr_requeue, | |
1711 | + wrr_drop, | |
1712 | + | |
1713 | + wrr_init, | |
1714 | + wrr_reset, | |
1715 | + wrr_destroy, | |
1716 | + wrr_tune, | |
1717 | + | |
1718 | +#if !defined(KERNEL22) || defined(CONFIG_RTNETLINK) | |
1719 | + wrr_dump, | |
1720 | +#endif | |
1721 | +}; | |
1722 | + | |
1723 | +#ifdef MODULE | |
1724 | + | |
1725 | +int init_module(void) | |
1726 | +{ | |
1727 | + return register_qdisc(&wrr_qdisc_ops); | |
1728 | +} | |
1729 | + | |
1730 | +void cleanup_module(void) | |
1731 | +{ | |
1732 | + unregister_qdisc(&wrr_qdisc_ops); | |
1733 | +} | |
1734 | + | |
1735 | +#ifndef KERNEL22 | |
1736 | + MODULE_LICENSE("GPL"); | |
1737 | +#endif | |
1738 | + | |
1739 | +#endif | |
1740 | + |