1 diff -uNr linux-2.6.35/drivers/net/imq.c linux-2.6.35-imq-multiqueue-test1/drivers/net/imq.c
2 --- linux-2.6.35/drivers/net/imq.c      1970-01-01 02:00:00.000000000 +0200
3 +++ linux-2.6.35-imq-multiqueue-test1/drivers/net/imq.c 2010-08-15 13:54:30.070063067 +0300
4 @@ -0,0 +1,774 @@
5 +/*
6 + *             Pseudo-driver for the intermediate queue device.
7 + *
8 + *             This program is free software; you can redistribute it and/or
9 + *             modify it under the terms of the GNU General Public License
10 + *             as published by the Free Software Foundation; either version
11 + *             2 of the License, or (at your option) any later version.
12 + *
13 + * Authors:    Patrick McHardy, <kaber@trash.net>
14 + *
15 + *            The first version was written by Martin Devera, <devik@cdi.cz>
16 + *
17 + * Credits:    Jan Rafaj <imq2t@cedric.vabo.cz>
18 + *              - Update patch to 2.4.21
19 + *             Sebastian Strollo <sstrollo@nortelnetworks.com>
20 + *              - Fix "Dead-loop on netdevice imq"-issue
21 + *             Marcel Sebek <sebek64@post.cz>
22 + *              - Update to 2.6.2-rc1
23 + *
24 + *            After some time of inactivity there is a group taking care
25 + *            of IMQ again: http://www.linuximq.net
26 + *
27 + *
28 + *            2004/06/30 - New version of IMQ patch to kernels <=2.6.7
29 + *             including the following changes:
30 + *
31 + *            - Correction of ipv6 support "+"s issue (Hasso Tepper)
32 + *            - Correction of imq_init_devs() issue that resulted in
33 + *            kernel OOPS unloading IMQ as module (Norbert Buchmuller)
34 + *            - Addition of functionality to choose number of IMQ devices
35 + *            during kernel config (Andre Correa)
36 + *            - Addition of functionality to choose how IMQ hooks on
37 + *            PRE and POSTROUTING (after or before NAT) (Andre Correa)
38 + *            - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
39 + *
40 + *
41 + *             2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
42 + *             released with almost no problems. 2.6.14-x was released
43 + *             with some important changes: nfcache was removed. After
44 + *             some weeks of trouble we figured out that some IMQ fields
45 + *             in skb were missing in skbuff.c - skb_clone and copy_skb_header.
46 + *             These functions are correctly patched by this new patch version.
47 + *
48 + *             Thanks for all who helped to figure out all the problems with
49 + *             2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
50 + *             Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
51 + *             I didn't forget anybody). I apologize again for my lack of time.
52 + *
53 + *
54 + *             2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
55 + *             of qdisc_restart() and moved qdisc_run() to tasklet to avoid
56 + *             recursive locking. New initialization routines to fix 'rmmod' not
57 + *             working anymore. Used code from ifb.c. (Jussi Kivilinna)
58 + *
59 + *             2008/08/06 - 2.6.26 - (JK)
60 + *              - Replaced tasklet with 'netif_schedule()'.
61 + *              - Cleaned up and added comments for imq_nf_queue().
62 + *
63 + *             2009/04/12
64 + *              - Add skb_save_cb/skb_restore_cb helper functions for backing up
65 + *                the control buffer. This is needed because the qdisc layer on
66 + *                kernels 2.6.27 and newer overwrites the control buffer. (Jussi Kivilinna)
67 + *              - Add better locking for IMQ device. Hopefully this will solve
68 + *                SMP issues. (Jussi Kivilinna)
69 + *              - Port to 2.6.27
70 + *              - Port to 2.6.28
71 + *              - Port to 2.6.29 + fix rmmod not working
72 + *
73 + *             2009/04/20 - (Jussi Kivilinna)
74 + *              - Use netdevice feature flags to avoid extra packet handling
75 + *                by core networking layer and possibly increase performance.
76 + *
77 + *             2009/09/26 - (Jussi Kivilinna)
78 + *              - Add imq_nf_reinject_lockless to fix deadlock with
79 + *                imq_nf_queue/imq_nf_reinject.
80 + *
81 + *             2009/12/08 - (Jussi Kivilinna)
82 + *              - Port to 2.6.32
83 + *              - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit()
84 + *              - Also add better error checking for skb->nf_queue_entry usage
85 + *
86 + *             2010/02/25 - (Jussi Kivilinna)
87 + *              - Port to 2.6.33
88 + *
89 + *             2010/08/15 - (Jussi Kivilinna)
90 + *              - Port to 2.6.35
91 + *              - Simplify hook registration by using nf_register_hooks.
92 + *              - nf_reinject doesn't need spinlock around it, therefore remove
93 + *                imq_nf_reinject function. Other nf_reinject users protect
94 + *                their own data with a spinlock. With IMQ, however, all the data
95 + *                that is needed is stored per skbuff, so no locking is needed.
96 + *              - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of
97 + *                NF_QUEUE; this allows working coexistence of IMQ and other
98 + *                NF_QUEUE users.
99 + *              - Make IMQ multi-queue. Number of IMQ device queues can be
100 + *                increased with the 'numqueues' module parameter. The default
101 + *                number of queues is 1; in other words, by default IMQ works as a
102 + *                single-queue device. Multi-queue selection is based on
103 + *                IFB multi-queue patch by Changli Gao <xiaosuo@gmail.com>.
104 + *
105 + *            Also, many thanks to Pablo Sebastian Greco for making the initial
106 + *            patch and to those who helped with the testing.
107 + *
108 + *             More info at: http://www.linuximq.net/ (Andre Correa)
109 + */
110 +
111 +#include <linux/module.h>
112 +#include <linux/kernel.h>
113 +#include <linux/moduleparam.h>
114 +#include <linux/list.h>
115 +#include <linux/skbuff.h>
116 +#include <linux/netdevice.h>
117 +#include <linux/etherdevice.h>
118 +#include <linux/rtnetlink.h>
119 +#include <linux/if_arp.h>
120 +#include <linux/netfilter.h>
121 +#include <linux/netfilter_ipv4.h>
122 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
123 +       #include <linux/netfilter_ipv6.h>
124 +#endif
125 +#include <linux/imq.h>
126 +#include <net/pkt_sched.h>
127 +#include <net/netfilter/nf_queue.h>
128 +#include <net/sock.h>
129 +#include <linux/ip.h>
130 +#include <linux/ipv6.h>
131 +#include <linux/if_vlan.h>
132 +#include <linux/if_pppox.h>
+#include <linux/jhash.h>       /* jhash_3words() used by imq_hash() */
133 +#include <net/ip.h>
134 +#include <net/ipv6.h>
135 +
136 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
137 +
138 +static nf_hookfn imq_nf_hook;
139 +
140 +static struct nf_hook_ops imq_ops[] = {
141 +       {
142 +       /* imq_ingress_ipv4 */
143 +               .hook           = imq_nf_hook,
144 +               .owner          = THIS_MODULE,
145 +               .pf             = PF_INET,
146 +               .hooknum        = NF_INET_PRE_ROUTING,
147 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
148 +               .priority       = NF_IP_PRI_MANGLE + 1,
149 +#else
150 +               .priority       = NF_IP_PRI_NAT_DST + 1,
151 +#endif
152 +       },
153 +       {
154 +       /* imq_egress_ipv4 */
155 +               .hook           = imq_nf_hook,
156 +               .owner          = THIS_MODULE,
157 +               .pf             = PF_INET,
158 +               .hooknum        = NF_INET_POST_ROUTING,
159 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
160 +               .priority       = NF_IP_PRI_LAST,
161 +#else
162 +               .priority       = NF_IP_PRI_NAT_SRC - 1,
163 +#endif
164 +       },
165 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
166 +       {
167 +       /* imq_ingress_ipv6 */
168 +               .hook           = imq_nf_hook,
169 +               .owner          = THIS_MODULE,
170 +               .pf             = PF_INET6,
171 +               .hooknum        = NF_INET_PRE_ROUTING,
172 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
173 +               .priority       = NF_IP6_PRI_MANGLE + 1,
174 +#else
175 +               .priority       = NF_IP6_PRI_NAT_DST + 1,
176 +#endif
177 +       },
178 +       {
179 +       /* imq_egress_ipv6 */
180 +               .hook           = imq_nf_hook,
181 +               .owner          = THIS_MODULE,
182 +               .pf             = PF_INET6,
183 +               .hooknum        = NF_INET_POST_ROUTING,
184 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
185 +               .priority       = NF_IP6_PRI_LAST,
186 +#else
187 +               .priority       = NF_IP6_PRI_NAT_SRC - 1,
188 +#endif
189 +       },
190 +#endif
191 +};
192 +
193 +#if defined(CONFIG_IMQ_NUM_DEVS)
194 +static int numdevs = CONFIG_IMQ_NUM_DEVS;
195 +#else
196 +static int numdevs = IMQ_MAX_DEVS;
197 +#endif
198 +
199 +#define IMQ_MAX_QUEUES 32
200 +static int numqueues = 1;
201 +
202 +/*static DEFINE_SPINLOCK(imq_nf_queue_lock);*/
203 +
204 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
205 +
206 +
207 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
208 +{
209 +       return &dev->stats;
210 +}
211 +
212 +/* called for packets kfree'd in qdiscs at places other than enqueue */
213 +static void imq_skb_destructor(struct sk_buff *skb)
214 +{
215 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
216 +
217 +       skb->nf_queue_entry = NULL;
218 +
219 +       if (entry) {
220 +               nf_queue_entry_release_refs(entry);
221 +               kfree(entry);
222 +       }
223 +
224 +       skb_restore_cb(skb); /* kfree backup */
225 +}
226 +
227 +static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
228 +{
229 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
230 +
231 +       skb->nf_queue_entry = NULL;
232 +       dev->trans_start = jiffies;
233 +
234 +       dev->stats.tx_bytes += skb->len;
235 +       dev->stats.tx_packets++;
236 +
237 +       if (entry == NULL) {
238 +               /* We don't know what is going on here: the packet is queued for
239 +                * the imq device, but (probably) not by us.
240 +                *
241 +                * If this packet was not sent here by imq_nf_queue(), then
242 +                * skb_save_cb() was not used and kfree_skb() should not show:
243 +                *   WARNING: IMQ: kfree_skb: skb->cb_next:..
244 +                * and/or
245 +                *   WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
246 +                *
247 +                * However if this message is shown, then IMQ is somehow broken
248 +                * and you should report this to linuximq.net.
249 +                */
250 +
251 +               /* imq_dev_xmit is a black hole that eats all packets; report that
252 +                * we ate this packet happily and increase the dropped counter.
253 +                */
254 +
255 +               dev->stats.tx_dropped++;
256 +               dev_kfree_skb(skb);
257 +
258 +               return NETDEV_TX_OK;
259 +       }
260 +
261 +       skb_restore_cb(skb); /* restore skb->cb */
262 +
263 +       skb->imq_flags = 0;
264 +       skb->destructor = NULL;
265 +
266 +       nf_reinject(entry, NF_ACCEPT);
267 +
268 +       return NETDEV_TX_OK;
269 +}
270 +
271 +static u32 imq_hashrnd;
272 +
273 +static inline __be16 pppoe_proto(const struct sk_buff *skb)
274 +{
275 +       return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
276 +                       sizeof(struct pppoe_hdr)));
277 +}
278 +
279 +static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
280 +{
281 +       unsigned int pull_len;
282 +       u16 protocol = skb->protocol;
283 +       u32 addr1, addr2;
284 +       u32 hash, ihl = 0;
285 +       union {
286 +               u16 in16[2];
287 +               u32 in32;
288 +       } ports;
289 +       u8 ip_proto;
290 +
291 +       pull_len = 0;
292 +
293 +recheck:
294 +       switch (protocol) {
295 +       case htons(ETH_P_8021Q): {
296 +               if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
297 +                       goto other;
298 +
299 +               pull_len += VLAN_HLEN;
300 +               skb->network_header += VLAN_HLEN;
301 +
302 +               protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
303 +               goto recheck;
304 +       }
305 +
306 +       case htons(ETH_P_PPP_SES): {
307 +               if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
308 +                       goto other;
309 +
310 +               pull_len += PPPOE_SES_HLEN;
311 +               skb->network_header += PPPOE_SES_HLEN;
312 +
313 +               protocol = pppoe_proto(skb);
314 +               goto recheck;
315 +       }
316 +
317 +       case htons(ETH_P_IP): {
318 +               const struct iphdr *iph = ip_hdr(skb);
319 +
320 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
321 +                       goto other;
322 +
323 +               addr1 = iph->daddr;
324 +               addr2 = iph->saddr;
325 +
326 +               ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
327 +                                iph->protocol : 0;
328 +               ihl = ip_hdrlen(skb);
329 +
330 +               break;
331 +       }
332 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
333 +       case htons(ETH_P_IPV6): {
334 +               const struct ipv6hdr *iph = ipv6_hdr(skb);
335 +
336 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
337 +                       goto other;
338 +
339 +               addr1 = iph->daddr.s6_addr32[3];
340 +               addr2 = iph->saddr.s6_addr32[3];
341 +               ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto);
342 +               if (unlikely((int)ihl < 0))
343 +                       goto other;
344 +
345 +               break;
346 +       }
347 +#endif
348 +       default:
349 +other:
350 +               if (pull_len != 0) {
351 +                       skb_push(skb, pull_len);
352 +                       skb->network_header -= pull_len;
353 +               }
354 +
355 +               return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
356 +       }
357 +
358 +       if (addr1 > addr2)
359 +               swap(addr1, addr2);
360 +
361 +       switch (ip_proto) {
362 +       case IPPROTO_TCP:
363 +       case IPPROTO_UDP:
364 +       case IPPROTO_DCCP:
365 +       case IPPROTO_ESP:
366 +       case IPPROTO_AH:
367 +       case IPPROTO_SCTP:
368 +       case IPPROTO_UDPLITE: {
369 +               if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
370 +                       if (ports.in16[0] > ports.in16[1])
371 +                               swap(ports.in16[0], ports.in16[1]);
372 +                       break;
373 +               }
374 +               /* fall-through */
375 +       }
376 +       default:
377 +               ports.in32 = 0;
378 +               break;
379 +       }
380 +
381 +       if (pull_len != 0) {
382 +               skb_push(skb, pull_len);
383 +               skb->network_header -= pull_len;
384 +       }
385 +
386 +       hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
387 +
388 +       return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
389 +}
390 +
391 +static inline bool sk_tx_queue_recorded(struct sock *sk)
392 +{
393 +       return (sk_tx_queue_get(sk) >= 0);
394 +}
395 +
396 +static struct netdev_queue *imq_select_queue(struct net_device *dev,
397 +                                               struct sk_buff *skb)
398 +{
399 +       u16 queue_index = 0;
400 +       u32 hash;
401 +
402 +       if (likely(dev->real_num_tx_queues == 1))
403 +               goto out;
404 +
405 +       /* IMQ can be receiving ingress or egress packets. */
406 +
407 +       /* Check first if rx_queue is set */
408 +       if (skb_rx_queue_recorded(skb)) {
409 +               queue_index = skb_get_rx_queue(skb);
410 +               goto out;
411 +       }
412 +
413 +       /* Check if socket has tx_queue set */
414 +       if (sk_tx_queue_recorded(skb->sk)) {
415 +               queue_index = sk_tx_queue_get(skb->sk);
416 +               goto out;
417 +       }
418 +
419 +       /* Try to use the socket hash */
420 +       if (skb->sk && skb->sk->sk_hash) {
421 +               hash = skb->sk->sk_hash;
422 +               queue_index =
423 +                       (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
424 +               goto out;
425 +       }
426 +
427 +       /* Generate hash from packet data */
428 +       queue_index = imq_hash(dev, skb);
429 +
430 +out:
431 +       if (unlikely(queue_index >= dev->real_num_tx_queues))
432 +               queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
433 +
434 +       return netdev_get_tx_queue(dev, queue_index);
435 +}
436 +
437 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
438 +{
439 +       struct net_device *dev;
440 +       struct sk_buff *skb_orig, *skb, *skb_shared;
441 +       struct Qdisc *q;
442 +       struct netdev_queue *txq;
443 +       spinlock_t *root_lock;
444 +       int users, index;
445 +       int retval = -EINVAL;
446 +
447 +       index = entry->skb->imq_flags & IMQ_F_IFMASK;
448 +       if (unlikely(index > numdevs - 1)) {
449 +               if (net_ratelimit())
450 +                       printk(KERN_WARNING
451 +                              "IMQ: invalid device specified, highest is %u\n",
452 +                              numdevs - 1);
453 +               retval = -EINVAL;
454 +               goto out;
455 +       }
456 +
457 +       /* check for imq device by index from cache */
458 +       dev = imq_devs_cache[index];
459 +       if (unlikely(!dev)) {
460 +               char buf[8];
461 +
462 +               /* get device by name and cache result */
463 +               snprintf(buf, sizeof(buf), "imq%d", index);
464 +               dev = dev_get_by_name(&init_net, buf);
465 +               if (unlikely(!dev)) {
466 +                       /* not found ?!*/
467 +                       BUG();
468 +                       retval = -ENODEV;
469 +                       goto out;
470 +               }
471 +
472 +               imq_devs_cache[index] = dev;
473 +               dev_put(dev);
474 +       }
475 +
476 +       if (unlikely(!(dev->flags & IFF_UP))) {
477 +               entry->skb->imq_flags = 0;
478 +               nf_reinject(entry, NF_ACCEPT);
479 +               retval = 0;
480 +               goto out;
481 +       }
482 +       dev->last_rx = jiffies;
483 +
484 +       skb = entry->skb;
485 +       skb_orig = NULL;
486 +
487 +       /* skb has owner? => make clone */
488 +       if (unlikely(skb->destructor)) {
489 +               skb_orig = skb;
490 +               skb = skb_clone(skb, GFP_ATOMIC);
491 +               if (unlikely(!skb)) {
492 +                       retval = -ENOMEM;
493 +                       goto out;
494 +               }
495 +               entry->skb = skb;
496 +       }
497 +
498 +       skb->nf_queue_entry = entry;
499 +
500 +       dev->stats.rx_bytes += skb->len;
501 +       dev->stats.rx_packets++;
502 +
503 +       /* Disables softirqs for lock below */
504 +       rcu_read_lock_bh();
505 +
506 +       /* Multi-queue selection */
507 +       txq = imq_select_queue(dev, skb);
508 +
509 +       q = rcu_dereference(txq->qdisc);
510 +       if (unlikely(!q->enqueue))
511 +               goto packet_not_eaten_by_imq_dev;
512 +
513 +       root_lock = qdisc_lock(q);
514 +       spin_lock(root_lock);
515 +
516 +       users = atomic_read(&skb->users);
517 +
518 +       skb_shared = skb_get(skb); /* increase reference count by one */
519 +       skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
520 +                                       overwrite it */
521 +       qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
522 +
523 +       if (likely(atomic_read(&skb_shared->users) == users + 1)) {
524 +               kfree_skb(skb_shared); /* decrease reference count by one */
525 +
526 +               skb->destructor = &imq_skb_destructor;
527 +
528 +               /* cloned? */
529 +               if (unlikely(skb_orig))
530 +                       kfree_skb(skb_orig); /* free original */
531 +
532 +               spin_unlock(root_lock);
533 +               rcu_read_unlock_bh();
534 +
535 +               /* schedule qdisc dequeue */
536 +               __netif_schedule(q);
537 +
538 +               retval = 0;
539 +               goto out;
540 +       } else {
541 +               skb_restore_cb(skb_shared); /* restore skb->cb */
542 +               skb->nf_queue_entry = NULL;
543 +               /* qdisc dropped the packet and already decreased its reference
544 +                * count, so we must not try to free it again here as that would
545 +                * actually destroy the skb. */
546 +               spin_unlock(root_lock);
547 +               goto packet_not_eaten_by_imq_dev;
548 +       }
549 +
550 +packet_not_eaten_by_imq_dev:
551 +       rcu_read_unlock_bh();
552 +
553 +       /* cloned? restore original */
554 +       if (unlikely(skb_orig)) {
555 +               kfree_skb(skb);
556 +               entry->skb = skb_orig;
557 +       }
558 +       retval = -1;
559 +out:
560 +       return retval;
561 +}
562 +
563 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
564 +                               const struct net_device *indev,
565 +                               const struct net_device *outdev,
566 +                               int (*okfn)(struct sk_buff *))
567 +{
568 +       return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
569 +}
570 +
571 +static int imq_close(struct net_device *dev)
572 +{
573 +       netif_stop_queue(dev);
574 +       return 0;
575 +}
576 +
577 +static int imq_open(struct net_device *dev)
578 +{
579 +       netif_start_queue(dev);
580 +       return 0;
581 +}
582 +
583 +static const struct net_device_ops imq_netdev_ops = {
584 +       .ndo_open               = imq_open,
585 +       .ndo_stop               = imq_close,
586 +       .ndo_start_xmit         = imq_dev_xmit,
587 +       .ndo_get_stats          = imq_get_stats,
588 +};
589 +
590 +static void imq_setup(struct net_device *dev)
591 +{
592 +       dev->netdev_ops         = &imq_netdev_ops;
593 +       dev->type               = ARPHRD_VOID;
594 +       dev->mtu                = 16000;
595 +       dev->tx_queue_len       = 11000;
596 +       dev->flags              = IFF_NOARP;
597 +       dev->features           = NETIF_F_SG | NETIF_F_FRAGLIST |
598 +                                 NETIF_F_GSO | NETIF_F_HW_CSUM |
599 +                                 NETIF_F_HIGHDMA;
600 +       dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
601 +}
602 +
603 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
604 +{
605 +       int ret = 0;
606 +
607 +       if (tb[IFLA_ADDRESS]) {
608 +               if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
609 +                       ret = -EINVAL;
610 +                       goto end;
611 +               }
612 +               if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
613 +                       ret = -EADDRNOTAVAIL;
614 +                       goto end;
615 +               }
616 +       }
617 +       return 0;
618 +end:
619 +       printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
620 +       return ret;
621 +}
622 +
623 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
624 +       .kind           = "imq",
625 +       .priv_size      = 0,
626 +       .setup          = imq_setup,
627 +       .validate       = imq_validate,
628 +};
629 +
630 +static const struct nf_queue_handler imq_nfqh = {
631 +       .name  = "imq",
632 +       .outfn = imq_nf_queue,
633 +};
634 +
635 +static int __init imq_init_hooks(void)
636 +{
637 +       int ret;
638 +
639 +       nf_register_queue_imq_handler(&imq_nfqh);
640 +
641 +       ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
642 +       if (ret < 0)
643 +               nf_unregister_queue_imq_handler();
644 +
645 +       return ret;
646 +}
647 +
648 +static int __init imq_init_one(int index)
649 +{
650 +       struct net_device *dev;
651 +       int ret;
652 +
653 +       dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
654 +       if (!dev)
655 +               return -ENOMEM;
656 +
657 +       ret = dev_alloc_name(dev, dev->name);
658 +       if (ret < 0)
659 +               goto fail;
660 +
661 +       dev->rtnl_link_ops = &imq_link_ops;
662 +       ret = register_netdevice(dev);
663 +       if (ret < 0)
664 +               goto fail;
665 +
666 +       return 0;
667 +fail:
668 +       free_netdev(dev);
669 +       return ret;
670 +}
671 +
672 +static int __init imq_init_devs(void)
673 +{
674 +       int err, i;
675 +
676 +       if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
677 +               printk(KERN_ERR "IMQ: numdevs has to be between 1 and %u\n",
678 +                      IMQ_MAX_DEVS);
679 +               return -EINVAL;
680 +       }
681 +
682 +       if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
683 +               printk(KERN_ERR "IMQ: numqueues has to be between 1 and %u\n",
684 +                      IMQ_MAX_QUEUES);
685 +               return -EINVAL;
686 +       }
687 +
688 +       get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
689 +
690 +       rtnl_lock();
691 +       err = __rtnl_link_register(&imq_link_ops);
692 +
693 +       for (i = 0; i < numdevs && !err; i++)
694 +               err = imq_init_one(i);
695 +
696 +       if (err) {
697 +               __rtnl_link_unregister(&imq_link_ops);
698 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
699 +       }
700 +       rtnl_unlock();
701 +
702 +       return err;
703 +}
704 +
705 +static int __init imq_init_module(void)
706 +{
707 +       int err;
708 +
709 +#if defined(CONFIG_IMQ_NUM_DEVS)
710 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
711 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
712 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
713 +#endif
714 +
715 +       err = imq_init_devs();
716 +       if (err) {
717 +               printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
718 +               return err;
719 +       }
720 +
721 +       err = imq_init_hooks();
722 +       if (err) {
723 +               printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
724 +               rtnl_link_unregister(&imq_link_ops);
725 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
726 +               return err;
727 +       }
728 +
729 +       printk(KERN_INFO "IMQ driver loaded successfully. "
730 +               "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues);
731 +
732 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
733 +       printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
734 +#else
735 +       printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
736 +#endif
737 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
738 +       printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
739 +#else
740 +       printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
741 +#endif
742 +
743 +       return 0;
744 +}
745 +
746 +static void __exit imq_unhook(void)
747 +{
748 +       nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
749 +       nf_unregister_queue_imq_handler();
750 +}
751 +
752 +static void __exit imq_cleanup_devs(void)
753 +{
754 +       rtnl_link_unregister(&imq_link_ops);
755 +       memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
756 +}
757 +
758 +static void __exit imq_exit_module(void)
759 +{
760 +       imq_unhook();
761 +       imq_cleanup_devs();
762 +       printk(KERN_INFO "IMQ driver unloaded successfully.\n");
763 +}
764 +
765 +module_init(imq_init_module);
766 +module_exit(imq_exit_module);
767 +
768 +module_param(numdevs, int, 0);
769 +module_param(numqueues, int, 0);
770 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
771 +                       "be created)");
772 +MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
773 +MODULE_AUTHOR("http://www.linuximq.net");
774 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
775 +                       "http://www.linuximq.net/ for more information.");
776 +MODULE_LICENSE("GPL");
777 +MODULE_ALIAS_RTNL_LINK("imq");
778 +
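
The queue selection above (imq_hash() and imq_select_queue()) maps a 32-bit flow hash onto the configured number of TX queues with a 64-bit multiply and shift instead of a modulo, so each queue gets an even share of the hash space. A minimal stand-alone sketch of that arithmetic, with made-up hash values (user-space illustration, not part of the patch):

#include <stdio.h>
#include <stdint.h>

/* Same scaling trick as imq_hash()/imq_select_queue():
 * (hash * num_queues) >> 32 spreads a 32-bit hash evenly
 * over the range [0, num_queues) without a division. */
static uint16_t pick_queue(uint32_t hash, unsigned int num_queues)
{
        return (uint16_t)(((uint64_t)hash * num_queues) >> 32);
}

int main(void)
{
        const uint32_t hashes[] = { 0x00000000u, 0x3fffffffu,
                                    0x80000000u, 0xffffffffu };
        unsigned int i;

        for (i = 0; i < sizeof(hashes) / sizeof(hashes[0]); i++)
                printf("hash 0x%08x -> queue %u of 8\n",
                       hashes[i], pick_queue(hashes[i], 8));
        return 0;
}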
779 diff -uNr linux-2.6.35/drivers/net/Kconfig linux-2.6.35-imq-multiqueue-test1/drivers/net/Kconfig
780 --- linux-2.6.35/drivers/net/Kconfig    2010-08-02 01:11:14.000000000 +0300
781 +++ linux-2.6.35-imq-multiqueue-test1/drivers/net/Kconfig       2010-08-12 19:57:51.284627330 +0300
782 @@ -121,6 +121,129 @@
783           To compile this driver as a module, choose M here: the module
784           will be called eql.  If unsure, say N.
785  
786 +config IMQ
787 +       tristate "IMQ (intermediate queueing device) support"
788 +       depends on NETDEVICES && NETFILTER
789 +       ---help---
790 +         The IMQ devices are used as placeholders for QoS queueing
791 +         disciplines. Every packet entering/leaving the IP stack can be
792 +         directed through the IMQ device where it's enqueued/dequeued to the
793 +         attached qdisc. This allows you to treat network devices as classes
794 +         and distribute bandwidth among them. Iptables is used to specify
795 +         through which IMQ device, if any, packets travel.
796 +
797 +         More information at: http://www.linuximq.net/
798 +
799 +         To compile this driver as a module, choose M here: the module
800 +         will be called imq.  If unsure, say N.
801 +
802 +choice
803 +       prompt "IMQ behavior (PRE/POSTROUTING)"
804 +       depends on IMQ
805 +       default IMQ_BEHAVIOR_AB
806 +       help
807 +
808 +               This setting defines how IMQ behaves with respect to its
809 +               hooking in PREROUTING and POSTROUTING.
810 +
811 +               IMQ can work in any of the following ways:
812 +
813 +                   PREROUTING   |      POSTROUTING
814 +               -----------------|-------------------
815 +               #1  After NAT    |      After NAT
816 +               #2  After NAT    |      Before NAT
817 +               #3  Before NAT   |      After NAT
818 +               #4  Before NAT   |      Before NAT
819 +
820 +               The default behavior is to hook before NAT on PREROUTING
821 +               and after NAT on POSTROUTING (#3).
822 +
823 +               These settings are especially useful when trying to use IMQ
824 +               to shape NATed clients.
825 +
826 +               More information can be found at: www.linuximq.net
827 +
828 +               If unsure, leave the default settings alone.
829 +
830 +config IMQ_BEHAVIOR_AA
831 +       bool "IMQ AA"
832 +       help
833 +               This setting defines how IMQ behaves with respect to its
834 +               hooking in PREROUTING and POSTROUTING.
835 +
836 +               Choosing this option will make IMQ hook like this:
837 +
838 +               PREROUTING:   After NAT
839 +               POSTROUTING:  After NAT
840 +
841 +               More information can be found at: www.linuximq.net
842 +
843 +               If unsure, leave the default settings alone.
844 +
845 +config IMQ_BEHAVIOR_AB
846 +       bool "IMQ AB"
847 +       help
848 +               This setting defines how IMQ behaves with respect to its
849 +               hooking in PREROUTING and POSTROUTING.
850 +
851 +               Choosing this option will make IMQ hook like this:
852 +
853 +               PREROUTING:   After NAT
854 +               POSTROUTING:  Before NAT
855 +
856 +               More information can be found at: www.linuximq.net
857 +
858 +               If unsure, leave the default settings alone.
859 +
860 +config IMQ_BEHAVIOR_BA
861 +       bool "IMQ BA"
862 +       help
863 +               This setting defines how IMQ behaves with respect to its
864 +               hooking in PREROUTING and POSTROUTING.
865 +
866 +               Choosing this option will make IMQ hook like this:
867 +
868 +               PREROUTING:   Before NAT
869 +               POSTROUTING:  After NAT
870 +
871 +               More information can be found at: www.linuximq.net
872 +
873 +               If unsure, leave the default settings alone.
874 +
875 +config IMQ_BEHAVIOR_BB
876 +       bool "IMQ BB"
877 +       help
878 +               This setting defines how IMQ behaves with respect to its
879 +               hooking in PREROUTING and POSTROUTING.
880 +
881 +               Choosing this option will make IMQ hook like this:
882 +
883 +               PREROUTING:   Before NAT
884 +               POSTROUTING:  Before NAT
885 +
886 +               More information can be found at: www.linuximq.net
887 +
888 +               If unsure, leave the default settings alone.
889 +
890 +endchoice
891 +
892 +config IMQ_NUM_DEVS
893 +
894 +       int "Number of IMQ devices"
895 +       range 2 16
896 +       depends on IMQ
897 +       default "16"
898 +       help
899 +
900 +               This setting defines how many IMQ devices will be
901 +               created.
902 +
903 +               The default value is 16.
904 +
905 +               More information can be found at: www.linuximq.net
906 +
907 +               If unsure, leave the default settings alone.
908 +
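
Taken together, a build that wants IMQ ends up with a configuration fragment roughly like the following (the module/behavior choices here are just one possible selection, shown for illustration; CONFIG_NETFILTER_XT_TARGET_IMQ comes from the net/netfilter/Kconfig hunk further down):

CONFIG_IMQ=m
CONFIG_IMQ_BEHAVIOR_BA=y
CONFIG_IMQ_NUM_DEVS=16
CONFIG_NETFILTER_XT_TARGET_IMQ=m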
909  config TUN
910         tristate "Universal TUN/TAP device driver support"
911         select CRC32
912 diff -uNr linux-2.6.35/drivers/net/Makefile linux-2.6.35-imq-multiqueue-test1/drivers/net/Makefile
913 --- linux-2.6.35/drivers/net/Makefile   2010-08-02 01:11:14.000000000 +0300
914 +++ linux-2.6.35-imq-multiqueue-test1/drivers/net/Makefile      2010-08-12 19:57:51.291294790 +0300
915 @@ -169,6 +169,7 @@
916  obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o
917  
918  obj-$(CONFIG_DUMMY) += dummy.o
919 +obj-$(CONFIG_IMQ) += imq.o
920  obj-$(CONFIG_IFB) += ifb.o
921  obj-$(CONFIG_MACVLAN) += macvlan.o
922  obj-$(CONFIG_MACVTAP) += macvtap.o
923 diff -uNr linux-2.6.35/include/linux/imq.h linux-2.6.35-imq-multiqueue-test1/include/linux/imq.h
924 --- linux-2.6.35/include/linux/imq.h    1970-01-01 02:00:00.000000000 +0200
925 +++ linux-2.6.35-imq-multiqueue-test1/include/linux/imq.h       2010-08-12 19:57:51.324632058 +0300
926 @@ -0,0 +1,13 @@
927 +#ifndef _IMQ_H
928 +#define _IMQ_H
929 +
930 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
931 +#define IMQ_F_BITS     5
932 +
933 +#define IMQ_F_IFMASK   0x0f
934 +#define IMQ_F_ENQUEUE  0x10
935 +
936 +#define IMQ_MAX_DEVS   (IMQ_F_IFMASK + 1)
937 +
938 +#endif /* _IMQ_H */
939 +
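
IMQ_F_IFMASK and IMQ_F_ENQUEUE above pack the target device index and the enqueue flag into the low IMQ_F_BITS bits of skb->imq_flags; the xt_IMQ target at the end of this patch performs exactly this packing. A small hypothetical user-space demo of the bit layout (constants copied from the header):

#include <stdio.h>

#define IMQ_F_IFMASK   0x0f
#define IMQ_F_ENQUEUE  0x10

int main(void)
{
        unsigned int todev = 3;     /* e.g. "send this packet to imq3" */
        unsigned int imq_flags = (todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;

        printf("imq_flags = 0x%02x: device index %u, enqueue set: %s\n",
               imq_flags, imq_flags & IMQ_F_IFMASK,
               (imq_flags & IMQ_F_ENQUEUE) ? "yes" : "no");
        return 0;
}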
940 diff -uNr linux-2.6.35/include/linux/netfilter/xt_IMQ.h linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter/xt_IMQ.h
941 --- linux-2.6.35/include/linux/netfilter/xt_IMQ.h       1970-01-01 02:00:00.000000000 +0200
942 +++ linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter/xt_IMQ.h  2010-08-12 19:57:51.344634424 +0300
943 @@ -0,0 +1,9 @@
944 +#ifndef _XT_IMQ_H
945 +#define _XT_IMQ_H
946 +
947 +struct xt_imq_info {
948 +       unsigned int todev;     /* target imq device */
949 +};
950 +
951 +#endif /* _XT_IMQ_H */
952 +
953 diff -uNr linux-2.6.35/include/linux/netfilter.h linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter.h
954 --- linux-2.6.35/include/linux/netfilter.h      2010-08-02 01:11:14.000000000 +0300
955 +++ linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter.h 2010-08-12 19:57:51.374637975 +0300
956 @@ -21,7 +21,8 @@
957  #define NF_QUEUE 3
958  #define NF_REPEAT 4
959  #define NF_STOP 5
960 -#define NF_MAX_VERDICT NF_STOP
961 +#define NF_IMQ_QUEUE 6
962 +#define NF_MAX_VERDICT NF_IMQ_QUEUE
963  
964  /* we overload the higher bits for encoding auxiliary data such as the queue
965   * number. Not nice, but better than additional function arguments. */
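
NF_IMQ_QUEUE is an ordinary verdict code; like NF_QUEUE, the verdict's upper bits can carry auxiliary data (the queue number), which nf_hook_slow() and nf_reinject() later shift out with NF_VERDICT_BITS, as the net/netfilter hunks below do. A stand-alone illustration of that encoding (NF_VERDICT_MASK and NF_VERDICT_BITS values are assumed to match the 2.6.35 header):

#include <stdio.h>

#define NF_IMQ_QUEUE     6
#define NF_VERDICT_MASK  0x0000ffff   /* assumed 2.6.35 values */
#define NF_VERDICT_BITS  16

int main(void)
{
        /* verdict asking for IMQ queueing, with queue number 2 in the high bits */
        unsigned int verdict = (2u << NF_VERDICT_BITS) | NF_IMQ_QUEUE;

        printf("action = %u (NF_IMQ_QUEUE), queue number = %u\n",
               verdict & NF_VERDICT_MASK, verdict >> NF_VERDICT_BITS);
        return 0;
}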
966 diff -uNr linux-2.6.35/include/linux/netfilter_ipv4/ipt_IMQ.h linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter_ipv4/ipt_IMQ.h
967 --- linux-2.6.35/include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 02:00:00.000000000 +0200
968 +++ linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter_ipv4/ipt_IMQ.h    2010-08-12 19:57:51.374637975 +0300
969 @@ -0,0 +1,10 @@
970 +#ifndef _IPT_IMQ_H
971 +#define _IPT_IMQ_H
972 +
973 +/* Backwards compatibility for old userspace */
974 +#include <linux/netfilter/xt_IMQ.h>
975 +
976 +#define ipt_imq_info xt_imq_info
977 +
978 +#endif /* _IPT_IMQ_H */
979 +
980 diff -uNr linux-2.6.35/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter_ipv6/ip6t_IMQ.h
981 --- linux-2.6.35/include/linux/netfilter_ipv6/ip6t_IMQ.h        1970-01-01 02:00:00.000000000 +0200
982 +++ linux-2.6.35-imq-multiqueue-test1/include/linux/netfilter_ipv6/ip6t_IMQ.h   2010-08-12 19:57:51.374637975 +0300
983 @@ -0,0 +1,10 @@
984 +#ifndef _IP6T_IMQ_H
985 +#define _IP6T_IMQ_H
986 +
987 +/* Backwards compatibility for old userspace */
988 +#include <linux/netfilter/xt_IMQ.h>
989 +
990 +#define ip6t_imq_info xt_imq_info
991 +
992 +#endif /* _IP6T_IMQ_H */
993 +
994 diff -uNr linux-2.6.35/include/linux/skbuff.h linux-2.6.35-imq-multiqueue-test1/include/linux/skbuff.h
995 --- linux-2.6.35/include/linux/skbuff.h 2010-08-02 01:11:14.000000000 +0300
996 +++ linux-2.6.35-imq-multiqueue-test1/include/linux/skbuff.h    2010-08-12 19:57:51.387972881 +0300
997 @@ -29,6 +29,9 @@
998  #include <linux/rcupdate.h>
999  #include <linux/dmaengine.h>
1000  #include <linux/hrtimer.h>
1001 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1002 +#include <linux/imq.h>
1003 +#endif
1004  
1005  /* Don't change this without changing skb_csum_unnecessary! */
1006  #define CHECKSUM_NONE 0
1007 @@ -327,6 +330,9 @@
1008          * first. This is owned by whoever has the skb queued ATM.
1009          */
1010         char                    cb[48] __aligned(8);
1011 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1012 +       void                    *cb_next;
1013 +#endif
1014  
1015         unsigned long           _skb_refdst;
1016  #ifdef CONFIG_XFRM
1017 @@ -363,6 +369,9 @@
1018         struct nf_conntrack     *nfct;
1019         struct sk_buff          *nfct_reasm;
1020  #endif
1021 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1022 +       struct nf_queue_entry   *nf_queue_entry;
1023 +#endif
1024  #ifdef CONFIG_BRIDGE_NETFILTER
1025         struct nf_bridge_info   *nf_bridge;
1026  #endif
1027 @@ -389,6 +398,10 @@
1028  
1029         /* 0/14 bit hole */
1030  
1031 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1032 +       __u8                    imq_flags:IMQ_F_BITS;
1033 +#endif
1034 +
1035  #ifdef CONFIG_NET_DMA
1036         dma_cookie_t            dma_cookie;
1037  #endif
1038 @@ -487,6 +500,12 @@
1039         return (struct rtable *)skb_dst(skb);
1040  }
1041  
1042 +
1043 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1044 +extern int skb_save_cb(struct sk_buff *skb);
1045 +extern int skb_restore_cb(struct sk_buff *skb);
1046 +#endif
1047 +
1048  extern void kfree_skb(struct sk_buff *skb);
1049  extern void consume_skb(struct sk_buff *skb);
1050  extern void           __kfree_skb(struct sk_buff *skb);
1051 @@ -2034,6 +2053,10 @@
1052         dst->nfct_reasm = src->nfct_reasm;
1053         nf_conntrack_get_reasm(src->nfct_reasm);
1054  #endif
1055 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1056 +       dst->imq_flags = src->imq_flags;
1057 +       dst->nf_queue_entry = src->nf_queue_entry;
1058 +#endif
1059  #ifdef CONFIG_BRIDGE_NETFILTER
1060         dst->nf_bridge  = src->nf_bridge;
1061         nf_bridge_get(src->nf_bridge);
1062 diff -uNr linux-2.6.35/include/net/netfilter/nf_queue.h linux-2.6.35-imq-multiqueue-test1/include/net/netfilter/nf_queue.h
1063 --- linux-2.6.35/include/net/netfilter/nf_queue.h       2010-08-02 01:11:14.000000000 +0300
1064 +++ linux-2.6.35-imq-multiqueue-test1/include/net/netfilter/nf_queue.h  2010-08-12 19:57:51.394640341 +0300
1065 @@ -30,5 +30,11 @@
1066                                        const struct nf_queue_handler *qh);
1067  extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
1068  extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
1069 +extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1070 +
1071 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1072 +extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
1073 +extern void nf_unregister_queue_imq_handler(void);
1074 +#endif
1075  
1076  #endif /* _NF_QUEUE_H */
1077 diff -uNr linux-2.6.35/net/core/dev.c linux-2.6.35-imq-multiqueue-test1/net/core/dev.c
1078 --- linux-2.6.35/net/core/dev.c 2010-08-02 01:11:14.000000000 +0300
1079 +++ linux-2.6.35-imq-multiqueue-test1/net/core/dev.c    2010-08-12 19:57:51.464648614 +0300
1080 @@ -98,6 +98,9 @@
1081  #include <net/net_namespace.h>
1082  #include <net/sock.h>
1083  #include <linux/rtnetlink.h>
1084 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1085 +#include <linux/imq.h>
1086 +#endif
1087  #include <linux/proc_fs.h>
1088  #include <linux/seq_file.h>
1089  #include <linux/stat.h>
1090 @@ -1931,7 +1934,11 @@
1091         int rc = NETDEV_TX_OK;
1092  
1093         if (likely(!skb->next)) {
1094 -               if (!list_empty(&ptype_all))
1095 +               if (!list_empty(&ptype_all)
1096 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1097 +                       && !(skb->imq_flags & IMQ_F_ENQUEUE)
1098 +#endif
1099 +                  )
1100                         dev_queue_xmit_nit(skb, dev);
1101  
1102                 /*
1103 @@ -2027,8 +2034,7 @@
1104         return queue_index;
1105  }
1106  
1107 -static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1108 -                                       struct sk_buff *skb)
1109 +static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb)
1110  {
1111         int queue_index;
1112         struct sock *sk = skb->sk;
1113 diff -uNr linux-2.6.35/net/core/skbuff.c linux-2.6.35-imq-multiqueue-test1/net/core/skbuff.c
1114 --- linux-2.6.35/net/core/skbuff.c      2010-08-02 01:11:14.000000000 +0300
1115 +++ linux-2.6.35-imq-multiqueue-test1/net/core/skbuff.c 2010-08-12 19:57:51.464648614 +0300
1116 @@ -72,6 +72,9 @@
1117  
1118  static struct kmem_cache *skbuff_head_cache __read_mostly;
1119  static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1120 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1121 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
1122 +#endif
1123  
1124  static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
1125                                   struct pipe_buffer *buf)
1126 @@ -91,6 +94,82 @@
1127         return 1;
1128  }
1129  
1130 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1131 +/* Control buffer save/restore for IMQ devices */
1132 +struct skb_cb_table {
1133 +       char                    cb[48] __aligned(8);
1134 +       void                    *cb_next;
1135 +       atomic_t                refcnt;
1136 +};
1137 +
1138 +static DEFINE_SPINLOCK(skb_cb_store_lock);
1139 +
1140 +int skb_save_cb(struct sk_buff *skb)
1141 +{
1142 +       struct skb_cb_table *next;
1143 +
1144 +       next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
1145 +       if (!next)
1146 +               return -ENOMEM;
1147 +
1148 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1149 +
1150 +       memcpy(next->cb, skb->cb, sizeof(skb->cb));
1151 +       next->cb_next = skb->cb_next;
1152 +
1153 +       atomic_set(&next->refcnt, 1);
1154 +
1155 +       skb->cb_next = next;
1156 +       return 0;
1157 +}
1158 +EXPORT_SYMBOL(skb_save_cb);
1159 +
1160 +int skb_restore_cb(struct sk_buff *skb)
1161 +{
1162 +       struct skb_cb_table *next;
1163 +
1164 +       if (!skb->cb_next)
1165 +               return 0;
1166 +
1167 +       next = skb->cb_next;
1168 +
1169 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1170 +
1171 +       memcpy(skb->cb, next->cb, sizeof(skb->cb));
1172 +       skb->cb_next = next->cb_next;
1173 +
1174 +       spin_lock(&skb_cb_store_lock);
1175 +
1176 +       if (atomic_dec_and_test(&next->refcnt))
1177 +               kmem_cache_free(skbuff_cb_store_cache, next);
1178 +
1179 +       spin_unlock(&skb_cb_store_lock);
1180 +
1181 +       return 0;
1182 +}
1183 +EXPORT_SYMBOL(skb_restore_cb);
1184 +
1185 +static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
1186 +{
1187 +       struct skb_cb_table *next;
1188 +       struct sk_buff *old;
1189 +
1190 +       if (!__old->cb_next) {
1191 +               new->cb_next = NULL;
1192 +               return;
1193 +       }
1194 +
1195 +       spin_lock(&skb_cb_store_lock);
1196 +
1197 +       old = (struct sk_buff *)__old;
1198 +
1199 +       next = old->cb_next;
1200 +       atomic_inc(&next->refcnt);
1201 +       new->cb_next = next;
1202 +
1203 +       spin_unlock(&skb_cb_store_lock);
1204 +}
1205 +#endif
1206  
1207  /* Pipe buffer operations for a socket. */
1208  static const struct pipe_buf_operations sock_pipe_buf_ops = {
1209 @@ -391,6 +470,26 @@
1210                 WARN_ON(in_irq());
1211                 skb->destructor(skb);
1212         }
1213 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1214 +       /* This should not happen. When it does, avoid memleak by restoring
1215 +        * the chain of cb-backups. */
1216 +       while (skb->cb_next != NULL) {
1217 +               if (net_ratelimit())
1218 +                       printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: "
1219 +                               "%p\n", skb->cb_next);
1220 +
1221 +               skb_restore_cb(skb);
1222 +       }
1223 +       /* This should not happen either, nf_queue_entry is nullified in
1224 +        * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are
1225 +        * leaking entry pointers, and maybe memory. We don't know whether this
1226 +        * points to already freed memory or whether it should be freed here.
1227 +        * If this happens we need to add refcounting etc. for nf_queue_entry.
1228 +        */
1229 +       if (skb->nf_queue_entry && net_ratelimit())
1230 +               printk(KERN_WARNING
1231 +                               "IMQ: kfree_skb: skb->nf_queue_entry != NULL\n");
1232 +#endif
1233  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1234         nf_conntrack_put(skb->nfct);
1235         nf_conntrack_put_reasm(skb->nfct_reasm);
1236 @@ -526,6 +625,9 @@
1237         new->sp                 = secpath_get(old->sp);
1238  #endif
1239         memcpy(new->cb, old->cb, sizeof(old->cb));
1240 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1241 +       skb_copy_stored_cb(new, old);
1242 +#endif
1243         new->csum               = old->csum;
1244         new->local_df           = old->local_df;
1245         new->pkt_type           = old->pkt_type;
1246 @@ -2776,6 +2878,13 @@
1247                                                 0,
1248                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1249                                                 NULL);
1250 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1251 +       skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1252 +                                                 sizeof(struct skb_cb_table),
1253 +                                                 0,
1254 +                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1255 +                                                 NULL);
1256 +#endif
1257  }
1258  
1259  /**
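
skb_save_cb()/skb_restore_cb() above keep a per-skb stack of control-buffer backups chained through cb_next, refcounted so that copied skbs can share a backup. A minimal user-space sketch of the push/pop idea (names hypothetical, no locking or refcounting):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct cb_backup {
        char cb[48];
        struct cb_backup *next;
};

struct fake_skb {
        char cb[48];
        struct cb_backup *cb_next;
};

/* like skb_save_cb(): push a copy of cb onto the backup chain */
static int save_cb(struct fake_skb *skb)
{
        struct cb_backup *bk = malloc(sizeof(*bk));

        if (!bk)
                return -1;
        memcpy(bk->cb, skb->cb, sizeof(skb->cb));
        bk->next = skb->cb_next;
        skb->cb_next = bk;
        return 0;
}

/* like skb_restore_cb(): pop the most recent backup back into cb */
static void restore_cb(struct fake_skb *skb)
{
        struct cb_backup *bk = skb->cb_next;

        if (!bk)
                return;
        memcpy(skb->cb, bk->cb, sizeof(skb->cb));
        skb->cb_next = bk->next;
        free(bk);
}

int main(void)
{
        struct fake_skb skb = { .cb = "IMQ private state", .cb_next = NULL };

        save_cb(&skb);                           /* before qdisc enqueue */
        strcpy(skb.cb, "overwritten by qdisc");  /* qdisc layer reuses cb */
        restore_cb(&skb);                        /* in imq_dev_xmit() */
        printf("cb after restore: %s\n", skb.cb);
        return 0;
}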
1260 diff -uNr linux-2.6.35/net/ipv4/netfilter/iptable_mangle.c linux-2.6.35-imq-multiqueue-test1/net/ipv4/netfilter/iptable_mangle.c
1261 --- linux-2.6.35/net/ipv4/netfilter/iptable_mangle.c    2010-08-02 01:11:14.000000000 +0300
1262 +++ linux-2.6.35-imq-multiqueue-test1/net/ipv4/netfilter/iptable_mangle.c       2010-08-12 19:57:51.501319617 +0300
1263 @@ -60,7 +60,8 @@
1264         ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
1265                            dev_net(out)->ipv4.iptable_mangle);
1266         /* Reroute for ANY change. */
1267 -       if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
1268 +       if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE &&
1269 +                                                       ret != NF_IMQ_QUEUE) {
1270                 iph = ip_hdr(skb);
1271  
1272                 if (iph->saddr != saddr ||
1273 diff -uNr linux-2.6.35/net/netfilter/core.c linux-2.6.35-imq-multiqueue-test1/net/netfilter/core.c
1274 --- linux-2.6.35/net/netfilter/core.c   2010-08-02 01:11:14.000000000 +0300
1275 +++ linux-2.6.35-imq-multiqueue-test1/net/netfilter/core.c      2010-08-12 20:31:28.666436279 +0300
1276 @@ -182,6 +182,12 @@
1277                 if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1278                               verdict >> NF_VERDICT_BITS))
1279                         goto next_hook;
1280 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1281 +       } else if ((verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
1282 +               if (!nf_imq_queue(skb, elem, pf, hook, indev, outdev, okfn,
1283 +                             verdict >> NF_VERDICT_BITS))
1284 +                       goto next_hook;
1285 +#endif
1286         }
1287         rcu_read_unlock();
1288         return ret;
1289 diff -uNr linux-2.6.35/net/netfilter/Kconfig linux-2.6.35-imq-multiqueue-test1/net/netfilter/Kconfig
1290 --- linux-2.6.35/net/netfilter/Kconfig  2010-08-02 01:11:14.000000000 +0300
1291 +++ linux-2.6.35-imq-multiqueue-test1/net/netfilter/Kconfig     2010-08-12 19:57:51.567994165 +0300
1292 @@ -448,6 +448,18 @@
1293           For more information on the LEDs available on your system, see
1294           Documentation/leds-class.txt
1295  
1296 +config NETFILTER_XT_TARGET_IMQ
1297 +        tristate '"IMQ" target support'
1298 +       depends on NETFILTER_XTABLES
1299 +       depends on IP_NF_MANGLE || IP6_NF_MANGLE
1300 +       select IMQ
1301 +       default m if NETFILTER_ADVANCED=n
1302 +        help
1303 +          This option adds an `IMQ' target which is used to specify if and
1304 +          to which imq device packets should get enqueued/dequeued.
1305 +
1306 +          To compile it as a module, choose M here.  If unsure, say N.
1307 +
1308  config NETFILTER_XT_TARGET_MARK
1309         tristate '"MARK" target support'
1310         depends on NETFILTER_ADVANCED
1311 diff -uNr linux-2.6.35/net/netfilter/Makefile linux-2.6.35-imq-multiqueue-test1/net/netfilter/Makefile
1312 --- linux-2.6.35/net/netfilter/Makefile 2010-08-02 01:11:14.000000000 +0300
1313 +++ linux-2.6.35-imq-multiqueue-test1/net/netfilter/Makefile    2010-08-12 19:57:51.577995346 +0300
1314 @@ -50,6 +50,7 @@
1315  obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
1316  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1317  obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
1318 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1319  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
1320  obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
1321  obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
1322 diff -uNr linux-2.6.35/net/netfilter/nf_internals.h linux-2.6.35-imq-multiqueue-test1/net/netfilter/nf_internals.h
1323 --- linux-2.6.35/net/netfilter/nf_internals.h   2010-08-02 01:11:14.000000000 +0300
1324 +++ linux-2.6.35-imq-multiqueue-test1/net/netfilter/nf_internals.h      2010-08-12 20:33:35.581440253 +0300
1325 @@ -30,6 +30,15 @@
1326                     struct net_device *outdev,
1327                     int (*okfn)(struct sk_buff *),
1328                     unsigned int queuenum);
1329 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1330 +extern int nf_imq_queue(struct sk_buff *skb,
1331 +                   struct list_head *elem,
1332 +                   u_int8_t pf, unsigned int hook,
1333 +                   struct net_device *indev,
1334 +                   struct net_device *outdev,
1335 +                   int (*okfn)(struct sk_buff *),
1336 +                   unsigned int queuenum);
1337 +#endif
1338  extern int __init netfilter_queue_init(void);
1339  
1340  /* nf_log.c */
1341 diff -uNr linux-2.6.35/net/netfilter/nf_queue.c linux-2.6.35-imq-multiqueue-test1/net/netfilter/nf_queue.c
1342 --- linux-2.6.35/net/netfilter/nf_queue.c       2010-08-02 01:11:14.000000000 +0300
1343 +++ linux-2.6.35-imq-multiqueue-test1/net/netfilter/nf_queue.c  2010-08-12 22:21:18.688483171 +0300
1344 @@ -22,6 +22,27 @@
1345  
1346  static DEFINE_MUTEX(queue_handler_mutex);
1347  
1348 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1349 +static const struct nf_queue_handler *queue_imq_handler;
1350 +
1351 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1352 +{
1353 +       mutex_lock(&queue_handler_mutex);
1354 +       rcu_assign_pointer(queue_imq_handler, qh);
1355 +       mutex_unlock(&queue_handler_mutex);
1356 +}
1357 +EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
1358 +
1359 +void nf_unregister_queue_imq_handler(void)
1360 +{
1361 +       mutex_lock(&queue_handler_mutex);
1362 +       rcu_assign_pointer(queue_imq_handler, NULL);
1363 +       mutex_unlock(&queue_handler_mutex);
1364 +}
1365 +EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
1366 +
1367 +#endif
1368 +
1369  /* return EBUSY when somebody else is registered, return EEXIST if the
1370   * same handler is registered, return 0 in case of success. */
1371  int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
1372 @@ -82,7 +103,7 @@
1373  }
1374  EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1375  
1376 -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1377 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1378  {
1379         /* Release those devices we held, or Alexey will kill me. */
1380         if (entry->indev)
1381 @@ -102,6 +123,7 @@
1382         /* Drop reference to owner of hook which queued us. */
1383         module_put(entry->elem->owner);
1384  }
1385 +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1386  
1387  /*
1388   * Any packet that leaves via this function must come back
1389 @@ -113,7 +135,8 @@
1390                       struct net_device *indev,
1391                       struct net_device *outdev,
1392                       int (*okfn)(struct sk_buff *),
1393 -                     unsigned int queuenum)
1394 +                     unsigned int queuenum,
1395 +                     bool imq_queue)
1396  {
1397         int status;
1398         struct nf_queue_entry *entry = NULL;
1399 @@ -127,6 +150,11 @@
1400         /* QUEUE == DROP if noone is waiting, to be safe. */
1401         rcu_read_lock();
1402  
1403 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1404 +       if (imq_queue)
1405 +               qh = rcu_dereference(queue_imq_handler);
1406 +       else
1407 +#endif
1408         qh = rcu_dereference(queue_handler[pf]);
1409         if (!qh)
1410                 goto err_unlock;
1411 @@ -192,19 +220,20 @@
1412         return 1;
1413  }
1414  
1415 -int nf_queue(struct sk_buff *skb,
1416 +static int _nf_queue(struct sk_buff *skb,
1417              struct list_head *elem,
1418              u_int8_t pf, unsigned int hook,
1419              struct net_device *indev,
1420              struct net_device *outdev,
1421              int (*okfn)(struct sk_buff *),
1422 -            unsigned int queuenum)
1423 +            unsigned int queuenum,
1424 +            bool imq_queue)
1425  {
1426         struct sk_buff *segs;
1427  
1428         if (!skb_is_gso(skb))
1429                 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1430 -                                 queuenum);
1431 +                                 queuenum, imq_queue);
1432  
1433         switch (pf) {
1434         case NFPROTO_IPV4:
1435 @@ -225,13 +254,39 @@
1436  
1437                 segs->next = NULL;
1438                 if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn,
1439 -                               queuenum))
1440 +                               queuenum, imq_queue))
1441                         kfree_skb(segs);
1442                 segs = nskb;
1443         } while (segs);
1444         return 1;
1445  }
1446  
1447 +int nf_queue(struct sk_buff *skb,
1448 +            struct list_head *elem,
1449 +            u_int8_t pf, unsigned int hook,
1450 +            struct net_device *indev,
1451 +            struct net_device *outdev,
1452 +            int (*okfn)(struct sk_buff *),
1453 +            unsigned int queuenum)
1454 +{
1455 +       return _nf_queue(skb, elem, pf, hook, indev, outdev, okfn, queuenum,
1456 +                        false);
1457 +}
1458 +
1459 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1460 +int nf_imq_queue(struct sk_buff *skb,
1461 +            struct list_head *elem,
1462 +            u_int8_t pf, unsigned int hook,
1463 +            struct net_device *indev,
1464 +            struct net_device *outdev,
1465 +            int (*okfn)(struct sk_buff *),
1466 +            unsigned int queuenum)
1467 +{
1468 +       return _nf_queue(skb, elem, pf, hook, indev, outdev, okfn, queuenum,
1469 +                        true);
1470 +}
1471 +#endif
1472 +
1473  void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
1474  {
1475         struct sk_buff *skb = entry->skb;
1476 @@ -272,7 +327,13 @@
1477         case NF_QUEUE:
1478                 if (!__nf_queue(skb, elem, entry->pf, entry->hook,
1479                                 entry->indev, entry->outdev, entry->okfn,
1480 -                               verdict >> NF_VERDICT_BITS))
1481 +                               verdict >> NF_VERDICT_BITS, false))
1482 +                       goto next_hook;
1483 +               break;
1484 +       case NF_IMQ_QUEUE:
1485 +               if (!__nf_queue(skb, elem, entry->pf, entry->hook,
1486 +                               entry->indev, entry->outdev, entry->okfn,
1487 +                               verdict >> NF_VERDICT_BITS, true))
1488                         goto next_hook;
1489                 break;
1490         case NF_STOLEN:
1491 diff -uNr linux-2.6.35/net/netfilter/xt_IMQ.c linux-2.6.35-imq-multiqueue-test1/net/netfilter/xt_IMQ.c
1492 --- linux-2.6.35/net/netfilter/xt_IMQ.c 1970-01-01 02:00:00.000000000 +0200
1493 +++ linux-2.6.35-imq-multiqueue-test1/net/netfilter/xt_IMQ.c    2010-08-12 22:10:20.657312054 +0300
1494 @@ -0,0 +1,74 @@
1495 +/*
1496 + * This target marks packets to be enqueued to an imq device
1497 + */
1498 +#include <linux/module.h>
1499 +#include <linux/skbuff.h>
1500 +#include <linux/netfilter/x_tables.h>
1501 +#include <linux/netfilter/xt_IMQ.h>
1502 +#include <linux/imq.h>
1503 +
1504 +static unsigned int imq_target(struct sk_buff *pskb,
1505 +                               const struct xt_action_param *par)
1506 +{
1507 +       const struct xt_imq_info *mr = par->targinfo;
1508 +
1509 +       pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
1510 +
1511 +       return XT_CONTINUE;
1512 +}
1513 +
1514 +static int imq_checkentry(const struct xt_tgchk_param *par)
1515 +{
1516 +       struct xt_imq_info *mr = par->targinfo;
1517 +
1518 +       if (mr->todev > IMQ_MAX_DEVS - 1) {
1519 +               printk(KERN_WARNING
1520 +                      "IMQ: invalid device specified, highest is %u\n",
1521 +                      IMQ_MAX_DEVS - 1);
1522 +               return -EINVAL;
1523 +       }
1524 +
1525 +       return 0;
1526 +}
1527 +
1528 +static struct xt_target xt_imq_reg[] __read_mostly = {
1529 +       {
1530 +               .name           = "IMQ",
1531 +               .family         = AF_INET,
1532 +               .checkentry     = imq_checkentry,
1533 +               .target         = imq_target,
1534 +               .targetsize     = sizeof(struct xt_imq_info),
1535 +               .table          = "mangle",
1536 +               .me             = THIS_MODULE
1537 +       },
1538 +       {
1539 +               .name           = "IMQ",
1540 +               .family         = AF_INET6,
1541 +               .checkentry     = imq_checkentry,
1542 +               .target         = imq_target,
1543 +               .targetsize     = sizeof(struct xt_imq_info),
1544 +               .table          = "mangle",
1545 +               .me             = THIS_MODULE
1546 +       },
1547 +};
1548 +
1549 +static int __init imq_init(void)
1550 +{
1551 +       return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1552 +}
1553 +
1554 +static void __exit imq_fini(void)
1555 +{
1556 +       xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1557 +}
1558 +
1559 +module_init(imq_init);
1560 +module_exit(imq_fini);
1561 +
1562 +MODULE_AUTHOR("http://www.linuximq.net");
1563 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. "
1564 +                  "See http://www.linuximq.net/ for more information.");
1565 +MODULE_LICENSE("GPL");
1566 +MODULE_ALIAS("ipt_IMQ");
1567 +MODULE_ALIAS("ip6t_IMQ");
1568 +