]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-imq.patch
- updated to linux-2.6.39-imq-multiqueue-test1.diff.bz2
[packages/kernel.git] / kernel-imq.patch
1 diff -uNr linux-2.6.39/drivers/net/imq.c linux-2.6.39-imqmq/drivers/net/imq.c
2 --- linux-2.6.39/drivers/net/imq.c      1970-01-01 02:00:00.000000000 +0200
3 +++ linux-2.6.39-imqmq/drivers/net/imq.c        2011-05-19 11:08:03.838522212 +0300
4 @@ -0,0 +1,777 @@
5 +/*
6 + *             Pseudo-driver for the intermediate queue device.
7 + *
8 + *             This program is free software; you can redistribute it and/or
9 + *             modify it under the terms of the GNU General Public License
10 + *             as published by the Free Software Foundation; either version
11 + *             2 of the License, or (at your option) any later version.
12 + *
13 + * Authors:    Patrick McHardy, <kaber@trash.net>
14 + *
15 + *            The first version was written by Martin Devera, <devik@cdi.cz>
16 + *
17 + * Credits:    Jan Rafaj <imq2t@cedric.vabo.cz>
18 + *              - Update patch to 2.4.21
19 + *             Sebastian Strollo <sstrollo@nortelnetworks.com>
20 + *              - Fix "Dead-loop on netdevice imq"-issue
21 + *             Marcel Sebek <sebek64@post.cz>
22 + *              - Update to 2.6.2-rc1
23 + *
24 + *            After some time of inactivity there is a group taking care
25 + *            of IMQ again: http://www.linuximq.net
26 + *
27 + *
28 + *            2004/06/30 - New version of IMQ patch to kernels <=2.6.7
29 + *             including the following changes:
30 + *
31 + *            - Correction of ipv6 support "+"s issue (Hasso Tepper)
32 + *            - Correction of imq_init_devs() issue that resulted in
33 + *            kernel OOPS unloading IMQ as module (Norbert Buchmuller)
34 + *            - Addition of functionality to choose number of IMQ devices
35 + *            during kernel config (Andre Correa)
36 + *            - Addition of functionality to choose how IMQ hooks on
37 + *            PRE and POSTROUTING (after or before NAT) (Andre Correa)
38 + *            - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
39 + *
40 + *
41 + *             2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
42 + *             released with almost no problems. 2.6.14-x was released
43 + *             with some important changes: nfcache was removed; After
44 + *             some weeks of trouble we figured out that some IMQ fields
45 + *             in skb were missing in skbuff.c - skb_clone and copy_skb_header.
46 + *             These functions are correctly patched by this new patch version.
47 + *
48 + *             Thanks to all who helped to figure out all the problems with
49 + *             2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
50 + *             Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
51 + *             I didn't forget anybody). I apologize again for my lack of time.
52 + *
53 + *
54 + *             2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
55 + *             of qdisc_restart() and moved qdisc_run() to tasklet to avoid
56 + *             recursive locking. New initialization routines to fix 'rmmod' not
57 + *             working anymore. Used code from ifb.c. (Jussi Kivilinna)
58 + *
59 + *             2008/08/06 - 2.6.26 - (JK)
60 + *              - Replaced tasklet with 'netif_schedule()'.
61 + *              - Cleaned up and added comments for imq_nf_queue().
62 + *
63 + *             2009/04/12
64 + *              - Add skb_save_cb/skb_restore_cb helper functions for backing up
65 + *                the control buffer. This is needed because the qdisc layer on
66 + *                kernels 2.6.27 and newer overwrites it. (Jussi Kivilinna)
67 + *              - Add better locking for IMQ device. Hopefully this will solve
68 + *                SMP issues. (Jussi Kivilinna)
69 + *              - Port to 2.6.27
70 + *              - Port to 2.6.28
71 + *              - Port to 2.6.29 + fix rmmod not working
72 + *
73 + *             2009/04/20 - (Jussi Kivilinna)
74 + *              - Use netdevice feature flags to avoid extra packet handling
75 + *                by core networking layer and possibly increase performance.
76 + *
77 + *             2009/09/26 - (Jussi Kivilinna)
78 + *              - Add imq_nf_reinject_lockless to fix deadlock with
79 + *                imq_nf_queue/imq_nf_reinject.
80 + *
81 + *             2009/12/08 - (Jussi Kivilinna)
82 + *              - Port to 2.6.32
83 + *              - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit()
84 + *              - Also add better error checking for skb->nf_queue_entry usage
85 + *
86 + *             2010/02/25 - (Jussi Kivilinna)
87 + *              - Port to 2.6.33
88 + *
89 + *             2010/08/15 - (Jussi Kivilinna)
90 + *              - Port to 2.6.35
91 + *              - Simplify hook registration by using nf_register_hooks.
92 + *              - nf_reinject doesn't need spinlock around it, therefore remove
93 + *                imq_nf_reinject function. Other nf_reinject users protect
94 + *                their own data with spinlock. With IMQ however all data that
95 + *                is needed is stored per skbuff, so no locking is needed.
96 + *              - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of
97 + *                NF_QUEUE, this allows working coexistence of IMQ and other
98 + *                NF_QUEUE users.
99 + *              - Make IMQ multi-queue. Number of IMQ device queues can be
100 + *                increased with 'numqueues' module parameters. Default number
101 + *                of queues is 1, in other words by default IMQ works as
102 + *                single-queue device. Multi-queue selection is based on
103 + *                IFB multi-queue patch by Changli Gao <xiaosuo@gmail.com>.
104 + *
105 + *             2011/03/18 - (Jussi Kivilinna)
106 + *              - Port to 2.6.38
107 + *
108 + *            Also, many thanks to pablo Sebastian Greco for making the initial
109 + *            patch and to those who helped the testing.
110 + *
111 + *             More info at: http://www.linuximq.net/ (Andre Correa)
112 + */
113 +
114 +#include <linux/module.h>
115 +#include <linux/kernel.h>
116 +#include <linux/moduleparam.h>
117 +#include <linux/list.h>
118 +#include <linux/skbuff.h>
119 +#include <linux/netdevice.h>
120 +#include <linux/etherdevice.h>
121 +#include <linux/rtnetlink.h>
122 +#include <linux/if_arp.h>
123 +#include <linux/netfilter.h>
124 +#include <linux/netfilter_ipv4.h>
125 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
126 +       #include <linux/netfilter_ipv6.h>
127 +#endif
128 +#include <linux/imq.h>
129 +#include <net/pkt_sched.h>
130 +#include <net/netfilter/nf_queue.h>
131 +#include <net/sock.h>
132 +#include <linux/ip.h>
133 +#include <linux/ipv6.h>
134 +#include <linux/if_vlan.h>
135 +#include <linux/if_pppox.h>
136 +#include <net/ip.h>
137 +#include <net/ipv6.h>
138 +
139 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
140 +
141 +static nf_hookfn imq_nf_hook;
142 +
143 +static struct nf_hook_ops imq_ops[] = {
144 +       {
145 +       /* imq_ingress_ipv4 */
146 +               .hook           = imq_nf_hook,
147 +               .owner          = THIS_MODULE,
148 +               .pf             = PF_INET,
149 +               .hooknum        = NF_INET_PRE_ROUTING,
150 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
151 +               .priority       = NF_IP_PRI_MANGLE + 1,
152 +#else
153 +               .priority       = NF_IP_PRI_NAT_DST + 1,
154 +#endif
155 +       },
156 +       {
157 +       /* imq_egress_ipv4 */
158 +               .hook           = imq_nf_hook,
159 +               .owner          = THIS_MODULE,
160 +               .pf             = PF_INET,
161 +               .hooknum        = NF_INET_POST_ROUTING,
162 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
163 +               .priority       = NF_IP_PRI_LAST,
164 +#else
165 +               .priority       = NF_IP_PRI_NAT_SRC - 1,
166 +#endif
167 +       },
168 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
169 +       {
170 +       /* imq_ingress_ipv6 */
171 +               .hook           = imq_nf_hook,
172 +               .owner          = THIS_MODULE,
173 +               .pf             = PF_INET6,
174 +               .hooknum        = NF_INET_PRE_ROUTING,
175 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
176 +               .priority       = NF_IP6_PRI_MANGLE + 1,
177 +#else
178 +               .priority       = NF_IP6_PRI_NAT_DST + 1,
179 +#endif
180 +       },
181 +       {
182 +       /* imq_egress_ipv6 */
183 +               .hook           = imq_nf_hook,
184 +               .owner          = THIS_MODULE,
185 +               .pf             = PF_INET6,
186 +               .hooknum        = NF_INET_POST_ROUTING,
187 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
188 +               .priority       = NF_IP6_PRI_LAST,
189 +#else
190 +               .priority       = NF_IP6_PRI_NAT_SRC - 1,
191 +#endif
192 +       },
193 +#endif
194 +};
195 +
196 +#if defined(CONFIG_IMQ_NUM_DEVS)
197 +static int numdevs = CONFIG_IMQ_NUM_DEVS;
198 +#else
199 +static int numdevs = IMQ_MAX_DEVS;
200 +#endif
201 +
202 +#define IMQ_MAX_QUEUES 32
203 +static int numqueues = 1;
204 +
205 +/*static DEFINE_SPINLOCK(imq_nf_queue_lock);*/
206 +
207 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
208 +
209 +
210 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
211 +{
212 +       return &dev->stats;
213 +}
214 +
215 +/* skb destructor, called for packets kfree'd in qdiscs outside of enqueue */
216 +static void imq_skb_destructor(struct sk_buff *skb)
217 +{
218 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
219 +
220 +       skb->nf_queue_entry = NULL;
221 +       /* detach and free the netfilter queue entry carried by the skb */
222 +       if (entry) {
223 +               nf_queue_entry_release_refs(entry);
224 +               kfree(entry);
225 +       }
226 +
227 +       skb_restore_cb(skb); /* kfree the skb->cb backup */
228 +}
229 +/* ndo_start_xmit: reinject packets queued by imq_nf_queue() into netfilter */
230 +static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
231 +{
232 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
233 +
234 +       skb->nf_queue_entry = NULL;
235 +       dev->trans_start = jiffies;
236 +
237 +       dev->stats.tx_bytes += skb->len;
238 +       dev->stats.tx_packets++;
239 +
240 +       if (entry == NULL) {
241 +               /* We don't know what is going on here.. the packet is queued
242 +                * for the imq device, but (probably) not by us.
243 +                *
244 +                * If this packet was not sent here by imq_nf_queue(), then
245 +                * skb_save_cb() was not used and skb_free() should not show:
246 +                *   WARNING: IMQ: kfree_skb: skb->cb_next:..
247 +                * and/or
248 +                *   WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
249 +                *
250 +                * However if this message is shown, then IMQ is somehow broken
251 +                * and you should report this to linuximq.net.
252 +                */
253 +
254 +               /* imq_dev_xmit is a black hole that eats all packets: report
255 +                * that we ate this packet happily and bump the dropped counter.
256 +                */
257 +
258 +               dev->stats.tx_dropped++;
259 +               dev_kfree_skb(skb);
260 +
261 +               return NETDEV_TX_OK;
262 +       }
263 +
264 +       skb_restore_cb(skb); /* restore skb->cb */
265 +
266 +       skb->imq_flags = 0;
267 +       skb->destructor = NULL;
268 +       /* hand the packet back to netfilter with an ACCEPT verdict */
269 +       nf_reinject(entry, NF_ACCEPT);
270 +
271 +       return NETDEV_TX_OK;
272 +}
273 +
274 +static u32 imq_hashrnd;
275 +
276 +static inline __be16 pppoe_proto(const struct sk_buff *skb)
277 +{
278 +       return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
279 +                       sizeof(struct pppoe_hdr)));
280 +}
281 +/* Hash skb's flow (addresses, ports, protocol) onto one of dev's tx queues. */
282 +static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
283 +{
284 +       unsigned int pull_len;
285 +       u16 protocol = skb->protocol;
286 +       u32 addr1, addr2, hash;
287 +       int ihl = 0; /* signed, so the "ihl < 0" error check below can fire */
288 +       union {
289 +               u16 in16[2];
290 +               u32 in32;
291 +       } ports;
292 +       u8 ip_proto;
293 +
294 +       pull_len = 0;
295 +
296 +recheck:
297 +       switch (protocol) {
298 +       case htons(ETH_P_8021Q): {
299 +               if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
300 +                       goto other;
301 +
302 +               pull_len += VLAN_HLEN;
303 +               skb->network_header += VLAN_HLEN;
304 +
305 +               protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
306 +               goto recheck;
307 +       }
308 +
309 +       case htons(ETH_P_PPP_SES): {
310 +               if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
311 +                       goto other;
312 +               /* NOTE(review): pppoe_proto() looks like a PPP id, not an ethertype */
313 +               pull_len += PPPOE_SES_HLEN;
314 +               skb->network_header += PPPOE_SES_HLEN;
315 +
316 +               protocol = pppoe_proto(skb);
317 +               goto recheck;
318 +       }
319 +
320 +       case htons(ETH_P_IP): {
321 +               const struct iphdr *iph;
322 +
323 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
324 +                       goto other;
325 +               iph = ip_hdr(skb); /* fetch after the pull: skb head may move */
326 +               addr1 = iph->daddr;
327 +               addr2 = iph->saddr;
328 +
329 +               ip_proto = !(iph->frag_off & htons(IP_MF | IP_OFFSET)) ?
330 +                                iph->protocol : 0;
331 +               ihl = ip_hdrlen(skb);
332 +
333 +               break;
334 +       }
335 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
336 +       case htons(ETH_P_IPV6): {
337 +               const struct ipv6hdr *iph;
338 +
339 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
340 +                       goto other;
341 +               iph = ipv6_hdr(skb); /* fetch after the pull: skb head may move */
342 +               ip_proto = iph->nexthdr; /* in/out arg of ipv6_skip_exthdr() */
343 +               addr1 = iph->daddr.s6_addr32[3];
344 +               addr2 = iph->saddr.s6_addr32[3];
345 +               ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto);
346 +               if (unlikely(ihl < 0))
347 +                       goto other;
348 +               break;
349 +       }
350 +#endif
351 +       default:
352 +other:
353 +               if (pull_len != 0) {
354 +                       skb_push(skb, pull_len);
355 +                       skb->network_header -= pull_len;
356 +               }
357 +               /* no usable IP header: spread by protocol number instead */
358 +               return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
359 +       }
360 +       /* order the pair so that both directions of a flow hash alike */
361 +       if (addr1 > addr2)
362 +               swap(addr1, addr2);
363 +
364 +       switch (ip_proto) {
365 +       case IPPROTO_TCP:
366 +       case IPPROTO_UDP:
367 +       case IPPROTO_DCCP:
368 +       case IPPROTO_ESP:
369 +       case IPPROTO_AH:
370 +       case IPPROTO_SCTP:
371 +       case IPPROTO_UDPLITE: {
372 +               if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
373 +                       if (ports.in16[0] > ports.in16[1])
374 +                               swap(ports.in16[0], ports.in16[1]);
375 +                       break;
376 +               }
377 +               /* fall-through */
378 +       }
379 +       default:
380 +               ports.in32 = 0;
381 +               break;
382 +       }
383 +       /* put back the VLAN/PPPoE headers pulled above */
384 +       if (pull_len != 0) {
385 +               skb_push(skb, pull_len);
386 +               skb->network_header -= pull_len;
387 +       }
388 +
389 +       hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
390 +       /* scale the 32-bit hash into [0, real_num_tx_queues) */
391 +       return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
392 +}
393 +
394 +static inline bool sk_tx_queue_recorded(struct sock *sk)
395 +{
396 +       return (sk_tx_queue_get(sk) >= 0);
397 +}
398 +/* Choose the tx queue of dev that skb will be enqueued on. */
399 +static struct netdev_queue *imq_select_queue(struct net_device *dev,
400 +                                               struct sk_buff *skb)
401 +{
402 +       u16 queue_index = 0;
403 +       u32 hash;
404 +
405 +       if (likely(dev->real_num_tx_queues == 1))
406 +               goto out;
407 +
408 +       /* IMQ can be receiving ingress or egress packets. */
409 +
410 +       /* First check whether an rx queue is recorded */
411 +       if (skb_rx_queue_recorded(skb)) {
412 +               queue_index = skb_get_rx_queue(skb);
413 +               goto out;
414 +       }
415 +
416 +       /* Socket tx_queue set? NOTE(review): confirm a NULL skb->sk is safe here */
417 +       if (sk_tx_queue_recorded(skb->sk)) {
418 +               queue_index = sk_tx_queue_get(skb->sk);
419 +               goto out;
420 +       }
421 +
422 +       /* Try to use the socket hash */
423 +       if (skb->sk && skb->sk->sk_hash) {
424 +               hash = skb->sk->sk_hash;
425 +               queue_index =
426 +                       (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
427 +               goto out;
428 +       }
429 +
430 +       /* Generate hash from packet data */
431 +       queue_index = imq_hash(dev, skb);
432 +       /* fold out-of-range indexes back into the device's queue range */
433 +out:
434 +       if (unlikely(queue_index >= dev->real_num_tx_queues))
435 +               queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
436 +
437 +       return netdev_get_tx_queue(dev, queue_index);
438 +}
439 +/* nf_queue .outfn: enqueue entry->skb on its IMQ device; 0 on success, <0 if not */
440 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
441 +{
442 +       struct net_device *dev;
443 +       struct sk_buff *skb_orig, *skb, *skb_shared;
444 +       struct Qdisc *q;
445 +       struct netdev_queue *txq;
446 +       spinlock_t *root_lock;
447 +       int users, index;
448 +       int retval = -EINVAL;
449 +
450 +       index = entry->skb->imq_flags & IMQ_F_IFMASK;
451 +       if (unlikely(index > numdevs - 1)) {
452 +               if (net_ratelimit())
453 +                       printk(KERN_WARNING
454 +                              "IMQ: invalid device specified, highest is %u\n",
455 +                              numdevs - 1);
456 +               retval = -EINVAL;
457 +               goto out;
458 +       }
459 +
460 +       /* check for imq device by index from cache */
461 +       dev = imq_devs_cache[index];
462 +       if (unlikely(!dev)) {
463 +               char buf[8];
464 +
465 +               /* get device by name and cache result */
466 +               snprintf(buf, sizeof(buf), "imq%d", index);
467 +               dev = dev_get_by_name(&init_net, buf);
468 +               if (unlikely(!dev)) {
469 +                       /* not found ?!*/
470 +                       BUG();
471 +                       retval = -ENODEV;
472 +                       goto out;
473 +               }
474 +
475 +               imq_devs_cache[index] = dev;
476 +               dev_put(dev);
477 +       }
478 +       /* device is down: clear the IMQ flags and reinject without queueing */
479 +       if (unlikely(!(dev->flags & IFF_UP))) {
480 +               entry->skb->imq_flags = 0;
481 +               nf_reinject(entry, NF_ACCEPT);
482 +               retval = 0;
483 +               goto out;
484 +       }
485 +       dev->last_rx = jiffies;
486 +
487 +       skb = entry->skb;
488 +       skb_orig = NULL;
489 +
490 +       /* skb has owner? => make clone */
491 +       if (unlikely(skb->destructor)) {
492 +               skb_orig = skb;
493 +               skb = skb_clone(skb, GFP_ATOMIC);
494 +               if (unlikely(!skb)) {
495 +                       retval = -ENOMEM;
496 +                       goto out;
497 +               }
498 +               entry->skb = skb;
499 +       }
500 +       /* lets imq_dev_xmit() and imq_skb_destructor() find the entry later */
501 +       skb->nf_queue_entry = entry;
502 +
503 +       dev->stats.rx_bytes += skb->len;
504 +       dev->stats.rx_packets++;
505 +
506 +       /* Disables softirqs for lock below */
507 +       rcu_read_lock_bh();
508 +
509 +       /* Multi-queue selection */
510 +       txq = imq_select_queue(dev, skb);
511 +
512 +       q = rcu_dereference(txq->qdisc);
513 +       if (unlikely(!q->enqueue))
514 +               goto packet_not_eaten_by_imq_dev;
515 +
516 +       root_lock = qdisc_lock(q);
517 +       spin_lock(root_lock);
518 +       /* remember the refcount to detect whether enqueue freed the skb */
519 +       users = atomic_read(&skb->users);
520 +
521 +       skb_shared = skb_get(skb); /* increase reference count by one */
522 +       skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
523 +                                       overwrite it */
524 +       qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
525 +
526 +       if (likely(atomic_read(&skb_shared->users) == users + 1)) {
527 +               kfree_skb(skb_shared); /* decrease reference count by one */
528 +
529 +               skb->destructor = &imq_skb_destructor;
530 +
531 +               /* cloned? */
532 +               if (unlikely(skb_orig))
533 +                       kfree_skb(skb_orig); /* free original */
534 +
535 +               spin_unlock(root_lock);
536 +               rcu_read_unlock_bh();
537 +
538 +               /* schedule qdisc dequeue */
539 +               __netif_schedule(q);
540 +
541 +               retval = 0;
542 +               goto out;
543 +       } else {
544 +               skb_restore_cb(skb_shared); /* restore skb->cb */
545 +               skb->nf_queue_entry = NULL;
546 +               /* qdisc dropped the packet and already decreased the skb
547 +                * reference count, so we must not free it again here as that
548 +                * would actually destroy the skb. */
549 +               spin_unlock(root_lock);
550 +               goto packet_not_eaten_by_imq_dev;
551 +       }
552 +
553 +packet_not_eaten_by_imq_dev:
554 +       rcu_read_unlock_bh();
555 +
556 +       /* cloned? restore original */
557 +       if (unlikely(skb_orig)) {
558 +               kfree_skb(skb);
559 +               entry->skb = skb_orig;
560 +       }
561 +       retval = -1;
562 +out:
563 +       return retval;
564 +}
565 +
566 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
567 +                               const struct net_device *indev,
568 +                               const struct net_device *outdev,
569 +                               int (*okfn)(struct sk_buff *))
570 +{
571 +       return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
572 +}
573 +
574 +static int imq_close(struct net_device *dev)
575 +{
576 +       netif_stop_queue(dev);
577 +       return 0;
578 +}
579 +
580 +static int imq_open(struct net_device *dev)
581 +{
582 +       netif_start_queue(dev);
583 +       return 0;
584 +}
585 +
586 +static const struct net_device_ops imq_netdev_ops = {
587 +       .ndo_open               = imq_open,
588 +       .ndo_stop               = imq_close,
589 +       .ndo_start_xmit         = imq_dev_xmit,
590 +       .ndo_get_stats          = imq_get_stats,
591 +};
592 +
593 +static void imq_setup(struct net_device *dev)
594 +{
595 +       dev->netdev_ops         = &imq_netdev_ops;
596 +       dev->type               = ARPHRD_VOID;
597 +       dev->mtu                = 16000;
598 +       dev->tx_queue_len       = 11000;
599 +       dev->flags              = IFF_NOARP;
600 +       dev->features           = NETIF_F_SG | NETIF_F_FRAGLIST |
601 +                                 NETIF_F_GSO | NETIF_F_HW_CSUM |
602 +                                 NETIF_F_HIGHDMA;
603 +       dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
604 +}
605 +
606 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
607 +{
608 +       int ret = 0;
609 +
610 +       if (tb[IFLA_ADDRESS]) {
611 +               if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
612 +                       ret = -EINVAL;
613 +                       goto end;
614 +               }
615 +               if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
616 +                       ret = -EADDRNOTAVAIL;
617 +                       goto end;
618 +               }
619 +       }
620 +       return 0;
621 +end:
622 +       printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
623 +       return ret;
624 +}
625 +
626 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
627 +       .kind           = "imq",
628 +       .priv_size      = 0,
629 +       .setup          = imq_setup,
630 +       .validate       = imq_validate,
631 +};
632 +
633 +static const struct nf_queue_handler imq_nfqh = {
634 +       .name  = "imq",
635 +       .outfn = imq_nf_queue,
636 +};
637 +
638 +static int __init imq_init_hooks(void)
639 +{
640 +       int ret;
641 +
642 +       nf_register_queue_imq_handler(&imq_nfqh);
643 +
644 +       ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
645 +       if (ret < 0)
646 +               nf_unregister_queue_imq_handler();
647 +
648 +       return ret;
649 +}
650 +
651 +static int __init imq_init_one(int index)
652 +{
653 +       struct net_device *dev;
654 +       int ret;
655 +
656 +       dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
657 +       if (!dev)
658 +               return -ENOMEM;
659 +
660 +       ret = dev_alloc_name(dev, dev->name);
661 +       if (ret < 0)
662 +               goto fail;
663 +
664 +       dev->rtnl_link_ops = &imq_link_ops;
665 +       ret = register_netdevice(dev);
666 +       if (ret < 0)
667 +               goto fail;
668 +
669 +       return 0;
670 +fail:
671 +       free_netdev(dev);
672 +       return ret;
673 +}
674 +/* Validate module parameters, then register the link ops and numdevs devices. */
675 +static int __init imq_init_devs(void)
676 +{
677 +       int err, i;
678 +
679 +       if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
680 +               printk(KERN_ERR "IMQ: numdevs has to be between 1 and %u\n",
681 +                      IMQ_MAX_DEVS);
682 +               return -EINVAL;
683 +       }
684 +
685 +       if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
686 +               printk(KERN_ERR "IMQ: numqueues has to be between 1 and %u\n",
687 +                      IMQ_MAX_QUEUES);
688 +               return -EINVAL;
689 +       }
690 +       /* random seed mixed into the flow hash computed by imq_hash() */
691 +       get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
692 +
693 +       rtnl_lock();
694 +       err = __rtnl_link_register(&imq_link_ops);
695 +
696 +       for (i = 0; i < numdevs && !err; i++)
697 +               err = imq_init_one(i);
698 +       /* any failure: unregister the link ops and clear the device cache */
699 +       if (err) {
700 +               __rtnl_link_unregister(&imq_link_ops);
701 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
702 +       }
703 +       rtnl_unlock();
704 +
705 +       return err;
706 +}
707 +
708 +static int __init imq_init_module(void)
709 +{
710 +       int err;
711 +
712 +#if defined(CONFIG_IMQ_NUM_DEVS)
713 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
714 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
715 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
716 +#endif
717 +
718 +       err = imq_init_devs();
719 +       if (err) {
720 +               printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
721 +               return err;
722 +       }
723 +
724 +       err = imq_init_hooks();
725 +       if (err) {
726 +               printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
727 +               rtnl_link_unregister(&imq_link_ops);
728 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
729 +               return err;
730 +       }
731 +
732 +       printk(KERN_INFO "IMQ driver loaded successfully. "
733 +               "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues);
734 +
735 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
736 +       printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
737 +#else
738 +       printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
739 +#endif
740 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
741 +       printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
742 +#else
743 +       printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
744 +#endif
745 +
746 +       return 0;
747 +}
748 +
749 +static void __exit imq_unhook(void)
750 +{
751 +       nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
752 +       nf_unregister_queue_imq_handler();
753 +}
754 +
755 +static void __exit imq_cleanup_devs(void)
756 +{
757 +       rtnl_link_unregister(&imq_link_ops);
758 +       memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
759 +}
760 +
761 +static void __exit imq_exit_module(void)
762 +{
763 +       imq_unhook();
764 +       imq_cleanup_devs();
765 +       printk(KERN_INFO "IMQ driver unloaded successfully.\n");
766 +}
767 +
768 +module_init(imq_init_module);
769 +module_exit(imq_exit_module);
770 +
771 +module_param(numdevs, int, 0);
772 +module_param(numqueues, int, 0);
773 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
774 +                       "be created)");
775 +MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
776 +MODULE_AUTHOR("http://www.linuximq.net");
777 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
778 +                       "http://www.linuximq.net/ for more information.");
779 +MODULE_LICENSE("GPL");
780 +MODULE_ALIAS_RTNL_LINK("imq");
781 +
782 diff -uNr linux-2.6.39/drivers/net/Kconfig linux-2.6.39-imqmq/drivers/net/Kconfig
783 --- linux-2.6.39/drivers/net/Kconfig    2011-05-19 07:06:34.000000000 +0300
784 +++ linux-2.6.39-imqmq/drivers/net/Kconfig      2011-05-19 11:08:04.281864473 +0300
785 @@ -124,6 +124,129 @@
786           To compile this driver as a module, choose M here: the module
787           will be called eql.  If unsure, say N.
788  
789 +config IMQ
790 +       tristate "IMQ (intermediate queueing device) support"
791 +       depends on NETDEVICES && NETFILTER
792 +       ---help---
793 +         The IMQ device(s) is used as placeholder for QoS queueing
794 +         disciplines. Every packet entering/leaving the IP stack can be
795 +         directed through the IMQ device where it's enqueued/dequeued to the
796 +         attached qdisc. This allows you to treat network devices as classes
797 +         and distribute bandwidth among them. Iptables is used to specify
798 +         through which IMQ device, if any, packets travel.
799 +
800 +         More information at: http://www.linuximq.net/
801 +
802 +         To compile this driver as a module, choose M here: the module
803 +         will be called imq.  If unsure, say N.
804 +
805 +choice
806 +       prompt "IMQ behavior (PRE/POSTROUTING)"
807 +       depends on IMQ
808 +       default IMQ_BEHAVIOR_AB
809 +       help
810 +
811 +               This setting defines how IMQ behaves with respect to its
812 +               hooking in PREROUTING and POSTROUTING.
813 +
814 +               IMQ can work in any of the following ways:
815 +
816 +                   PREROUTING   |      POSTROUTING
817 +               -----------------|-------------------
818 +               #1  After NAT    |      After NAT
819 +               #2  After NAT    |      Before NAT
820 +               #3  Before NAT   |      After NAT
821 +               #4  Before NAT   |      Before NAT
822 +
823 +               The default behavior is to hook after NAT on PREROUTING
824 +               and before NAT on POSTROUTING (#2, IMQ_BEHAVIOR_AB).
825 +
826 +               These settings are especially useful when trying to use IMQ
827 +               to shape NATed clients.
828 +
829 +               More information can be found at: www.linuximq.net
830 +
831 +               If not sure leave the default settings alone.
832 +
833 +config IMQ_BEHAVIOR_AA
834 +       bool "IMQ AA"
835 +       help
836 +               This setting defines how IMQ behaves with respect to its
837 +               hooking in PREROUTING and POSTROUTING.
838 +
839 +               Choosing this option will make IMQ hook like this:
840 +
841 +               PREROUTING:   After NAT
842 +               POSTROUTING:  After NAT
843 +
844 +               More information can be found at: www.linuximq.net
845 +
846 +               If not sure leave the default settings alone.
847 +
848 +config IMQ_BEHAVIOR_AB
849 +       bool "IMQ AB"
850 +       help
851 +               This setting defines how IMQ behaves with respect to its
852 +               hooking in PREROUTING and POSTROUTING.
853 +
854 +               Choosing this option will make IMQ hook like this:
855 +
856 +               PREROUTING:   After NAT
857 +               POSTROUTING:  Before NAT
858 +
859 +               More information can be found at: www.linuximq.net
860 +
861 +               If not sure leave the default settings alone.
862 +
863 +config IMQ_BEHAVIOR_BA
864 +       bool "IMQ BA"
865 +       help
866 +               This setting defines how IMQ behaves with respect to its
867 +               hooking in PREROUTING and POSTROUTING.
868 +
869 +               Choosing this option will make IMQ hook like this:
870 +
871 +               PREROUTING:   Before NAT
872 +               POSTROUTING:  After NAT
873 +
874 +               More information can be found at: www.linuximq.net
875 +
876 +               If not sure leave the default settings alone.
877 +
878 +config IMQ_BEHAVIOR_BB
879 +       bool "IMQ BB"
880 +       help
881 +               This setting defines how IMQ behaves with respect to its
882 +               hooking in PREROUTING and POSTROUTING.
883 +
884 +               Choosing this option will make IMQ hook like this:
885 +
886 +               PREROUTING:   Before NAT
887 +               POSTROUTING:  Before NAT
888 +
889 +               More information can be found at: www.linuximq.net
890 +
891 +               If not sure leave the default settings alone.
892 +
893 +endchoice
894 +
895 +config IMQ_NUM_DEVS
896 +
897 +       int "Number of IMQ devices"
898 +       range 2 16
899 +       depends on IMQ
900 +       default "16"
901 +       help
902 +
903 +               This setting defines how many IMQ devices will be
904 +               created.
905 +
906 +               The default value is 16.
907 +
908 +               More information can be found at: www.linuximq.net
909 +
910 +               If not sure leave the default settings alone.
911 +
912  config TUN
913         tristate "Universal TUN/TAP device driver support"
914         select CRC32
915 diff -uNr linux-2.6.39/drivers/net/Makefile linux-2.6.39-imqmq/drivers/net/Makefile
916 --- linux-2.6.39/drivers/net/Makefile   2011-05-19 07:06:34.000000000 +0300
917 +++ linux-2.6.39-imqmq/drivers/net/Makefile     2011-05-19 11:08:04.281864473 +0300
918 @@ -175,6 +175,7 @@
919  obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
920  
921  obj-$(CONFIG_DUMMY) += dummy.o
922 +obj-$(CONFIG_IMQ) += imq.o
923  obj-$(CONFIG_IFB) += ifb.o
924  obj-$(CONFIG_MACVLAN) += macvlan.o
925  obj-$(CONFIG_MACVTAP) += macvtap.o
926 diff -uNr linux-2.6.39/include/linux/imq.h linux-2.6.39-imqmq/include/linux/imq.h
927 --- linux-2.6.39/include/linux/imq.h    1970-01-01 02:00:00.000000000 +0200
928 +++ linux-2.6.39-imqmq/include/linux/imq.h      2011-05-19 11:08:04.281864473 +0300
929 @@ -0,0 +1,13 @@
930 +#ifndef _IMQ_H
931 +#define _IMQ_H
932 +
933 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
934 +#define IMQ_F_BITS     5       /* width of the skb->imq_flags bitfield */
935 +
936 +#define IMQ_F_IFMASK   0x0f    /* bits 0-3: index of the target IMQ device */
937 +#define IMQ_F_ENQUEUE  0x10    /* bit 4: packet is marked for IMQ enqueueing */
938 +
939 +#define IMQ_MAX_DEVS   (IMQ_F_IFMASK + 1)      /* 16 device indexes */
940 +
941 +#endif /* _IMQ_H */
942 +
943 diff -uNr linux-2.6.39/include/linux/netfilter/xt_IMQ.h linux-2.6.39-imqmq/include/linux/netfilter/xt_IMQ.h
944 --- linux-2.6.39/include/linux/netfilter/xt_IMQ.h       1970-01-01 02:00:00.000000000 +0200
945 +++ linux-2.6.39-imqmq/include/linux/netfilter/xt_IMQ.h 2011-05-19 11:08:04.281864473 +0300
946 @@ -0,0 +1,9 @@
947 +#ifndef _XT_IMQ_H
948 +#define _XT_IMQ_H
949 +/* Per-rule data of the IMQ target; xt_IMQ.c reads it through par->targinfo. */
950 +struct xt_imq_info {
951 +       unsigned int todev;     /* target imq device */
952 +};
953 +
954 +#endif /* _XT_IMQ_H */
955 +
956 diff -uNr linux-2.6.39/include/linux/netfilter.h linux-2.6.39-imqmq/include/linux/netfilter.h
957 --- linux-2.6.39/include/linux/netfilter.h      2011-05-19 07:06:34.000000000 +0300
958 +++ linux-2.6.39-imqmq/include/linux/netfilter.h        2011-05-19 11:08:04.285197874 +0300
959 @@ -21,7 +21,8 @@
960  #define NF_QUEUE 3
961  #define NF_REPEAT 4
962  #define NF_STOP 5
963 -#define NF_MAX_VERDICT NF_STOP
964 +#define NF_IMQ_QUEUE 6
965 +#define NF_MAX_VERDICT NF_IMQ_QUEUE
966  
967  /* we overload the higher bits for encoding auxiliary data such as the queue
968   * number or errno values. Not nice, but better than additional function
969 diff -uNr linux-2.6.39/include/linux/netfilter_ipv4/ipt_IMQ.h linux-2.6.39-imqmq/include/linux/netfilter_ipv4/ipt_IMQ.h
970 --- linux-2.6.39/include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 02:00:00.000000000 +0200
971 +++ linux-2.6.39-imqmq/include/linux/netfilter_ipv4/ipt_IMQ.h   2011-05-19 11:08:04.285197874 +0300
972 @@ -0,0 +1,10 @@
973 +#ifndef _IPT_IMQ_H
974 +#define _IPT_IMQ_H
975 +
976 +/* Backwards compatibility for old userspace */
977 +#include <linux/netfilter/xt_IMQ.h>
978 +
979 +#define ipt_imq_info xt_imq_info
980 +
981 +#endif /* _IPT_IMQ_H */
982 +
983 diff -uNr linux-2.6.39/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-2.6.39-imqmq/include/linux/netfilter_ipv6/ip6t_IMQ.h
984 --- linux-2.6.39/include/linux/netfilter_ipv6/ip6t_IMQ.h        1970-01-01 02:00:00.000000000 +0200
985 +++ linux-2.6.39-imqmq/include/linux/netfilter_ipv6/ip6t_IMQ.h  2011-05-19 11:08:04.285197874 +0300
986 @@ -0,0 +1,10 @@
987 +#ifndef _IP6T_IMQ_H
988 +#define _IP6T_IMQ_H
989 +
990 +/* Backwards compatibility for old userspace */
991 +#include <linux/netfilter/xt_IMQ.h>
992 +
993 +#define ip6t_imq_info xt_imq_info
994 +
995 +#endif /* _IP6T_IMQ_H */
996 +
997 diff -uNr linux-2.6.39/include/linux/skbuff.h linux-2.6.39-imqmq/include/linux/skbuff.h
998 --- linux-2.6.39/include/linux/skbuff.h 2011-05-19 07:06:34.000000000 +0300
999 +++ linux-2.6.39-imqmq/include/linux/skbuff.h   2011-05-19 11:08:04.288531274 +0300
1000 @@ -29,6 +29,9 @@
1001  #include <linux/rcupdate.h>
1002  #include <linux/dmaengine.h>
1003  #include <linux/hrtimer.h>
1004 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1005 +#include <linux/imq.h>
1006 +#endif
1007  
1008  /* Don't change this without changing skb_csum_unnecessary! */
1009  #define CHECKSUM_NONE 0
1010 @@ -339,6 +342,9 @@
1011          * first. This is owned by whoever has the skb queued ATM.
1012          */
1013         char                    cb[48] __aligned(8);
1014 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1015 +       void                    *cb_next;
1016 +#endif
1017  
1018         unsigned long           _skb_refdst;
1019  #ifdef CONFIG_XFRM
1020 @@ -377,6 +383,9 @@
1021  #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
1022         struct sk_buff          *nfct_reasm;
1023  #endif
1024 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1025 +       struct nf_queue_entry   *nf_queue_entry;
1026 +#endif
1027  #ifdef CONFIG_BRIDGE_NETFILTER
1028         struct nf_bridge_info   *nf_bridge;
1029  #endif
1030 @@ -401,6 +410,10 @@
1031  
1032         /* 0/13 bit hole */
1033  
1034 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1035 +       __u8                    imq_flags:IMQ_F_BITS;
1036 +#endif
1037 +
1038  #ifdef CONFIG_NET_DMA
1039         dma_cookie_t            dma_cookie;
1040  #endif
1041 @@ -487,6 +500,12 @@
1042         return (struct rtable *)skb_dst(skb);
1043  }
1044  
1045 +
1046 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1047 +extern int skb_save_cb(struct sk_buff *skb);
1048 +extern int skb_restore_cb(struct sk_buff *skb);
1049 +#endif
1050 +
1051  extern void kfree_skb(struct sk_buff *skb);
1052  extern void consume_skb(struct sk_buff *skb);
1053  extern void           __kfree_skb(struct sk_buff *skb);
1054 @@ -2129,6 +2148,10 @@
1055         dst->nfct_reasm = src->nfct_reasm;
1056         nf_conntrack_get_reasm(src->nfct_reasm);
1057  #endif
1058 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1059 +       dst->imq_flags = src->imq_flags;
1060 +       dst->nf_queue_entry = src->nf_queue_entry;
1061 +#endif
1062  #ifdef CONFIG_BRIDGE_NETFILTER
1063         dst->nf_bridge  = src->nf_bridge;
1064         nf_bridge_get(src->nf_bridge);
1065 diff -uNr linux-2.6.39/include/net/netfilter/nf_queue.h linux-2.6.39-imqmq/include/net/netfilter/nf_queue.h
1066 --- linux-2.6.39/include/net/netfilter/nf_queue.h       2011-05-19 07:06:34.000000000 +0300
1067 +++ linux-2.6.39-imqmq/include/net/netfilter/nf_queue.h 2011-05-19 11:08:04.288531274 +0300
1068 @@ -30,5 +30,11 @@
1069                                        const struct nf_queue_handler *qh);
1070  extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
1071  extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
1072 +extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1073 +
1074 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1075 +extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
1076 +extern void nf_unregister_queue_imq_handler(void);
1077 +#endif
1078  
1079  #endif /* _NF_QUEUE_H */
1080 diff -uNr linux-2.6.39/net/core/dev.c linux-2.6.39-imqmq/net/core/dev.c
1081 --- linux-2.6.39/net/core/dev.c 2011-05-19 07:06:34.000000000 +0300
1082 +++ linux-2.6.39-imqmq/net/core/dev.c   2011-05-19 11:08:04.288531274 +0300
1083 @@ -98,6 +98,9 @@
1084  #include <net/net_namespace.h>
1085  #include <net/sock.h>
1086  #include <linux/rtnetlink.h>
1087 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1088 +#include <linux/imq.h>
1089 +#endif
1090  #include <linux/proc_fs.h>
1091  #include <linux/seq_file.h>
1092  #include <linux/stat.h>
1093 @@ -2099,12 +2102,21 @@
1094                 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1095                         skb_dst_drop(skb);
1096  
1097 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1098 +               if (!list_empty(&ptype_all) &&
1099 +                                       !(skb->imq_flags & IMQ_F_ENQUEUE))
1100 +#else
1101                 if (!list_empty(&ptype_all))
1102 +#endif
1103                         dev_queue_xmit_nit(skb, dev);
1104  
1105                 skb_orphan_try(skb);
1106  
1107 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1108 +               features = skb->dev ? netif_skb_features(skb) : dev->features;
1109 +#else
1110                 features = netif_skb_features(skb);
1111 +#endif
1112  
1113                 if (vlan_tx_tag_present(skb) &&
1114                     !(features & NETIF_F_HW_VLAN_TX)) {
1115 @@ -2269,8 +2281,7 @@
1116  #endif
1117  }
1118  
1119 -static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1120 -                                       struct sk_buff *skb)
1121 +static struct netdev_queue *dev_pick_tx(struct net_device *dev, struct sk_buff *skb)
1122  {
1123         int queue_index;
1124         const struct net_device_ops *ops = dev->netdev_ops;
1125 diff -uNr linux-2.6.39/net/core/skbuff.c linux-2.6.39-imqmq/net/core/skbuff.c
1126 --- linux-2.6.39/net/core/skbuff.c      2011-05-19 07:06:34.000000000 +0300
1127 +++ linux-2.6.39-imqmq/net/core/skbuff.c        2011-05-19 11:08:04.288531274 +0300
1128 @@ -72,6 +72,9 @@
1129  
1130  static struct kmem_cache *skbuff_head_cache __read_mostly;
1131  static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1132 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1133 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
1134 +#endif
1135  
1136  static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
1137                                   struct pipe_buffer *buf)
1138 @@ -91,6 +94,82 @@
1139         return 1;
1140  }
1141  
1142 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1143 +/* Control buffer save/restore for IMQ devices: one saved copy of skb->cb */
1144 +struct skb_cb_table {
1145 +       char                    cb[48] __aligned(8);    /* saved skb->cb contents */
1146 +       void                    *cb_next;       /* previously saved entry, or NULL */
1147 +       atomic_t                refcnt;         /* number of skbs pointing at this entry */
1148 +};
1149 +
1150 +static DEFINE_SPINLOCK(skb_cb_store_lock);
1151 +/* Save a copy of skb->cb on the skb's cb_next chain; returns 0 or -ENOMEM. */
1152 +int skb_save_cb(struct sk_buff *skb)
1153 +{
1154 +       struct skb_cb_table *saved;
1155 +
1156 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(saved->cb));
1157 +
1158 +       saved = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
1159 +       if (saved == NULL)
1160 +               return -ENOMEM;
1161 +
1162 +       /* Snapshot the live cb and link the new entry in front of the chain. */
1163 +       atomic_set(&saved->refcnt, 1);
1164 +       saved->cb_next = skb->cb_next;
1165 +       memcpy(saved->cb, skb->cb, sizeof(saved->cb));
1166 +
1167 +       skb->cb_next = saved;
1168 +       return 0;
1169 +}
1170 +EXPORT_SYMBOL(skb_save_cb);
1171 +
1172 +/* Restore skb->cb from the most recently saved copy and unlink that copy.
1173 + * Returns 0 in every case, including when nothing had been saved. */
1174 +int skb_restore_cb(struct sk_buff *skb)
1175 +{
1176 +       struct skb_cb_table *saved = skb->cb_next;
1177 +
1178 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(saved->cb));
1179 +
1180 +       if (saved == NULL)
1181 +               return 0;
1182 +
1183 +       /* Pop the newest snapshot back into the live control buffer. */
1184 +       memcpy(skb->cb, saved->cb, sizeof(skb->cb));
1185 +       skb->cb_next = saved->cb_next;
1186 +
1187 +       /* Drop this skb's reference; the last holder frees the entry. */
1188 +       spin_lock(&skb_cb_store_lock);
1189 +       if (atomic_dec_and_test(&saved->refcnt))
1190 +               kmem_cache_free(skbuff_cb_store_cache, saved);
1191 +       spin_unlock(&skb_cb_store_lock);
1192 +
1193 +       return 0;
1194 +}
1195 +EXPORT_SYMBOL(skb_restore_cb);
1196 +
1197 +static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
1198 +{
1199 +       struct skb_cb_table *shared;
1200 +
1201 +       /* No saved control buffers on the source: nothing to share. */
1202 +       if (__old->cb_next == NULL) {
1203 +               new->cb_next = NULL;
1204 +               return;
1205 +       }
1206 +
1207 +       /*
1208 +        * Both skbs end up pointing at the same saved entry, so take an
1209 +        * extra reference on it instead of duplicating the data.
1210 +        */
1211 +       spin_lock(&skb_cb_store_lock);
1212 +       shared = __old->cb_next;
1213 +       atomic_inc(&shared->refcnt);
1214 +       new->cb_next = shared;
1215 +       spin_unlock(&skb_cb_store_lock);
1216 +}
1217 +#endif
1218  
1219  /* Pipe buffer operations for a socket. */
1220  static const struct pipe_buf_operations sock_pipe_buf_ops = {
1221 @@ -379,6 +458,26 @@
1222                 WARN_ON(in_irq());
1223                 skb->destructor(skb);
1224         }
1225 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1226 +       /* An skb should never be freed with saved control buffers attached.
1227 +        * If it is, pop every saved entry so that none of them is leaked. */
1228 +       while (skb->cb_next != NULL) {
1229 +               if (net_ratelimit())
1230 +                       printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: "
1231 +                               "%p\n", skb->cb_next);
1232 +
1233 +               skb_restore_cb(skb);
1234 +       }
1235 +       /* This should not happen either, nf_queue_entry is nullified in
1236 +        * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are
1237 +        * leaking entry pointers, maybe memory. We don't know if this is
1238 +        * pointer to already freed memory, or should this be freed.
1239 +        * If this happens we need to add refcounting, etc for nf_queue_entry.
1240 +        */
1241 +       if (skb->nf_queue_entry && net_ratelimit())
1242 +               printk(KERN_WARNING
1243 +                               "IMQ: kfree_skb: skb->nf_queue_entry != NULL\n");
1244 +#endif
1245  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1246         nf_conntrack_put(skb->nfct);
1247  #endif
1248 @@ -517,6 +616,9 @@
1249         new->sp                 = secpath_get(old->sp);
1250  #endif
1251         memcpy(new->cb, old->cb, sizeof(old->cb));
1252 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1253 +       skb_copy_stored_cb(new, old);
1254 +#endif
1255         new->csum               = old->csum;
1256         new->local_df           = old->local_df;
1257         new->pkt_type           = old->pkt_type;
1258 @@ -2780,6 +2882,13 @@
1259                                                 0,
1260                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1261                                                 NULL);
1262 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1263 +       skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1264 +                                                 sizeof(struct skb_cb_table),
1265 +                                                 0,
1266 +                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1267 +                                                 NULL);
1268 +#endif
1269  }
1270  
1271  /**
1272 diff -uNr linux-2.6.39/net/netfilter/core.c linux-2.6.39-imqmq/net/netfilter/core.c
1273 --- linux-2.6.39/net/netfilter/core.c   2011-05-19 07:06:34.000000000 +0300
1274 +++ linux-2.6.39-imqmq/net/netfilter/core.c     2011-05-19 11:13:19.891558119 +0300
1275 @@ -191,6 +191,20 @@
1276                         kfree_skb(skb);
1277                 }
1278                 ret = 0;
1279 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1280 +       } else if ((verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
1281 +               ret = nf_imq_queue(skb, elem, pf, hook, indev, outdev, okfn,
1282 +                              verdict >> NF_VERDICT_QBITS);
1283 +               if (ret < 0) {
1284 +                       if (ret == -ECANCELED)
1285 +                               goto next_hook;
1286 +                       if (ret == -ESRCH &&
1287 +                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
1288 +                               goto next_hook;
1289 +                       kfree_skb(skb);
1290 +               }
1291 +               ret = 0;
1292 +#endif
1293         }
1294         rcu_read_unlock();
1295         return ret;
1296 diff -uNr linux-2.6.39/net/netfilter/Kconfig linux-2.6.39-imqmq/net/netfilter/Kconfig
1297 --- linux-2.6.39/net/netfilter/Kconfig  2011-05-19 07:06:34.000000000 +0300
1298 +++ linux-2.6.39-imqmq/net/netfilter/Kconfig    2011-05-19 11:08:04.288531274 +0300
1299 @@ -507,6 +507,18 @@
1300           For more information on the LEDs available on your system, see
1301           Documentation/leds-class.txt
1302  
1303 +config NETFILTER_XT_TARGET_IMQ
1304 +        tristate '"IMQ" target support'
1305 +       depends on NETFILTER_XTABLES
1306 +       depends on IP_NF_MANGLE || IP6_NF_MANGLE
1307 +       select IMQ
1308 +       default m if NETFILTER_ADVANCED=n
1309 +        help
1310 +          This option adds an `IMQ' target which is used to specify if and
1311 +          to which imq device packets should get enqueued/dequeued.
1312 +
1313 +          To compile it as a module, choose M here.  If unsure, say N.
1314 +
1315  config NETFILTER_XT_TARGET_MARK
1316         tristate '"MARK" target support'
1317         depends on NETFILTER_ADVANCED
1318 diff -uNr linux-2.6.39/net/netfilter/Makefile linux-2.6.39-imqmq/net/netfilter/Makefile
1319 --- linux-2.6.39/net/netfilter/Makefile 2011-05-19 07:06:34.000000000 +0300
1320 +++ linux-2.6.39-imqmq/net/netfilter/Makefile   2011-05-19 11:08:04.291864674 +0300
1321 @@ -56,6 +56,7 @@
1322  obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
1323  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1324  obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
1325 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1326  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
1327  obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
1328  obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
1329 diff -uNr linux-2.6.39/net/netfilter/nf_internals.h linux-2.6.39-imqmq/net/netfilter/nf_internals.h
1330 --- linux-2.6.39/net/netfilter/nf_internals.h   2011-05-19 07:06:34.000000000 +0300
1331 +++ linux-2.6.39-imqmq/net/netfilter/nf_internals.h     2011-05-19 11:08:04.291864674 +0300
1332 @@ -30,6 +30,15 @@
1333                     struct net_device *outdev,
1334                     int (*okfn)(struct sk_buff *),
1335                     unsigned int queuenum);
1336 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1337 +extern int nf_imq_queue(struct sk_buff *skb,
1338 +                   struct list_head *elem,
1339 +                   u_int8_t pf, unsigned int hook,
1340 +                   struct net_device *indev,
1341 +                   struct net_device *outdev,
1342 +                   int (*okfn)(struct sk_buff *),
1343 +                   unsigned int queuenum);
1344 +#endif
1345  extern int __init netfilter_queue_init(void);
1346  
1347  /* nf_log.c */
1348 diff -uNr linux-2.6.39/net/netfilter/nf_queue.c linux-2.6.39-imqmq/net/netfilter/nf_queue.c
1349 --- linux-2.6.39/net/netfilter/nf_queue.c       2011-05-19 07:06:34.000000000 +0300
1350 +++ linux-2.6.39-imqmq/net/netfilter/nf_queue.c 2011-05-19 11:22:38.189467462 +0300
1351 @@ -22,6 +22,26 @@
1352  
1353  static DEFINE_MUTEX(queue_handler_mutex);
1354  
1355 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1356 +static const struct nf_queue_handler *queue_imq_handler;
1357 +/* Install qh as the queue handler that __nf_queue() uses for IMQ queueing. */
1358 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1359 +{
1360 +       mutex_lock(&queue_handler_mutex);
1361 +       rcu_assign_pointer(queue_imq_handler, qh);      /* overwrites any previous handler */
1362 +       mutex_unlock(&queue_handler_mutex);
1363 +}
1364 +EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
1365 +/* Clear the IMQ queue handler and wait until no RCU reader still uses it. */
1366 +void nf_unregister_queue_imq_handler(void)
1367 +{
1368 +       mutex_lock(&queue_handler_mutex);
1369 +       rcu_assign_pointer(queue_imq_handler, NULL);
1370 +       mutex_unlock(&queue_handler_mutex);
1371 +       synchronize_rcu();      /* __nf_queue() reads the handler under rcu_read_lock() */
1372 +}
1373 +EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
1374 +#endif
1375  /* return EBUSY when somebody else is registered, return EEXIST if the
1376   * same handler is registered, return 0 in case of success. */
1377  int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
1378 @@ -92,7 +112,7 @@
1379  }
1380  EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1381  
1382 -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1383 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1384  {
1385         /* Release those devices we held, or Alexey will kill me. */
1386         if (entry->indev)
1387 @@ -112,6 +132,7 @@
1388         /* Drop reference to owner of hook which queued us. */
1389         module_put(entry->elem->owner);
1390  }
1391 +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1392  
1393  /*
1394   * Any packet that leaves via this function must come back
1395 @@ -123,7 +144,8 @@
1396                       struct net_device *indev,
1397                       struct net_device *outdev,
1398                       int (*okfn)(struct sk_buff *),
1399 -                     unsigned int queuenum)
1400 +                     unsigned int queuenum,
1401 +                     bool imq_queue)
1402  {
1403         int status = -ENOENT;
1404         struct nf_queue_entry *entry = NULL;
1405 @@ -137,7 +159,14 @@
1406         /* QUEUE == DROP if no one is waiting, to be safe. */
1407         rcu_read_lock();
1408  
1409 -       qh = rcu_dereference(queue_handler[pf]);
1410 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1411 +       if (imq_queue)
1412 +               qh = rcu_dereference(queue_imq_handler);
1413 +       else
1414 +               qh = rcu_dereference(queue_handler[pf]);
1415 +#else
1416 +       qh = rcu_dereference(queue_handler[pf]);
1417 +#endif
1418         if (!qh) {
1419                 status = -ESRCH;
1420                 goto err_unlock;
1421 @@ -203,13 +232,14 @@
1422         return status;
1423  }
1424  
1425 -int nf_queue(struct sk_buff *skb,
1426 -            struct list_head *elem,
1427 -            u_int8_t pf, unsigned int hook,
1428 -            struct net_device *indev,
1429 -            struct net_device *outdev,
1430 -            int (*okfn)(struct sk_buff *),
1431 -            unsigned int queuenum)
1432 +static int _nf_queue(struct sk_buff *skb,
1433 +                    struct list_head *elem,
1434 +                    u_int8_t pf, unsigned int hook,
1435 +                    struct net_device *indev,
1436 +                    struct net_device *outdev,
1437 +                    int (*okfn)(struct sk_buff *),
1438 +                    unsigned int queuenum,
1439 +                    bool imq_queue)
1440  {
1441         struct sk_buff *segs;
1442         int err;
1443 @@ -217,7 +247,7 @@
1444  
1445         if (!skb_is_gso(skb))
1446                 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1447 -                                 queuenum);
1448 +                                 queuenum, imq_queue);
1449  
1450         switch (pf) {
1451         case NFPROTO_IPV4:
1452 @@ -244,7 +274,7 @@
1453                 segs->next = NULL;
1454                 if (err == 0)
1455                         err = __nf_queue(segs, elem, pf, hook, indev,
1456 -                                          outdev, okfn, queuenum);
1457 +                                        outdev, okfn, queuenum, imq_queue);
1458                 if (err == 0)
1459                         queued++;
1460                 else
1461 @@ -260,6 +290,32 @@
1462         return err;
1463  }
1464  
1465 +int nf_queue(struct sk_buff *skb,
1466 +            struct list_head *elem,
1467 +            u_int8_t pf, unsigned int hook,
1468 +            struct net_device *indev,
1469 +            struct net_device *outdev,
1470 +            int (*okfn)(struct sk_buff *),
1471 +            unsigned int queuenum)
1472 +{
1473 +       return _nf_queue(skb, elem, pf, hook, indev, outdev, okfn, queuenum,
1474 +                        false);        /* imq_queue=false: __nf_queue() uses queue_handler[pf] */
1475 +}
1476 +/* Same arguments as nf_queue(), but queues through the registered IMQ handler. */
1477 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1478 +int nf_imq_queue(struct sk_buff *skb,
1479 +            struct list_head *elem,
1480 +            u_int8_t pf, unsigned int hook,
1481 +            struct net_device *indev,
1482 +            struct net_device *outdev,
1483 +            int (*okfn)(struct sk_buff *),
1484 +            unsigned int queuenum)
1485 +{
1486 +       return _nf_queue(skb, elem, pf, hook, indev, outdev, okfn, queuenum,
1487 +                        true); /* imq_queue=true: __nf_queue() uses queue_imq_handler */
1488 +}
1489 +#endif
1490 +
1491  void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
1492  {
1493         struct sk_buff *skb = entry->skb;
1494 @@ -301,7 +357,7 @@
1495         case NF_QUEUE:
1496                 err = __nf_queue(skb, elem, entry->pf, entry->hook,
1497                                  entry->indev, entry->outdev, entry->okfn,
1498 -                                verdict >> NF_VERDICT_QBITS);
1499 +                                verdict >> NF_VERDICT_QBITS, false);
1500                 if (err < 0) {
1501                         if (err == -ECANCELED)
1502                                 goto next_hook;
1503 @@ -311,6 +367,21 @@
1504                         kfree_skb(skb);
1505                 }
1506                 break;
1507 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1508 +       case NF_IMQ_QUEUE:
1509 +               err = __nf_queue(skb, elem, entry->pf, entry->hook,
1510 +                                entry->indev, entry->outdev, entry->okfn,
1511 +                                verdict >> NF_VERDICT_QBITS, true);
1512 +               if (err < 0) {
1513 +                       if (err == -ECANCELED)
1514 +                               goto next_hook;
1515 +                       if (err == -ESRCH &&
1516 +                          (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
1517 +                               goto next_hook;
1518 +                       kfree_skb(skb);
1519 +               }
1520 +               break;
1521 +#endif
1522         case NF_STOLEN:
1523         default:
1524                 kfree_skb(skb);
1525 diff -uNr linux-2.6.39/net/netfilter/xt_IMQ.c linux-2.6.39-imqmq/net/netfilter/xt_IMQ.c
1526 --- linux-2.6.39/net/netfilter/xt_IMQ.c 1970-01-01 02:00:00.000000000 +0200
1527 +++ linux-2.6.39-imqmq/net/netfilter/xt_IMQ.c   2011-05-19 11:08:04.308531677 +0300
1528 @@ -0,0 +1,74 @@
1529 +/*
1530 + * This target marks packets to be enqueued to an imq device
1531 + */
1532 +#include <linux/module.h>
1533 +#include <linux/skbuff.h>
1534 +#include <linux/netfilter/x_tables.h>
1535 +#include <linux/netfilter/xt_IMQ.h>
1536 +#include <linux/imq.h>
1537 +
1538 +/* Tag the packet for enqueueing on the IMQ device chosen by the rule. */
1539 +static unsigned int
1540 +imq_target(struct sk_buff *pskb, const struct xt_action_param *par)
1541 +{
1542 +       const struct xt_imq_info *info = par->targinfo;
1543 +
1544 +       pskb->imq_flags = IMQ_F_ENQUEUE | (info->todev & IMQ_F_IFMASK);
1545 +       return XT_CONTINUE;
1546 +}
1547 +
1548 +/* Reject rules whose target device index is beyond the last IMQ device. */
1549 +static int imq_checkentry(const struct xt_tgchk_param *par)
1550 +{
1551 +       const struct xt_imq_info *info = par->targinfo;
1552 +
1553 +       if (info->todev < IMQ_MAX_DEVS)
1554 +               return 0;
1555 +
1556 +       printk(KERN_WARNING
1557 +              "IMQ: invalid device specified, highest is %u\n",
1558 +              IMQ_MAX_DEVS - 1);
1559 +       return -EINVAL;
1560 +}
1561 +/* The same IMQ target description, registered once per address family. */
1562 +static struct xt_target xt_imq_reg[] __read_mostly = {
1563 +       {
1564 +               .name           = "IMQ",
1565 +               .family         = AF_INET,      /* IPv4 entry */
1566 +               .checkentry     = imq_checkentry,
1567 +               .target         = imq_target,
1568 +               .targetsize     = sizeof(struct xt_imq_info),
1569 +               .table          = "mangle",
1570 +               .me             = THIS_MODULE
1571 +       },
1572 +       {
1573 +               .name           = "IMQ",
1574 +               .family         = AF_INET6,     /* IPv6 entry */
1575 +               .checkentry     = imq_checkentry,
1576 +               .target         = imq_target,
1577 +               .targetsize     = sizeof(struct xt_imq_info),
1578 +               .table          = "mangle",
1579 +               .me             = THIS_MODULE
1580 +       },
1581 +};
1582 +/* Module init: register every entry of xt_imq_reg; returns the registration result. */
1583 +static int __init imq_init(void)
1584 +{
1585 +       return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1586 +}
1587 +/* Module exit: unregister every entry of xt_imq_reg. */
1588 +static void __exit imq_fini(void)
1589 +{
1590 +       xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1591 +}
1592 +
1593 +module_init(imq_init);
1594 +module_exit(imq_fini);
1595 +
1596 +MODULE_AUTHOR("http://www.linuximq.net");
1597 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. "
1598 +                  "See http://www.linuximq.net/ for more information.");
1599 +MODULE_LICENSE("GPL");
1600 +MODULE_ALIAS("ipt_IMQ");
1601 +MODULE_ALIAS("ip6t_IMQ");
1602 +
This page took 0.139674 seconds and 4 git commands to generate.