]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-imq.patch
- md5 fix
[packages/kernel.git] / kernel-imq.patch
1 diff -uNr linux-3.5/drivers/net/imq.c linux-3.5-imq/drivers/net/imq.c
2 --- linux-3.5/drivers/net/imq.c 1970-01-01 02:00:00.000000000 +0200
3 +++ linux-3.5-imq/drivers/net/imq.c     2012-07-25 12:31:59.709321554 +0300
4 @@ -0,0 +1,857 @@
5 +/*
6 + *             Pseudo-driver for the intermediate queue device.
7 + *
8 + *             This program is free software; you can redistribute it and/or
9 + *             modify it under the terms of the GNU General Public License
10 + *             as published by the Free Software Foundation; either version
11 + *             2 of the License, or (at your option) any later version.
12 + *
13 + * Authors:    Patrick McHardy, <kaber@trash.net>
14 + *
15 + *            The first version was written by Martin Devera, <devik@cdi.cz>
16 + *
17 + * Credits:    Jan Rafaj <imq2t@cedric.vabo.cz>
18 + *              - Update patch to 2.4.21
19 + *             Sebastian Strollo <sstrollo@nortelnetworks.com>
20 + *              - Fix "Dead-loop on netdevice imq"-issue
21 + *             Marcel Sebek <sebek64@post.cz>
22 + *              - Update to 2.6.2-rc1
23 + *
24 + *            After some time of inactivity there is a group taking care
25 + *            of IMQ again: http://www.linuximq.net
26 + *
27 + *
28 + *            2004/06/30 - New version of IMQ patch to kernels <=2.6.7
29 + *             including the following changes:
30 + *
31 + *            - Correction of ipv6 support "+"s issue (Hasso Tepper)
32 + *            - Correction of imq_init_devs() issue that resulted in
33 + *            kernel OOPS unloading IMQ as module (Norbert Buchmuller)
34 + *            - Addition of functionality to choose number of IMQ devices
35 + *            during kernel config (Andre Correa)
36 + *            - Addition of functionality to choose how IMQ hooks on
37 + *            PRE and POSTROUTING (after or before NAT) (Andre Correa)
38 + *            - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
39 + *
40 + *
41 + *             2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
42 + *             released with almost no problems. 2.6.14-x was released
43 + *             with some important changes: nfcache was removed; After
44 + *             some weeks of trouble we figured out that some IMQ fields
45 + *             in skb were missing in skbuff.c - skb_clone and copy_skb_header.
46 + *             These functions are correctly patched by this new patch version.
47 + *
48 + *             Thanks for all who helped to figure out all the problems with
49 + *             2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
50 + *             Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
51 + *             I didn't forget anybody). I apologize again for my lack of time.
52 + *
53 + *
54 + *             2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
55 + *             of qdisc_restart() and moved qdisc_run() to tasklet to avoid
56 + *             recursive locking. New initialization routines to fix 'rmmod' not
57 + *             working anymore. Used code from ifb.c. (Jussi Kivilinna)
58 + *
59 + *             2008/08/06 - 2.6.26 - (JK)
60 + *              - Replaced tasklet with 'netif_schedule()'.
61 + *              - Cleaned up and added comments for imq_nf_queue().
62 + *
63 + *             2009/04/12
64 + *              - Add skb_save_cb/skb_restore_cb helper functions for backing up
65 + *                control buffer. This is needed because qdisc-layer on kernels
66 + *                2.6.27 and newer overwrite control buffer. (Jussi Kivilinna)
67 + *              - Add better locking for IMQ device. Hopefully this will solve
68 + *                SMP issues. (Jussi Kivilinna)
69 + *              - Port to 2.6.27
70 + *              - Port to 2.6.28
71 + *              - Port to 2.6.29 + fix rmmod not working
72 + *
73 + *             2009/04/20 - (Jussi Kivilinna)
74 + *              - Use netdevice feature flags to avoid extra packet handling
75 + *                by core networking layer and possibly increase performance.
76 + *
77 + *             2009/09/26 - (Jussi Kivilinna)
78 + *              - Add imq_nf_reinject_lockless to fix deadlock with
79 + *                imq_nf_queue/imq_nf_reinject.
80 + *
81 + *             2009/12/08 - (Jussi Kivilinna)
82 + *              - Port to 2.6.32
83 + *              - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit()
84 + *              - Also add better error checking for skb->nf_queue_entry usage
85 + *
86 + *             2010/02/25 - (Jussi Kivilinna)
87 + *              - Port to 2.6.33
88 + *
89 + *             2010/08/15 - (Jussi Kivilinna)
90 + *              - Port to 2.6.35
91 + *              - Simplify hook registration by using nf_register_hooks.
92 + *              - nf_reinject doesn't need spinlock around it, therefore remove
93 + *                imq_nf_reinject function. Other nf_reinject users protect
94 + *                their own data with spinlock. With IMQ however all data that
95 + *                is needed is stored per skbuff, so no locking is needed.
96 + *              - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of
97 + *                NF_QUEUE, this allows working coexistence of IMQ and other
98 + *                NF_QUEUE users.
99 + *              - Make IMQ multi-queue. Number of IMQ device queues can be
100 + *                increased with 'numqueues' module parameters. Default number
101 + *                of queues is 1, in other words by default IMQ works as
102 + *                single-queue device. Multi-queue selection is based on
103 + *                IFB multi-queue patch by Changli Gao <xiaosuo@gmail.com>.
104 + *
105 + *             2011/03/18 - (Jussi Kivilinna)
106 + *              - Port to 2.6.38
107 + *
108 + *             2011/07/12 - (syoder89@gmail.com)
109 + *              - Crash fix that happens when the receiving interface has more
110 + *                than one queue (add missing skb_set_queue_mapping in
111 + *                imq_select_queue).
112 + *
113 + *             2011/07/26 - (Jussi Kivilinna)
114 + *              - Add queue mapping checks for packets exiting IMQ.
115 + *              - Port to 3.0
116 + *
117 + *             2011/08/16 - (Jussi Kivilinna)
118 + *              - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2
119 + *
120 + *             2011/11/03 - Germano Michel <germanomichel@gmail.com>
121 + *              - Fix IMQ for net namespaces
122 + *
123 + *             2011/11/04 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
124 + *              - Port to 3.1
125 + *              - Clean-up, move 'get imq device pointer by imqX name' to
126 + *                separate function from imq_nf_queue().
127 + *
128 + *             2012/01/05 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
129 + *              - Port to 3.2
130 + *
131 + *             2012/03/19 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
132 + *              - Port to 3.3
133 + *
134 + *            Also, many thanks to pablo Sebastian Greco for making the initial
135 + *            patch and to those who helped the testing.
136 + *
137 + *             More info at: http://www.linuximq.net/ (Andre Correa)
138 + */
139 +
140 +#include <linux/module.h>
141 +#include <linux/kernel.h>
142 +#include <linux/moduleparam.h>
143 +#include <linux/list.h>
144 +#include <linux/skbuff.h>
145 +#include <linux/netdevice.h>
146 +#include <linux/etherdevice.h>
147 +#include <linux/rtnetlink.h>
148 +#include <linux/if_arp.h>
149 +#include <linux/netfilter.h>
150 +#include <linux/netfilter_ipv4.h>
151 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
152 +       #include <linux/netfilter_ipv6.h>
153 +#endif
154 +#include <linux/imq.h>
155 +#include <net/pkt_sched.h>
156 +#include <net/netfilter/nf_queue.h>
157 +#include <net/sock.h>
158 +#include <linux/ip.h>
159 +#include <linux/ipv6.h>
160 +#include <linux/if_vlan.h>
161 +#include <linux/if_pppox.h>
162 +#include <net/ip.h>
163 +#include <net/ipv6.h>
164 +
165 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
166 +
167 +static nf_hookfn imq_nf_hook;
168 +
169 +static struct nf_hook_ops imq_ops[] = {
170 +       {
171 +       /* imq_ingress_ipv4 */
172 +               .hook           = imq_nf_hook,
173 +               .owner          = THIS_MODULE,
174 +               .pf             = PF_INET,
175 +               .hooknum        = NF_INET_PRE_ROUTING,
176 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
177 +               .priority       = NF_IP_PRI_MANGLE + 1,
178 +#else
179 +               .priority       = NF_IP_PRI_NAT_DST + 1,
180 +#endif
181 +       },
182 +       {
183 +       /* imq_egress_ipv4 */
184 +               .hook           = imq_nf_hook,
185 +               .owner          = THIS_MODULE,
186 +               .pf             = PF_INET,
187 +               .hooknum        = NF_INET_POST_ROUTING,
188 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
189 +               .priority       = NF_IP_PRI_LAST,
190 +#else
191 +               .priority       = NF_IP_PRI_NAT_SRC - 1,
192 +#endif
193 +       },
194 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
195 +       {
196 +       /* imq_ingress_ipv6 */
197 +               .hook           = imq_nf_hook,
198 +               .owner          = THIS_MODULE,
199 +               .pf             = PF_INET6,
200 +               .hooknum        = NF_INET_PRE_ROUTING,
201 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
202 +               .priority       = NF_IP6_PRI_MANGLE + 1,
203 +#else
204 +               .priority       = NF_IP6_PRI_NAT_DST + 1,
205 +#endif
206 +       },
207 +       {
208 +       /* imq_egress_ipv6 */
209 +               .hook           = imq_nf_hook,
210 +               .owner          = THIS_MODULE,
211 +               .pf             = PF_INET6,
212 +               .hooknum        = NF_INET_POST_ROUTING,
213 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
214 +               .priority       = NF_IP6_PRI_LAST,
215 +#else
216 +               .priority       = NF_IP6_PRI_NAT_SRC - 1,
217 +#endif
218 +       },
219 +#endif
220 +};
221 +
222 +#if defined(CONFIG_IMQ_NUM_DEVS)
223 +static int numdevs = CONFIG_IMQ_NUM_DEVS;
224 +#else
225 +static int numdevs = IMQ_MAX_DEVS;
226 +#endif
227 +
228 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
229 +
230 +#define IMQ_MAX_QUEUES 32
231 +static int numqueues = 1;
232 +static u32 imq_hashrnd;
233 +
234 +static inline __be16 pppoe_proto(const struct sk_buff *skb)
235 +{
236 +       return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
237 +                       sizeof(struct pppoe_hdr)));
238 +}
239 +
240 +static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
241 +{
242 +       unsigned int pull_len;
243 +       u16 protocol = skb->protocol;
244 +       u32 addr1, addr2;
245 +       u32 hash, ihl = 0;
246 +       union {
247 +               u16 in16[2];
248 +               u32 in32;
249 +       } ports;
250 +       u8 ip_proto;
251 +
252 +       pull_len = 0;
253 +
254 +recheck:
255 +       switch (protocol) {
256 +       case htons(ETH_P_8021Q): {
257 +               if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
258 +                       goto other;
259 +
260 +               pull_len += VLAN_HLEN;
261 +               skb->network_header += VLAN_HLEN;
262 +
263 +               protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
264 +               goto recheck;
265 +       }
266 +
267 +       case htons(ETH_P_PPP_SES): {
268 +               if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
269 +                       goto other;
270 +
271 +               pull_len += PPPOE_SES_HLEN;
272 +               skb->network_header += PPPOE_SES_HLEN;
273 +
274 +               protocol = pppoe_proto(skb);
275 +               goto recheck;
276 +       }
277 +
278 +       case htons(ETH_P_IP): {
279 +               const struct iphdr *iph = ip_hdr(skb);
280 +
281 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
282 +                       goto other;
283 +
284 +               addr1 = iph->daddr;
285 +               addr2 = iph->saddr;
286 +
287 +               ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
288 +                                iph->protocol : 0;
289 +               ihl = ip_hdrlen(skb);
290 +
291 +               break;
292 +       }
293 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
294 +       case htons(ETH_P_IPV6): {
295 +               const struct ipv6hdr *iph = ipv6_hdr(skb);
296 +               __be16 fo = 0;
297 +
298 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
299 +                       goto other;
300 +
301 +               addr1 = iph->daddr.s6_addr32[3];
302 +               addr2 = iph->saddr.s6_addr32[3];
303 +               ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto, &fo);
304 +               if (unlikely(ihl < 0))
305 +                       goto other;
306 +
307 +               break;
308 +       }
309 +#endif
310 +       default:
311 +other:
312 +               if (pull_len != 0) {
313 +                       skb_push(skb, pull_len);
314 +                       skb->network_header -= pull_len;
315 +               }
316 +
317 +               return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
318 +       }
319 +
320 +       if (addr1 > addr2)
321 +               swap(addr1, addr2);
322 +
323 +       switch (ip_proto) {
324 +       case IPPROTO_TCP:
325 +       case IPPROTO_UDP:
326 +       case IPPROTO_DCCP:
327 +       case IPPROTO_ESP:
328 +       case IPPROTO_AH:
329 +       case IPPROTO_SCTP:
330 +       case IPPROTO_UDPLITE: {
331 +               if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
332 +                       if (ports.in16[0] > ports.in16[1])
333 +                               swap(ports.in16[0], ports.in16[1]);
334 +                       break;
335 +               }
336 +               /* fall-through */
337 +       }
338 +       default:
339 +               ports.in32 = 0;
340 +               break;
341 +       }
342 +
343 +       if (pull_len != 0) {
344 +               skb_push(skb, pull_len);
345 +               skb->network_header -= pull_len;
346 +       }
347 +
348 +       hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
349 +
350 +       return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
351 +}
352 +
353 +static inline bool sk_tx_queue_recorded(struct sock *sk)
354 +{
355 +       return (sk_tx_queue_get(sk) >= 0);
356 +}
357 +
358 +static struct netdev_queue *imq_select_queue(struct net_device *dev,
359 +                                               struct sk_buff *skb)
360 +{
361 +       u16 queue_index = 0;
362 +       u32 hash;
363 +
364 +       if (likely(dev->real_num_tx_queues == 1))
365 +               goto out;
366 +
367 +       /* IMQ can be receiving ingress or egress packets. */
368 +
369 +       /* First check whether an rx queue was recorded on the skb */
370 +       if (skb_rx_queue_recorded(skb)) {
371 +               queue_index = skb_get_rx_queue(skb);
372 +               goto out;
373 +       }
374 +
375 +       /* Check if the socket has a tx queue recorded */
376 +       if (sk_tx_queue_recorded(skb->sk)) {
377 +               queue_index = sk_tx_queue_get(skb->sk);
378 +               goto out;
379 +       }
380 +
381 +       /* Try to use the socket hash, scaled to the number of tx queues */
382 +       if (skb->sk && skb->sk->sk_hash) {
383 +               hash = skb->sk->sk_hash;
384 +               queue_index =
385 +                       (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
386 +               goto out;
387 +       }
388 +
389 +       /* Last resort: generate a hash from the packet data */
390 +       queue_index = imq_hash(dev, skb);
391 +
392 +out:
393 +       if (unlikely(queue_index >= dev->real_num_tx_queues))
394 +               queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
395 +
396 +       skb_set_queue_mapping(skb, queue_index);
397 +       return netdev_get_tx_queue(dev, queue_index);
398 +}
399 +
400 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
401 +{
402 +       return &dev->stats;
403 +}
404 +
405 +/* called for packets kfree'd in qdiscs at places other than enqueue */
406 +static void imq_skb_destructor(struct sk_buff *skb)
407 +{
408 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
409 +
410 +       skb->nf_queue_entry = NULL;
411 +
412 +       if (entry) {
413 +               nf_queue_entry_release_refs(entry);
414 +               kfree(entry);
415 +       }
416 +
417 +       skb_restore_cb(skb); /* kfree backup */
418 +}
419 +
420 +static void imq_done_check_queue_mapping(struct sk_buff *skb,
421 +                                        struct net_device *dev)
422 +{
423 +       unsigned int queue_index;
424 +
425 +       /* Don't let queue_mapping be left too large after exiting IMQ */
426 +       if (likely(skb->dev != dev && skb->dev != NULL)) {
427 +               queue_index = skb_get_queue_mapping(skb);
428 +               if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) {
429 +                       queue_index = (u16)((u32)queue_index %
430 +                                               skb->dev->real_num_tx_queues);
431 +                       skb_set_queue_mapping(skb, queue_index);
432 +               }
433 +       } else {
434 +               /* skb->dev was IMQ device itself or NULL, be on safe side and
435 +                * just clear queue mapping.
436 +                */
437 +               skb_set_queue_mapping(skb, 0);
438 +       }
439 +}
440 +
441 +static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
442 +{
443 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
444 +
445 +       skb->nf_queue_entry = NULL;
446 +       dev->trans_start = jiffies;
447 +
448 +       dev->stats.tx_bytes += skb->len;
449 +       dev->stats.tx_packets++;
450 +
451 +       if (unlikely(entry == NULL)) {
452 +               /* We don't know what is going on here: the packet is queued
453 +                * for the imq device, but (probably) not by us.
454 +                *
455 +                * If this packet was not sent here by imq_nf_queue(), then
456 +                * skb_save_cb() was not used and skb_free() should not show:
457 +                *   WARNING: IMQ: kfree_skb: skb->cb_next:..
458 +                * and/or
459 +                *   WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
460 +                *
461 +                * However if this message is shown, then IMQ is somehow broken
462 +                * and you should report this to linuximq.net.
463 +                */
464 +
465 +               /* imq_dev_xmit is a black hole that eats all packets: report
466 +                * that we ate this packet happily and bump the dropped counter.
467 +                */
468 +
469 +               dev->stats.tx_dropped++;
470 +               dev_kfree_skb(skb);
471 +
472 +               return NETDEV_TX_OK;
473 +       }
474 +
475 +       skb_restore_cb(skb); /* restore skb->cb saved before enqueueing */
476 +
477 +       skb->imq_flags = 0;
478 +       skb->destructor = NULL;
479 +
480 +       imq_done_check_queue_mapping(skb, dev);
481 +
482 +       nf_reinject(entry, NF_ACCEPT);
483 +
484 +       return NETDEV_TX_OK;
485 +}
486 +
487 +static struct net_device *get_imq_device_by_index(int index)
488 +{
489 +       struct net_device *dev = NULL;
490 +       struct net *net;
491 +       char buf[8];
492 +
493 +       /* get device by name and cache result */
494 +       snprintf(buf, sizeof(buf), "imq%d", index);
495 +
496 +       /* Search device from all namespaces. */
497 +       for_each_net(net) {
498 +               dev = dev_get_by_name(net, buf);
499 +               if (dev)
500 +                       break;
501 +       }
502 +
503 +       if (WARN_ON_ONCE(dev == NULL)) {
504 +               /* IMQ device not found. Exotic config? */
505 +               return ERR_PTR(-ENODEV);
506 +       }
507 +
508 +       imq_devs_cache[index] = dev;
509 +       dev_put(dev);
510 +
511 +       return dev;
512 +}
513 +
514 +/* nf_queue handler: enqueue entry->skb on the qdisc of the imq device
515 + * selected by imq_flags. Returns 0 if the entry was consumed, else < 0. */
516 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
517 +{
518 +       struct net_device *dev;
519 +       struct sk_buff *skb_orig = NULL, *skb, *skb_shared;
520 +       struct Qdisc *q;
521 +       struct netdev_queue *txq;
522 +       spinlock_t *root_lock;
523 +       int users, index;
524 +       int retval = -EINVAL;
525 +       unsigned int orig_queue_index;
526 +
527 +       index = entry->skb->imq_flags & IMQ_F_IFMASK;
528 +       if (unlikely(index > numdevs - 1)) {
529 +               if (net_ratelimit())
530 +                       printk(KERN_WARNING
531 +                              "IMQ: invalid device specified, highest is %u\n",
532 +                              numdevs - 1);
533 +               goto out; /* retval is still -EINVAL */
534 +       }
535 +
536 +       /* check for imq device by index from cache */
537 +       dev = imq_devs_cache[index];
538 +       if (unlikely(!dev)) {
539 +               dev = get_imq_device_by_index(index);
540 +               if (IS_ERR(dev)) {
541 +                       retval = PTR_ERR(dev);
542 +                       goto out;
543 +               }
544 +       }
545 +
546 +       if (unlikely(!(dev->flags & IFF_UP))) {
547 +               entry->skb->imq_flags = 0;
548 +               nf_reinject(entry, NF_ACCEPT);
549 +               retval = 0;
550 +               goto out;
551 +       }
552 +       dev->last_rx = jiffies;
553 +
554 +       skb = entry->skb;
555 +
556 +       /* skb has owner? => make clone */
557 +       if (unlikely(skb->destructor)) {
558 +               skb_orig = skb;
559 +               skb = skb_clone(skb, GFP_ATOMIC);
560 +               if (unlikely(!skb)) {
561 +                       retval = -ENOMEM;
562 +                       goto out;
563 +               }
564 +               entry->skb = skb;
565 +       }
566 +
567 +       dev->stats.rx_bytes += skb->len;
568 +       dev->stats.rx_packets++;
569 +
570 +       if (!skb->dev) {
571 +               /* skb->dev == NULL causes problems, try to find the cause. */
572 +               if (net_ratelimit()) {
573 +                       dev_warn(&dev->dev,
574 +                                "received packet with skb->dev == NULL\n");
575 +                       dump_stack();
576 +               }
577 +
578 +               skb->dev = dev;
579 +       }
580 +
581 +       /* Disables softirqs for lock below */
582 +       rcu_read_lock_bh();
583 +
584 +       /* Multi-queue selection */
585 +       orig_queue_index = skb_get_queue_mapping(skb);
586 +       txq = imq_select_queue(dev, skb);
587 +
588 +       q = rcu_dereference(txq->qdisc);
589 +       if (unlikely(!q->enqueue))
590 +               goto packet_not_eaten_by_imq_dev;
591 +
592 +       /* Tag skb only now: the bail-out above must not leave a stale entry */
593 +       skb->nf_queue_entry = entry;
594 +       root_lock = qdisc_lock(q);
595 +       spin_lock(root_lock);
596 +
597 +       users = atomic_read(&skb->users);
598 +
599 +       skb_shared = skb_get(skb); /* increase reference count by one */
600 +       skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
601 +                                       overwrite it */
602 +       qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
603 +
604 +       if (likely(atomic_read(&skb_shared->users) == users + 1)) {
605 +               kfree_skb(skb_shared); /* decrease reference count by one */
606 +
607 +               skb->destructor = &imq_skb_destructor;
608 +
609 +               /* cloned? */
610 +               if (unlikely(skb_orig))
611 +                       kfree_skb(skb_orig); /* free original */
612 +
613 +               spin_unlock(root_lock);
614 +               rcu_read_unlock_bh();
615 +
616 +               /* schedule qdisc dequeue */
617 +               __netif_schedule(q);
618 +
619 +               retval = 0;
620 +               goto out;
621 +       } else {
622 +               skb_restore_cb(skb_shared); /* restore skb->cb */
623 +               skb->nf_queue_entry = NULL;
624 +               /* qdisc dropped packet and decreased skb reference count of
625 +                * skb, so we don't really want to and try refree as that would
626 +                * actually destroy the skb. */
627 +               spin_unlock(root_lock);
628 +               goto packet_not_eaten_by_imq_dev;
629 +       }
630 +
631 +packet_not_eaten_by_imq_dev:
632 +       skb_set_queue_mapping(skb, orig_queue_index);
633 +       rcu_read_unlock_bh();
634 +
635 +       /* cloned? restore original */
636 +       if (unlikely(skb_orig)) {
637 +               kfree_skb(skb);
638 +               entry->skb = skb_orig;
639 +       }
640 +       retval = -1;
641 +out:
642 +       return retval;
643 +}
644 +
645 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
646 +                               const struct net_device *indev,
647 +                               const struct net_device *outdev,
648 +                               int (*okfn)(struct sk_buff *))
649 +{
650 +       return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
651 +}
652 +
653 +static int imq_close(struct net_device *dev)
654 +{
655 +       netif_stop_queue(dev);
656 +       return 0;
657 +}
658 +
659 +static int imq_open(struct net_device *dev)
660 +{
661 +       netif_start_queue(dev);
662 +       return 0;
663 +}
664 +
665 +static const struct net_device_ops imq_netdev_ops = {
666 +       .ndo_open               = imq_open,
667 +       .ndo_stop               = imq_close,
668 +       .ndo_start_xmit         = imq_dev_xmit,
669 +       .ndo_get_stats          = imq_get_stats,
670 +};
671 +
672 +static void imq_setup(struct net_device *dev)
673 +{
674 +       dev->netdev_ops         = &imq_netdev_ops;
675 +       dev->type               = ARPHRD_VOID;
676 +       dev->mtu                = 16000; /* too small? */
677 +       dev->tx_queue_len       = 11000; /* too big? */
678 +       dev->flags              = IFF_NOARP;
679 +       dev->features           = NETIF_F_SG | NETIF_F_FRAGLIST |
680 +                                 NETIF_F_GSO | NETIF_F_HW_CSUM |
681 +                                 NETIF_F_HIGHDMA;
682 +       dev->priv_flags         &= ~(IFF_XMIT_DST_RELEASE |
683 +                                    IFF_TX_SKB_SHARING);
684 +}
685 +
686 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
687 +{
688 +       int ret = 0;
689 +
690 +       if (tb[IFLA_ADDRESS]) {
691 +               if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
692 +                       ret = -EINVAL;
693 +                       goto end;
694 +               }
695 +               if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
696 +                       ret = -EADDRNOTAVAIL;
697 +                       goto end;
698 +               }
699 +       }
700 +       return 0;
701 +end:
702 +       printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
703 +       return ret;
704 +}
705 +
706 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
707 +       .kind           = "imq",
708 +       .priv_size      = 0,
709 +       .setup          = imq_setup,
710 +       .validate       = imq_validate,
711 +};
712 +
713 +static const struct nf_queue_handler imq_nfqh = {
714 +       .name  = "imq",
715 +       .outfn = imq_nf_queue,
716 +};
717 +
718 +static int __init imq_init_hooks(void)
719 +{
720 +       int ret;
721 +
722 +       nf_register_queue_imq_handler(&imq_nfqh);
723 +
724 +       ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
725 +       if (ret < 0)
726 +               nf_unregister_queue_imq_handler();
727 +
728 +       return ret;
729 +}
730 +
731 +static int __init imq_init_one(int index)
732 +{
733 +       struct net_device *dev;
734 +       int ret;
735 +
736 +       dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
737 +       if (!dev)
738 +               return -ENOMEM;
739 +
740 +       ret = dev_alloc_name(dev, dev->name);
741 +       if (ret < 0)
742 +               goto fail;
743 +
744 +       dev->rtnl_link_ops = &imq_link_ops;
745 +       ret = register_netdevice(dev);
746 +       if (ret < 0)
747 +               goto fail;
748 +
749 +       return 0;
750 +fail:
751 +       free_netdev(dev);
752 +       return ret;
753 +}
754 +
755 +/* Validate module params, register link ops, run imq_init_one() per device. */
756 +static int __init imq_init_devs(void)
757 +{
758 +       int err, i;
759 +
760 +       if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
761 +               printk(KERN_ERR "IMQ: numdevs has to be between 1 and %u\n",
762 +                      IMQ_MAX_DEVS);
763 +               return -EINVAL;
764 +       }
765 +
766 +       if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
767 +               printk(KERN_ERR "IMQ: numqueues has to be between 1 and %u\n",
768 +                      IMQ_MAX_QUEUES);
769 +               return -EINVAL;
770 +       }
771 +
772 +       get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
773 +
774 +       rtnl_lock();
775 +       err = __rtnl_link_register(&imq_link_ops);
776 +       if (!err) { /* never unregister ops that failed to register */
777 +               for (i = 0; i < numdevs && !err; i++)
778 +                       err = imq_init_one(i);
779 +               if (err) {
780 +                       __rtnl_link_unregister(&imq_link_ops);
781 +                       memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
782 +               }
783 +       }
784 +       rtnl_unlock();
785 +       return err;
786 +}
787 +
788 +static int __init imq_init_module(void)
789 +{
790 +       int err;
791 +
792 +#if defined(CONFIG_IMQ_NUM_DEVS)
793 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
794 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
795 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
796 +#endif
797 +
798 +       err = imq_init_devs();
799 +       if (err) {
800 +               printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
801 +               return err;
802 +       }
803 +
804 +       err = imq_init_hooks();
805 +       if (err) {
806 +               printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
807 +               rtnl_link_unregister(&imq_link_ops);
808 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
809 +               return err;
810 +       }
811 +
812 +       printk(KERN_INFO "IMQ driver loaded successfully. "
813 +               "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues);
814 +
815 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
816 +       printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
817 +#else
818 +       printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
819 +#endif
820 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
821 +       printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
822 +#else
823 +       printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
824 +#endif
825 +
826 +       return 0;
827 +}
828 +
829 +static void __exit imq_unhook(void)
830 +{
831 +       nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
832 +       nf_unregister_queue_imq_handler();
833 +}
834 +
835 +static void __exit imq_cleanup_devs(void)
836 +{
837 +       rtnl_link_unregister(&imq_link_ops);
838 +       memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
839 +}
840 +
841 +static void __exit imq_exit_module(void)
842 +{
843 +       imq_unhook();
844 +       imq_cleanup_devs();
845 +       printk(KERN_INFO "IMQ driver unloaded successfully.\n");
846 +}
847 +
848 +module_init(imq_init_module);
849 +module_exit(imq_exit_module);
850 +
851 +module_param(numdevs, int, 0);
852 +module_param(numqueues, int, 0);
853 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
854 +                       "be created)");
855 +MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
856 +MODULE_AUTHOR("http://www.linuximq.net");
857 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
858 +                       "http://www.linuximq.net/ for more information.");
859 +MODULE_LICENSE("GPL");
860 +MODULE_ALIAS_RTNL_LINK("imq");
861 +
862 diff -uNr linux-3.5/drivers/net/Kconfig linux-3.5-imq/drivers/net/Kconfig
863 --- linux-3.5/drivers/net/Kconfig       2012-07-21 23:58:29.000000000 +0300
864 +++ linux-3.5-imq/drivers/net/Kconfig   2012-07-25 12:31:59.709321554 +0300
865 @@ -192,6 +192,125 @@
866         depends on RIONET
867         default "128"
868  
869 +config IMQ
870 +       tristate "IMQ (intermediate queueing device) support"
871 +       depends on NETDEVICES && NETFILTER
872 +       ---help---
873 +         The IMQ device(s) is used as placeholder for QoS queueing
874 +         disciplines. Every packet entering/leaving the IP stack can be
875 +         directed through the IMQ device where it's enqueued/dequeued to the
876 +         attached qdisc. This allows you to treat network devices as classes
877 +         and distribute bandwidth among them. Iptables is used to specify
878 +         through which IMQ device, if any, packets travel.
879 +
880 +         More information at: http://www.linuximq.net/
881 +
882 +         To compile this driver as a module, choose M here: the module
883 +         will be called imq.  If unsure, say N.
884 +
885 +choice
886 +       prompt "IMQ behavior (PRE/POSTROUTING)"
887 +       depends on IMQ
888 +       default IMQ_BEHAVIOR_AB
889 +       help
890 +         This setting defines how IMQ behaves in respect to its
891 +         hooking in PREROUTING and POSTROUTING.
892 +
893 +         IMQ can work in any of the following ways:
894 +
895 +             PREROUTING   |      POSTROUTING
896 +         -----------------|-------------------
897 +         #1  After NAT    |      After NAT
898 +         #2  After NAT    |      Before NAT
899 +         #3  Before NAT   |      After NAT
900 +         #4  Before NAT   |      Before NAT
901 +
902 +         The default behavior is to hook after NAT on PREROUTING
903 +         and before NAT on POSTROUTING (#2).
904 +
905 +         These settings are especially useful when trying to use IMQ
906 +         to shape NATed clients.
907 +
908 +         More information can be found at: www.linuximq.net
909 +
910 +         If not sure leave the default settings alone.
911 +
912 +config IMQ_BEHAVIOR_AA
913 +       bool "IMQ AA"
914 +       help
915 +         This setting defines how IMQ behaves in respect to its
916 +         hooking in PREROUTING and POSTROUTING.
917 +
918 +         Choosing this option will make IMQ hook like this:
919 +
920 +         PREROUTING:   After NAT
921 +         POSTROUTING:  After NAT
922 +
923 +         More information can be found at: www.linuximq.net
924 +
925 +         If not sure leave the default settings alone.
926 +
927 +config IMQ_BEHAVIOR_AB
928 +       bool "IMQ AB"
929 +       help
930 +         This setting defines how IMQ behaves in respect to its
931 +         hooking in PREROUTING and POSTROUTING.
932 +
933 +         Choosing this option will make IMQ hook like this:
934 +
935 +         PREROUTING:   After NAT
936 +         POSTROUTING:  Before NAT
937 +
938 +         More information can be found at: www.linuximq.net
939 +
940 +         If not sure leave the default settings alone.
941 +
942 +config IMQ_BEHAVIOR_BA
943 +       bool "IMQ BA"
944 +       help
945 +         This setting defines how IMQ behaves in respect to its
946 +         hooking in PREROUTING and POSTROUTING.
947 +
948 +         Choosing this option will make IMQ hook like this:
949 +
950 +         PREROUTING:   Before NAT
951 +         POSTROUTING:  After NAT
952 +
953 +         More information can be found at: www.linuximq.net
954 +
955 +         If not sure leave the default settings alone.
956 +
957 +config IMQ_BEHAVIOR_BB
958 +       bool "IMQ BB"
959 +       help
960 +         This setting defines how IMQ behaves in respect to its
961 +         hooking in PREROUTING and POSTROUTING.
962 +
963 +         Choosing this option will make IMQ hook like this:
964 +
965 +         PREROUTING:   Before NAT
966 +         POSTROUTING:  Before NAT
967 +
968 +         More information can be found at: www.linuximq.net
969 +
970 +         If not sure leave the default settings alone.
971 +
972 +endchoice
973 +
974 +config IMQ_NUM_DEVS
975 +       int "Number of IMQ devices"
976 +       range 2 16
977 +       depends on IMQ
978 +       default "16"
979 +       help
980 +         This setting defines how many IMQ devices will be created.
981 +
982 +         The default value is 16.
983 +
984 +         More information can be found at: www.linuximq.net
985 +
986 +         If not sure leave the default settings alone.
987 +
988  config TUN
989         tristate "Universal TUN/TAP device driver support"
990         select CRC32
991 diff -uNr linux-3.5/drivers/net/Makefile linux-3.5-imq/drivers/net/Makefile
992 --- linux-3.5/drivers/net/Makefile      2012-07-21 23:58:29.000000000 +0300
993 +++ linux-3.5-imq/drivers/net/Makefile  2012-07-25 12:31:59.709321554 +0300
994 @@ -9,6 +9,7 @@
995  obj-$(CONFIG_DUMMY) += dummy.o
996  obj-$(CONFIG_EQUALIZER) += eql.o
997  obj-$(CONFIG_IFB) += ifb.o
998 +obj-$(CONFIG_IMQ) += imq.o
999  obj-$(CONFIG_MACVLAN) += macvlan.o
1000  obj-$(CONFIG_MACVTAP) += macvtap.o
1001  obj-$(CONFIG_MII) += mii.o
1002 diff -uNr linux-3.5/include/linux/imq.h linux-3.5-imq/include/linux/imq.h
1003 --- linux-3.5/include/linux/imq.h       1970-01-01 02:00:00.000000000 +0200
1004 +++ linux-3.5-imq/include/linux/imq.h   2012-07-25 12:31:59.709321554 +0300
1005 @@ -0,0 +1,13 @@
1006 +#ifndef _IMQ_H
1007 +#define _IMQ_H
1008 +
1009 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
1010 +#define IMQ_F_BITS     5       /* width of the skb imq_flags bitfield */
1011 +
1012 +#define IMQ_F_IFMASK   0x0f    /* low 4 bits: target imq device index */
1013 +#define IMQ_F_ENQUEUE  0x10    /* set together with the index by xt_IMQ */
1014 +
1015 +#define IMQ_MAX_DEVS   (IMQ_F_IFMASK + 1)      /* 16 */
1016 +
1017 +#endif /* _IMQ_H */
1018 +
1019 diff -uNr linux-3.5/include/linux/netfilter/xt_IMQ.h linux-3.5-imq/include/linux/netfilter/xt_IMQ.h
1020 --- linux-3.5/include/linux/netfilter/xt_IMQ.h  1970-01-01 02:00:00.000000000 +0200
1021 +++ linux-3.5-imq/include/linux/netfilter/xt_IMQ.h      2012-07-25 12:31:59.709321554 +0300
1022 @@ -0,0 +1,9 @@
1023 +#ifndef _XT_IMQ_H
1024 +#define _XT_IMQ_H
1025 +
1026 +struct xt_imq_info {
1027 +       unsigned int todev;     /* target imq device */
1028 +};
1029 +
1030 +#endif /* _XT_IMQ_H */
1031 +
1032 diff -uNr linux-3.5/include/uapi/linux/netfilter.h linux-3.5-imq/include/uapi/linux/netfilter.h
1033 --- linux-3.5/include/uapi/linux/netfilter.h    2012-07-21 23:58:29.000000000 +0300
1034 +++ linux-3.5-imq/include/uapi/linux/netfilter.h        2012-07-25 12:31:59.709321554 +0300
1035 @@ -22,7 +22,8 @@
1036  #define NF_QUEUE 3
1037  #define NF_REPEAT 4
1038  #define NF_STOP 5
1039 -#define NF_MAX_VERDICT NF_STOP
1040 +#define NF_IMQ_QUEUE 6
1041 +#define NF_MAX_VERDICT NF_IMQ_QUEUE
1042  
1043  /* we overload the higher bits for encoding auxiliary data such as the queue
1044   * number or errno values. Not nice, but better than additional function
1045 diff -uNr linux-3.5/include/linux/netfilter_ipv4/ipt_IMQ.h linux-3.5-imq/include/linux/netfilter_ipv4/ipt_IMQ.h
1046 --- linux-3.5/include/linux/netfilter_ipv4/ipt_IMQ.h    1970-01-01 02:00:00.000000000 +0200
1047 +++ linux-3.5-imq/include/linux/netfilter_ipv4/ipt_IMQ.h        2012-07-25 12:31:59.709321554 +0300
1048 @@ -0,0 +1,10 @@
1049 +#ifndef _IPT_IMQ_H
1050 +#define _IPT_IMQ_H
1051 +
1052 +/* Backwards compatibility for old userspace */
1053 +#include <linux/netfilter/xt_IMQ.h>
1054 +
1055 +#define ipt_imq_info xt_imq_info
1056 +
1057 +#endif /* _IPT_IMQ_H */
1058 +
1059 diff -uNr linux-3.5/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-3.5-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h
1060 --- linux-3.5/include/linux/netfilter_ipv6/ip6t_IMQ.h   1970-01-01 02:00:00.000000000 +0200
1061 +++ linux-3.5-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h       2012-07-25 12:31:59.709321554 +0300
1062 @@ -0,0 +1,10 @@
1063 +#ifndef _IP6T_IMQ_H
1064 +#define _IP6T_IMQ_H
1065 +
1066 +/* Backwards compatibility for old userspace */
1067 +#include <linux/netfilter/xt_IMQ.h>
1068 +
1069 +#define ip6t_imq_info xt_imq_info
1070 +
1071 +#endif /* _IP6T_IMQ_H */
1072 +
1073 diff -uNr linux-3.5/include/linux/skbuff.h linux-3.5-imq/include/linux/skbuff.h
1074 --- linux-3.5/include/linux/skbuff.h    2012-07-21 23:58:29.000000000 +0300
1075 +++ linux-3.5-imq/include/linux/skbuff.h        2012-07-25 12:31:59.712654956 +0300
1076 @@ -32,6 +32,9 @@
1077  #include <linux/hrtimer.h>
1078  #include <linux/dma-mapping.h>
1079  #include <linux/netdev_features.h>
1080 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1081 +#include <linux/imq.h>
1082 +#endif
1083  
1084  /* Don't change this without changing skb_csum_unnecessary! */
1085  #define CHECKSUM_NONE 0
1086 @@ -402,6 +405,9 @@
1087          * first. This is owned by whoever has the skb queued ATM.
1088          */
1089         char                    cb[48] __aligned(8);
1090 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1091 +       void                    *cb_next;
1092 +#endif
1093  
1094         unsigned long           _skb_refdst;
1095  #ifdef CONFIG_XFRM
1096 @@ -440,6 +446,9 @@
1097  #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
1098         struct sk_buff          *nfct_reasm;
1099  #endif
1100 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1101 +       struct nf_queue_entry   *nf_queue_entry;
1102 +#endif
1103  #ifdef CONFIG_BRIDGE_NETFILTER
1104         struct nf_bridge_info   *nf_bridge;
1105  #endif
1106 @@ -471,6 +480,10 @@
1107         /* 8/10 bit hole (depending on ndisc_nodetype presence) */
1108         kmemcheck_bitfield_end(flags2);
1109  
1110 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1111 +       __u8                    imq_flags:IMQ_F_BITS;
1112 +#endif
1113 +
1114  #ifdef CONFIG_NET_DMA
1115         dma_cookie_t            dma_cookie;
1116  #endif
1117 @@ -555,6 +568,12 @@
1118         return (struct rtable *)skb_dst(skb);
1119  }
1120  
1121 +
1122 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1123 +extern int skb_save_cb(struct sk_buff *skb);
1124 +extern int skb_restore_cb(struct sk_buff *skb);
1125 +#endif
1126 +
1127  extern void kfree_skb(struct sk_buff *skb);
1128  extern void consume_skb(struct sk_buff *skb);
1129  extern void           __kfree_skb(struct sk_buff *skb);
1130 @@ -2416,6 +2435,10 @@
1131         dst->nfct_reasm = src->nfct_reasm;
1132         nf_conntrack_get_reasm(src->nfct_reasm);
1133  #endif
1134 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1135 +       dst->imq_flags = src->imq_flags;
1136 +       dst->nf_queue_entry = src->nf_queue_entry;
1137 +#endif
1138  #ifdef CONFIG_BRIDGE_NETFILTER
1139         dst->nf_bridge  = src->nf_bridge;
1140         nf_bridge_get(src->nf_bridge);
1141 diff -uNr linux-3.5/include/net/netfilter/nf_queue.h linux-3.5-imq/include/net/netfilter/nf_queue.h
1142 --- linux-3.5/include/net/netfilter/nf_queue.h  2012-07-21 23:58:29.000000000 +0300
1143 +++ linux-3.5-imq/include/net/netfilter/nf_queue.h      2012-07-25 12:31:59.715988358 +0300
1144 @@ -30,5 +30,11 @@
1145                                        const struct nf_queue_handler *qh);
1146  extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
1147  extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
1148 +extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1149 +
1150 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1151 +extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
1152 +extern void nf_unregister_queue_imq_handler(void);
1153 +#endif
1154  
1155  #endif /* _NF_QUEUE_H */
1156 diff -uNr linux-3.5/net/core/dev.c linux-3.5-imq/net/core/dev.c
1157 --- linux-3.5/net/core/dev.c    2012-07-21 23:58:29.000000000 +0300
1158 +++ linux-3.5-imq/net/core/dev.c        2012-07-25 12:31:59.719321759 +0300
1159 @@ -97,6 +97,9 @@
1160  #include <net/net_namespace.h>
1161  #include <net/sock.h>
1162  #include <linux/rtnetlink.h>
1163 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1164 +#include <linux/imq.h>
1165 +#endif
1166  #include <linux/proc_fs.h>
1167  #include <linux/seq_file.h>
1168  #include <linux/stat.h>
1169 @@ -2171,7 +2174,12 @@
1170                 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1171                         skb_dst_drop(skb);
1172  
1173 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1174 +               if (!list_empty(&ptype_all) &&
1175 +                                       !(skb->imq_flags & IMQ_F_ENQUEUE))
1176 +#else
1177                 if (!list_empty(&ptype_all))
1178 +#endif
1179                         dev_queue_xmit_nit(skb, dev);
1180  
1181                 features = netif_skb_features(skb);
1182 diff -uNr linux-3.5/net/core/skbuff.c linux-3.5-imq/net/core/skbuff.c
1183 --- linux-3.5/net/core/skbuff.c 2012-07-21 23:58:29.000000000 +0300
1184 +++ linux-3.5-imq/net/core/skbuff.c     2012-07-25 12:31:59.722655161 +0300
1185 @@ -73,6 +73,9 @@
1186  
1187  struct kmem_cache *skbuff_head_cache __read_mostly;
1188  static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1189 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1190 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
1191 +#endif
1192  
1193  static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
1194                                   struct pipe_buffer *buf)
1195 @@ -92,6 +95,82 @@
1196         return 1;
1197  }
1198  
1199 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1200 +/* Control buffer save/restore for IMQ devices */
1201 +struct skb_cb_table {
1202 +       char                    cb[48] __aligned(8);    /* copy of skb->cb */
1203 +       void                    *cb_next;       /* previously saved entry */
1204 +       atomic_t                refcnt;         /* skbs referencing this */
1205 +};
1206 +
1207 +static DEFINE_SPINLOCK(skb_cb_store_lock);
1208 +
1209 +int skb_save_cb(struct sk_buff *skb)
1210 +{
1211 +       struct skb_cb_table *saved;
1212 +
1213 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(saved->cb));
1214 +
1215 +       saved = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
1216 +       if (saved == NULL)
1217 +               return -ENOMEM;
1218 +
1219 +       /* Snapshot the live cb and chain it in front of older snapshots. */
1220 +       atomic_set(&saved->refcnt, 1);
1221 +       saved->cb_next = skb->cb_next;
1222 +       memcpy(saved->cb, skb->cb, sizeof(saved->cb));
1223 +
1224 +       skb->cb_next = saved;
1225 +       return 0;
1226 +}
1227 +EXPORT_SYMBOL(skb_save_cb);
1228 +
1229 +int skb_restore_cb(struct sk_buff *skb)
1230 +{
1231 +       struct skb_cb_table *next;
1232 +
1233 +       if (!skb->cb_next)
1234 +               return 0;
1235 +
1236 +       next = skb->cb_next;
1237 +
1238 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1239 +       /* Copy the head saved entry back into skb->cb and unlink it. */
1240 +       memcpy(skb->cb, next->cb, sizeof(skb->cb));
1241 +       skb->cb_next = next->cb_next;
1242 +       /* Drop this skb's reference; the entry is freed at refcount zero. */
1243 +       spin_lock(&skb_cb_store_lock);
1244 +
1245 +       if (atomic_dec_and_test(&next->refcnt))
1246 +               kmem_cache_free(skbuff_cb_store_cache, next);
1247 +
1248 +       spin_unlock(&skb_cb_store_lock);
1249 +
1250 +       return 0;
1251 +}
1252 +EXPORT_SYMBOL(skb_restore_cb);
1253 +
1254 +static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
1255 +{
1256 +       struct skb_cb_table *next;
1257 +       struct sk_buff *old;
1258 +
1259 +       if (!__old->cb_next) {
1260 +               new->cb_next = NULL;
1261 +               return;
1262 +       }
1263 +       /* Share the saved chain: new gets a reference on its head entry. */
1264 +       spin_lock(&skb_cb_store_lock);
1265 +       /* Cast away const; only cb_next is read through this pointer. */
1266 +       old = (struct sk_buff *)__old;
1267 +
1268 +       next = old->cb_next;
1269 +       atomic_inc(&next->refcnt);
1270 +       new->cb_next = next;
1271 +
1272 +       spin_unlock(&skb_cb_store_lock);
1273 +}
1274 +#endif
1275  
1276  /* Pipe buffer operations for a socket. */
1277  static const struct pipe_buf_operations sock_pipe_buf_ops = {
1278 @@ -490,6 +569,29 @@
1279                 WARN_ON(in_irq());
1280                 skb->destructor(skb);
1281         }
1282 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1283 +       /*
1284 +        * This should not happen. When it does, avoid a memleak by unwinding
1285 +        * the chain of cb-backups (%p: a cast to int truncates on 64-bit).
1286 +        */
1287 +       while (skb->cb_next != NULL) {
1288 +               if (net_ratelimit())
1289 +                       printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: "
1290 +                               "%p\n", skb->cb_next);
1291 +
1292 +               skb_restore_cb(skb);
1293 +       }
1294 +       /*
1295 +        * This should not happen either, nf_queue_entry is nullified in
1296 +        * imq_dev_xmit(). If we have a non-NULL nf_queue_entry then we are
1297 +        * leaking entry pointers, maybe memory. We don't know whether this
1298 +        * points to already freed memory or to memory that should be freed.
1299 +        * If this happens we need to add refcounting, etc for nf_queue_entry.
1300 +        */
1301 +       if (skb->nf_queue_entry && net_ratelimit())
1302 +               printk(KERN_WARNING
1303 +                       "IMQ: kfree_skb: skb->nf_queue_entry != NULL\n");
1304 +#endif
1305  #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1306         nf_conntrack_put(skb->nfct);
1307  #endif
1308 @@ -635,6 +737,9 @@
1309         new->sp                 = secpath_get(old->sp);
1310  #endif
1311         memcpy(new->cb, old->cb, sizeof(old->cb));
1312 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1313 +       skb_copy_stored_cb(new, old);
1314 +#endif
1315         new->csum               = old->csum;
1316         new->local_df           = old->local_df;
1317         new->pkt_type           = old->pkt_type;
1318 @@ -3029,6 +3134,13 @@
1319                                                 0,
1320                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1321                                                 NULL);
1322 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1323 +       skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1324 +                                                 sizeof(struct skb_cb_table),
1325 +                                                 0,
1326 +                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1327 +                                                 NULL);
1328 +#endif
1329  }
1330  
1331  /**
1332 diff -uNr linux-3.5/net/ipv6/ip6_output.c linux-3.5-imq/net/ipv6/ip6_output.c
1333 --- linux-3.5/net/ipv6/ip6_output.c     2012-07-21 23:58:29.000000000 +0300
1334 +++ linux-3.5-imq/net/ipv6/ip6_output.c 2012-07-25 12:31:59.722655161 +0300
1335 @@ -102,9 +102,6 @@
1336         struct net_device *dev = dst->dev;
1337         struct neighbour *neigh;
1338  
1339 -       skb->protocol = htons(ETH_P_IPV6);
1340 -       skb->dev = dev;
1341 -
1342         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1343                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1344  
1345 @@ -170,6 +167,11 @@
1346                 return 0;
1347         }
1348  
1349 +       /* IMQ-patch: moved setting skb->dev and skb->protocol from
1350 +        * ip6_finish_output2 to fix crashing at netif_skb_features(). */
1351 +       skb->protocol = htons(ETH_P_IPV6);
1352 +       skb->dev = dev;
1353 +
1354         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
1355                             ip6_finish_output,
1356                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1357 diff -uNr linux-3.5/net/netfilter/core.c linux-3.5-imq/net/netfilter/core.c
1358 --- linux-3.5/net/netfilter/core.c      2012-07-21 23:58:29.000000000 +0300
1359 +++ linux-3.5-imq/net/netfilter/core.c  2012-07-25 12:31:59.722655161 +0300
1360 @@ -190,9 +190,11 @@
1361                 ret = NF_DROP_GETERR(verdict);
1362                 if (ret == 0)
1363                         ret = -EPERM;
1364 -       } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
1365 +       } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
1366 +                  (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
1367                 int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1368 -                                               verdict >> NF_VERDICT_QBITS);
1369 +                                               verdict >> NF_VERDICT_QBITS,
1370 +                                               verdict & NF_VERDICT_MASK);
1371                 if (err < 0) {
1372                         if (err == -ECANCELED)
1373                                 goto next_hook;
1374 diff -uNr linux-3.5/net/netfilter/Kconfig linux-3.5-imq/net/netfilter/Kconfig
1375 --- linux-3.5/net/netfilter/Kconfig     2012-07-21 23:58:29.000000000 +0300
1376 +++ linux-3.5-imq/net/netfilter/Kconfig 2012-07-25 12:31:59.722655161 +0300
1377 @@ -569,6 +569,18 @@
1378  
1379           To compile it as a module, choose M here.  If unsure, say N.
1380  
1381 +config NETFILTER_XT_TARGET_IMQ
1382 +        tristate '"IMQ" target support'
1383 +       depends on NETFILTER_XTABLES
1384 +       depends on IP_NF_MANGLE || IP6_NF_MANGLE
1385 +       select IMQ
1386 +       default m if NETFILTER_ADVANCED=n
1387 +        help
1388 +          This option adds a `IMQ' target which is used to specify if and
1389 +          to which imq device packets should get enqueued/dequeued.
1390 +
1391 +          To compile it as a module, choose M here.  If unsure, say N.
1392 +
1393  config NETFILTER_XT_TARGET_MARK
1394         tristate '"MARK" target support'
1395         depends on NETFILTER_ADVANCED
1396 diff -uNr linux-3.5/net/netfilter/Makefile linux-3.5-imq/net/netfilter/Makefile
1397 --- linux-3.5/net/netfilter/Makefile    2012-07-21 23:58:29.000000000 +0300
1398 +++ linux-3.5-imq/net/netfilter/Makefile        2012-07-25 12:32:28.966592115 +0300
1399 @@ -60,6 +60,7 @@
1400  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1401  obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
1402  obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
1403 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1404  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
1405  obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
1406  obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
1407 diff -uNr linux-3.5/net/netfilter/nf_internals.h linux-3.5-imq/net/netfilter/nf_internals.h
1408 --- linux-3.5/net/netfilter/nf_internals.h      2012-07-21 23:58:29.000000000 +0300
1409 +++ linux-3.5-imq/net/netfilter/nf_internals.h  2012-07-25 12:31:59.725988564 +0300
1410 @@ -29,7 +29,7 @@
1411                     struct net_device *indev,
1412                     struct net_device *outdev,
1413                     int (*okfn)(struct sk_buff *),
1414 -                   unsigned int queuenum);
1415 +                   unsigned int queuenum, unsigned int queuetype);
1416  extern int __init netfilter_queue_init(void);
1417  
1418  /* nf_log.c */
1419 diff -uNr linux-3.5/net/netfilter/nf_queue.c linux-3.5-imq/net/netfilter/nf_queue.c
1420 --- linux-3.5/net/netfilter/nf_queue.c  2012-07-21 23:58:29.000000000 +0300
1421 +++ linux-3.5-imq/net/netfilter/nf_queue.c      2012-07-25 12:31:59.725988564 +0300
1422 @@ -22,6 +22,26 @@
1423  
1424  static DEFINE_MUTEX(queue_handler_mutex);
1425  
1426 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1427 +static const struct nf_queue_handler *queue_imq_handler;
1428 +
1429 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1430 +{
1431 +       mutex_lock(&queue_handler_mutex);
1432 +       rcu_assign_pointer(queue_imq_handler, qh); /* replaces any old one */
1433 +       mutex_unlock(&queue_handler_mutex);
1434 +}
1435 +EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
1436 +
1437 +void nf_unregister_queue_imq_handler(void)
1438 +{
1439 +       mutex_lock(&queue_handler_mutex);
1440 +       rcu_assign_pointer(queue_imq_handler, NULL);
1441 +       mutex_unlock(&queue_handler_mutex); /* NOTE(review): no RCU sync */
1442 +}
1443 +EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
1444 +#endif
1445 +
1446  /* return EBUSY when somebody else is registered, return EEXIST if the
1447   * same handler is registered, return 0 in case of success. */
1448  int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
1449 @@ -92,7 +112,7 @@
1450  }
1451  EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1452  
1453 -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1454 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1455  {
1456         /* Release those devices we held, or Alexey will kill me. */
1457         if (entry->indev)
1458 @@ -112,6 +132,7 @@
1459         /* Drop reference to owner of hook which queued us. */
1460         module_put(entry->elem->owner);
1461  }
1462 +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1463  
1464  /*
1465   * Any packet that leaves via this function must come back
1466 @@ -123,7 +144,8 @@
1467                       struct net_device *indev,
1468                       struct net_device *outdev,
1469                       int (*okfn)(struct sk_buff *),
1470 -                     unsigned int queuenum)
1471 +                     unsigned int queuenum,
1472 +                     unsigned int queuetype)
1473  {
1474         int status = -ENOENT;
1475         struct nf_queue_entry *entry = NULL;
1476 @@ -137,7 +159,17 @@
1477         /* QUEUE == DROP if no one is waiting, to be safe. */
1478         rcu_read_lock();
1479  
1480 -       qh = rcu_dereference(queue_handler[pf]);
1481 +       if (queuetype == NF_IMQ_QUEUE) {
1482 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1483 +               qh = rcu_dereference(queue_imq_handler);
1484 +#else
1485 +               BUG();
1486 +               goto err_unlock;
1487 +#endif
1488 +       } else {
1489 +               qh = rcu_dereference(queue_handler[pf]);
1490 +       }
1491 +
1492         if (!qh) {
1493                 status = -ESRCH;
1494                 goto err_unlock;
1495 @@ -230,7 +262,8 @@
1496              struct net_device *indev,
1497              struct net_device *outdev,
1498              int (*okfn)(struct sk_buff *),
1499 -            unsigned int queuenum)
1500 +            unsigned int queuenum,
1501 +            unsigned int queuetype)
1502  {
1503         struct sk_buff *segs;
1504         int err = -EINVAL;
1505 @@ -238,7 +271,7 @@
1506  
1507         if (!skb_is_gso(skb))
1508                 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1509 -                                 queuenum);
1510 +                                 queuenum, queuetype);
1511  
1512         switch (pf) {
1513         case NFPROTO_IPV4:
1514 @@ -266,7 +299,7 @@
1515                 if (err == 0) {
1516                         nf_bridge_adjust_segmented_data(segs);
1517                         err = __nf_queue(segs, elem, pf, hook, indev,
1518 -                                          outdev, okfn, queuenum);
1519 +                                          outdev, okfn, queuenum, queuetype);
1520                 }
1521                 if (err == 0)
1522                         queued++;
1523 @@ -323,9 +356,11 @@
1524                 local_bh_enable();
1525                 break;
1526         case NF_QUEUE:
1527 +       case NF_IMQ_QUEUE:
1528                 err = __nf_queue(skb, elem, entry->pf, entry->hook,
1529                                  entry->indev, entry->outdev, entry->okfn,
1530 -                                verdict >> NF_VERDICT_QBITS);
1531 +                                verdict >> NF_VERDICT_QBITS,
1532 +                                verdict & NF_VERDICT_MASK);
1533                 if (err < 0) {
1534                         if (err == -ECANCELED)
1535                                 goto next_hook;
1536 diff -uNr linux-3.5/net/netfilter/xt_IMQ.c linux-3.5-imq/net/netfilter/xt_IMQ.c
1537 --- linux-3.5/net/netfilter/xt_IMQ.c    1970-01-01 02:00:00.000000000 +0200
1538 +++ linux-3.5-imq/net/netfilter/xt_IMQ.c        2012-07-25 12:31:59.725988564 +0300
1539 @@ -0,0 +1,74 @@
1540 +/*
1541 + * This target marks packets to be enqueued to an imq device
1542 + */
1543 +#include <linux/module.h>
1544 +#include <linux/skbuff.h>
1545 +#include <linux/netfilter/x_tables.h>
1546 +#include <linux/netfilter/xt_IMQ.h>
1547 +#include <linux/imq.h>
1548 +
1549 +static unsigned int imq_target(struct sk_buff *pskb,
1550 +                               const struct xt_action_param *par)
1551 +{
1552 +       const struct xt_imq_info *info = par->targinfo;
1553 +
1554 +       /* Record the chosen imq device index and flag the skb for enqueue. */
1555 +       pskb->imq_flags = IMQ_F_ENQUEUE | (info->todev & IMQ_F_IFMASK);
1556 +       return XT_CONTINUE;
1557 +}
1558 +
1559 +static int imq_checkentry(const struct xt_tgchk_param *par)
1560 +{
1561 +       const struct xt_imq_info *info = par->targinfo;
1562 +
1563 +       /* Accept only device indexes 0 .. IMQ_MAX_DEVS - 1. */
1564 +       if (info->todev < IMQ_MAX_DEVS)
1565 +               return 0;
1566 +
1567 +       printk(KERN_WARNING
1568 +              "IMQ: invalid device specified, highest is %u\n",
1569 +              IMQ_MAX_DEVS - 1);
1570 +       return -EINVAL;
1571 +}
1572 +
1573 +static struct xt_target xt_imq_reg[] __read_mostly = {
1574 +       {
1575 +               .name           = "IMQ",
1576 +               .family         = AF_INET,      /* IPv4 registration */
1577 +               .checkentry     = imq_checkentry,
1578 +               .target         = imq_target,
1579 +               .targetsize     = sizeof(struct xt_imq_info),
1580 +               .table          = "mangle",
1581 +               .me             = THIS_MODULE
1582 +       },
1583 +       {
1584 +               .name           = "IMQ",
1585 +               .family         = AF_INET6,     /* IPv6 registration */
1586 +               .checkentry     = imq_checkentry,
1587 +               .target         = imq_target,
1588 +               .targetsize     = sizeof(struct xt_imq_info),
1589 +               .table          = "mangle",
1590 +               .me             = THIS_MODULE
1591 +       },
1592 +};
1593 +
1594 +static int __init imq_init(void)
1595 +{
1596 +       return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1597 +}
1598 +
1599 +static void __exit imq_fini(void)
1600 +{
1601 +       xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1602 +}
1603 +
1604 +module_init(imq_init);
1605 +module_exit(imq_fini);
1606 +
1607 +MODULE_AUTHOR("http://www.linuximq.net");
1608 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. "
1609 +                  "See http://www.linuximq.net/ for more information.");
1610 +MODULE_LICENSE("GPL");
1611 +MODULE_ALIAS("ipt_IMQ");
1612 +MODULE_ALIAS("ip6t_IMQ");
1613 +
This page took 0.247381 seconds and 3 git commands to generate.