]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-imq.patch
- partial update to 3.5.0
[packages/kernel.git] / kernel-imq.patch
1 diff -uNr linux-3.1/drivers/net/imq.c linux-3.1-imq/drivers/net/imq.c
2 --- linux-3.1/drivers/net/imq.c 1970-01-01 02:00:00.000000000 +0200
3 +++ linux-3.1-imq/drivers/net/imq.c     2011-11-04 12:16:10.454992642 +0200
4 @@ -0,0 +1,850 @@
5 +/*
6 + *             Pseudo-driver for the intermediate queue device.
7 + *
8 + *             This program is free software; you can redistribute it and/or
9 + *             modify it under the terms of the GNU General Public License
10 + *             as published by the Free Software Foundation; either version
11 + *             2 of the License, or (at your option) any later version.
12 + *
13 + * Authors:    Patrick McHardy, <kaber@trash.net>
14 + *
15 + *            The first version was written by Martin Devera, <devik@cdi.cz>
16 + *
17 + * Credits:    Jan Rafaj <imq2t@cedric.vabo.cz>
18 + *              - Update patch to 2.4.21
19 + *             Sebastian Strollo <sstrollo@nortelnetworks.com>
20 + *              - Fix "Dead-loop on netdevice imq"-issue
21 + *             Marcel Sebek <sebek64@post.cz>
22 + *              - Update to 2.6.2-rc1
23 + *
24 + *            After some time of inactivity there is a group taking care
25 + *            of IMQ again: http://www.linuximq.net
26 + *
27 + *
28 + *            2004/06/30 - New version of IMQ patch to kernels <=2.6.7
29 + *             including the following changes:
30 + *
31 + *            - Correction of ipv6 support "+"s issue (Hasso Tepper)
32 + *            - Correction of imq_init_devs() issue that resulted in
33 + *            kernel OOPS unloading IMQ as module (Norbert Buchmuller)
34 + *            - Addition of functionality to choose number of IMQ devices
35 + *            during kernel config (Andre Correa)
36 + *            - Addition of functionality to choose how IMQ hooks on
37 + *            PRE and POSTROUTING (after or before NAT) (Andre Correa)
38 + *            - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
39 + *
40 + *
41 + *             2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
42 + *             released with almost no problems. 2.6.14-x was released
43 + *             with some important changes: nfcache was removed; After
44 + *             some weeks of trouble we figured out that some IMQ fields
45 + *             in skb were missing in skbuff.c - skb_clone and copy_skb_header.
46 + *             These functions are correctly patched by this new patch version.
47 + *
48 + *             Thanks to all who helped to figure out all the problems with
49 + *             2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
50 + *             Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
51 + *             I didn't forget anybody). I apologize again for my lack of time.
52 + *
53 + *
54 + *             2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
55 + *             of qdisc_restart() and moved qdisc_run() to tasklet to avoid
56 + *             recursive locking. New initialization routines to fix 'rmmod' not
57 + *             working anymore. Used code from ifb.c. (Jussi Kivilinna)
58 + *
59 + *             2008/08/06 - 2.6.26 - (JK)
60 + *              - Replaced tasklet with 'netif_schedule()'.
61 + *              - Cleaned up and added comments for imq_nf_queue().
62 + *
63 + *             2009/04/12
64 + *              - Add skb_save_cb/skb_restore_cb helper functions for backing up
65 + *                control buffer. This is needed because qdisc-layer on kernels
66 + *                2.6.27 and newer overwrite control buffer. (Jussi Kivilinna)
67 + *              - Add better locking for IMQ device. Hopefully this will solve
68 + *                SMP issues. (Jussi Kivilinna)
69 + *              - Port to 2.6.27
70 + *              - Port to 2.6.28
71 + *              - Port to 2.6.29 + fix rmmod not working
72 + *
73 + *             2009/04/20 - (Jussi Kivilinna)
74 + *              - Use netdevice feature flags to avoid extra packet handling
75 + *                by core networking layer and possibly increase performance.
76 + *
77 + *             2009/09/26 - (Jussi Kivilinna)
78 + *              - Add imq_nf_reinject_lockless to fix deadlock with
79 + *                imq_nf_queue/imq_nf_reinject.
80 + *
81 + *             2009/12/08 - (Jussi Kivilinna)
82 + *              - Port to 2.6.32
83 + *              - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit()
84 + *              - Also add better error checking for skb->nf_queue_entry usage
85 + *
86 + *             2010/02/25 - (Jussi Kivilinna)
87 + *              - Port to 2.6.33
88 + *
89 + *             2010/08/15 - (Jussi Kivilinna)
90 + *              - Port to 2.6.35
91 + *              - Simplify hook registration by using nf_register_hooks.
92 + *              - nf_reinject doesn't need spinlock around it, therefore remove
93 + *                imq_nf_reinject function. Other nf_reinject users protect
94 + *                their own data with spinlock. With IMQ however all data
95 + *                needed is stored per skbuff, so no locking is needed.
96 + *              - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of
97 + *                NF_QUEUE, this allows working coexistence of IMQ and other
98 + *                NF_QUEUE users.
99 + *              - Make IMQ multi-queue. Number of IMQ device queues can be
100 + *                increased with 'numqueues' module parameters. Default number
101 + *                of queues is 1, in other words by default IMQ works as
102 + *                single-queue device. Multi-queue selection is based on
103 + *                IFB multi-queue patch by Changli Gao <xiaosuo@gmail.com>.
104 + *
105 + *             2011/03/18 - (Jussi Kivilinna)
106 + *              - Port to 2.6.38
107 + *
108 + *             2011/07/12 - (syoder89@gmail.com)
109 + *              - Crash fix that happens when the receiving interface has more
110 + *                than one queue (add missing skb_set_queue_mapping in
111 + *                imq_select_queue).
112 + *
113 + *             2011/07/26 - (Jussi Kivilinna)
114 + *              - Add queue mapping checks for packets exiting IMQ.
115 + *              - Port to 3.0
116 + *
117 + *             2011/08/16 - (Jussi Kivilinna)
118 + *              - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2
119 + *
120 + *             2011/11/03 - Germano Michel <germanomichel@gmail.com>
121 + *              - Fix IMQ for net namespaces
122 + *
123 + *             2011/11/04 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
124 + *              - Port to 3.1
125 + *              - Clean-up, move 'get imq device pointer by imqX name' to
126 + *                separate function from imq_nf_queue().
127 + *
128 + *            Also, many thanks to Pablo Sebastian Greco for making the initial
129 + *            patch and to those who helped the testing.
130 + *
131 + *             More info at: http://www.linuximq.net/ (Andre Correa)
132 + */
133 +
134 +#include <linux/module.h>
135 +#include <linux/kernel.h>
136 +#include <linux/moduleparam.h>
137 +#include <linux/list.h>
138 +#include <linux/skbuff.h>
139 +#include <linux/netdevice.h>
140 +#include <linux/etherdevice.h>
141 +#include <linux/rtnetlink.h>
142 +#include <linux/if_arp.h>
143 +#include <linux/netfilter.h>
144 +#include <linux/netfilter_ipv4.h>
145 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
146 +       #include <linux/netfilter_ipv6.h>
147 +#endif
148 +#include <linux/imq.h>
149 +#include <net/pkt_sched.h>
150 +#include <net/netfilter/nf_queue.h>
151 +#include <net/sock.h>
152 +#include <linux/ip.h>
153 +#include <linux/ipv6.h>
154 +#include <linux/if_vlan.h>
155 +#include <linux/if_pppox.h>
156 +#include <net/ip.h>
157 +#include <net/ipv6.h>
158 +
159 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
160 +
161 +static nf_hookfn imq_nf_hook;
162 +
163 +static struct nf_hook_ops imq_ops[] = {
164 +       {
165 +       /* imq_ingress_ipv4 */
166 +               .hook           = imq_nf_hook,
167 +               .owner          = THIS_MODULE,
168 +               .pf             = PF_INET,
169 +               .hooknum        = NF_INET_PRE_ROUTING,
170 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
171 +               .priority       = NF_IP_PRI_MANGLE + 1,
172 +#else
173 +               .priority       = NF_IP_PRI_NAT_DST + 1,
174 +#endif
175 +       },
176 +       {
177 +       /* imq_egress_ipv4 */
178 +               .hook           = imq_nf_hook,
179 +               .owner          = THIS_MODULE,
180 +               .pf             = PF_INET,
181 +               .hooknum        = NF_INET_POST_ROUTING,
182 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
183 +               .priority       = NF_IP_PRI_LAST,
184 +#else
185 +               .priority       = NF_IP_PRI_NAT_SRC - 1,
186 +#endif
187 +       },
188 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
189 +       {
190 +       /* imq_ingress_ipv6 */
191 +               .hook           = imq_nf_hook,
192 +               .owner          = THIS_MODULE,
193 +               .pf             = PF_INET6,
194 +               .hooknum        = NF_INET_PRE_ROUTING,
195 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
196 +               .priority       = NF_IP6_PRI_MANGLE + 1,
197 +#else
198 +               .priority       = NF_IP6_PRI_NAT_DST + 1,
199 +#endif
200 +       },
201 +       {
202 +       /* imq_egress_ipv6 */
203 +               .hook           = imq_nf_hook,
204 +               .owner          = THIS_MODULE,
205 +               .pf             = PF_INET6,
206 +               .hooknum        = NF_INET_POST_ROUTING,
207 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
208 +               .priority       = NF_IP6_PRI_LAST,
209 +#else
210 +               .priority       = NF_IP6_PRI_NAT_SRC - 1,
211 +#endif
212 +       },
213 +#endif
214 +};
215 +
216 +#if defined(CONFIG_IMQ_NUM_DEVS)
217 +static int numdevs = CONFIG_IMQ_NUM_DEVS;
218 +#else
219 +static int numdevs = IMQ_MAX_DEVS;
220 +#endif
221 +
222 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
223 +
224 +#define IMQ_MAX_QUEUES 32
225 +static int numqueues = 1;
226 +static u32 imq_hashrnd;
227 +
228 +static inline __be16 pppoe_proto(const struct sk_buff *skb)
229 +{
230 +       return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
231 +                       sizeof(struct pppoe_hdr)));
232 +}
233 +
234 +static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
235 +{
236 +       /* Map skb to a tx queue index of dev, hashing its flow if possible. */
237 +       unsigned int pull_len = 0;
238 +       __be16 protocol = skb->protocol;
239 +       u32 addr1, addr2, hash;
240 +       union {
241 +               u16 in16[2];
242 +               u32 in32;
243 +       } ports;
244 +       u8 ip_proto;
245 +       /* signed: ipv6_skip_exthdr() returns a negative value on failure */
246 +       int ihl = 0;
247 +
248 +recheck:
249 +       switch (protocol) {
250 +       case htons(ETH_P_8021Q): {
251 +               if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
252 +                       goto other;
253 +
254 +               pull_len += VLAN_HLEN;
255 +               skb->network_header += VLAN_HLEN;
256 +
257 +               protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
258 +               goto recheck;
259 +       }
260 +
261 +       case htons(ETH_P_PPP_SES): {
262 +               if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
263 +                       goto other;
264 +
265 +               pull_len += PPPOE_SES_HLEN;
266 +               skb->network_header += PPPOE_SES_HLEN;
267 +
268 +               protocol = pppoe_proto(skb);
269 +               goto recheck;
270 +       }
271 +
272 +       case htons(ETH_P_IP): {
273 +               const struct iphdr *iph;
274 +
275 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
276 +                       goto other;
277 +               iph = ip_hdr(skb); /* read after the pull, which may move data */
278 +               addr1 = iph->daddr;
279 +               addr2 = iph->saddr;
280 +
281 +               ip_proto = !(iph->frag_off & htons(IP_MF | IP_OFFSET)) ?
282 +                                iph->protocol : 0;
283 +               ihl = ip_hdrlen(skb);
284 +
285 +               break;
286 +       }
287 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
288 +       case htons(ETH_P_IPV6): {
289 +               const struct ipv6hdr *iph;
290 +               __be16 frag_off;
291 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
292 +                       goto other;
293 +               iph = ipv6_hdr(skb); /* read after the pull, as for IPv4 */
294 +               addr1 = iph->daddr.s6_addr32[3];
295 +               addr2 = iph->saddr.s6_addr32[3];
296 +               ip_proto = iph->nexthdr; /* in/out argument, must be seeded */
297 +               ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto, &frag_off);
298 +               if (unlikely(ihl < 0))
299 +                       goto other;
300 +               break;
301 +       }
302 +#endif
303 +       default:
304 +other:
305 +               if (pull_len != 0) {
306 +                       skb_push(skb, pull_len);
307 +                       skb->network_header -= pull_len;
308 +               }
309 +
310 +               return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
311 +       }
312 +
313 +       if (addr1 > addr2)
314 +               swap(addr1, addr2);
315 +
316 +       switch (ip_proto) {
317 +       case IPPROTO_TCP:
318 +       case IPPROTO_UDP:
319 +       case IPPROTO_DCCP:
320 +       case IPPROTO_ESP:
321 +       case IPPROTO_AH:
322 +       case IPPROTO_SCTP:
323 +       case IPPROTO_UDPLITE: {
324 +               if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
325 +                       if (ports.in16[0] > ports.in16[1])
326 +                               swap(ports.in16[0], ports.in16[1]);
327 +                       break;
328 +               }
329 +               /* fall-through */
330 +       }
331 +       default:
332 +               ports.in32 = 0;
333 +               break;
334 +       }
335 +
336 +       if (pull_len != 0) {
337 +               skb_push(skb, pull_len);
338 +               skb->network_header -= pull_len;
339 +       }
340 +
341 +       hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
342 +
343 +       return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
344 +}
345 +
346 +static inline bool sk_tx_queue_recorded(struct sock *sk)
347 +{
348 +       return (sk_tx_queue_get(sk) >= 0);
349 +}
350 +
351 +static struct netdev_queue *imq_select_queue(struct net_device *dev,
352 +                                               struct sk_buff *skb)
353 +{
354 +       u16 queue_index = 0;
355 +       u32 hash;
356 +
357 +       if (likely(dev->real_num_tx_queues == 1))
358 +               goto out;
359 +
360 +       /* IMQ can be receiving ingress or egress packets. */
361 +
362 +       /* First check whether rx_queue is set */
363 +       if (skb_rx_queue_recorded(skb)) {
364 +               queue_index = skb_get_rx_queue(skb);
365 +               goto out;
366 +       }
367 +
368 +       /* Check if socket has tx_queue set */
369 +       if (sk_tx_queue_recorded(skb->sk)) {
370 +               queue_index = sk_tx_queue_get(skb->sk);
371 +               goto out;
372 +       }
373 +
374 +       /* Try use socket hash */
375 +       if (skb->sk && skb->sk->sk_hash) {
376 +               hash = skb->sk->sk_hash;
377 +               queue_index =
378 +                       (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
379 +               goto out;
380 +       }
381 +
382 +       /* Generate hash from packet data */
383 +       queue_index = imq_hash(dev, skb);
384 +
385 +out:
386 +       if (unlikely(queue_index >= dev->real_num_tx_queues))
387 +               queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
388 +
389 +       skb_set_queue_mapping(skb, queue_index);
390 +       return netdev_get_tx_queue(dev, queue_index);
391 +}
392 +
393 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
394 +{
395 +       return &dev->stats;
396 +}
397 +
398 +/* called for packets kfree'd in qdiscs at places other than enqueue */
399 +static void imq_skb_destructor(struct sk_buff *skb)
400 +{
401 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
402 +
403 +       skb->nf_queue_entry = NULL;
404 +
405 +       if (entry) {
406 +               nf_queue_entry_release_refs(entry);
407 +               kfree(entry);
408 +       }
409 +
410 +       skb_restore_cb(skb); /* kfree backup */
411 +}
412 +
413 +static void imq_done_check_queue_mapping(struct sk_buff *skb,
414 +                                        struct net_device *dev)
415 +{
416 +       unsigned int queue_index;
417 +
418 +       /* Don't let queue_mapping be left too large after exiting IMQ */
419 +       if (likely(skb->dev != dev && skb->dev != NULL)) {
420 +               queue_index = skb_get_queue_mapping(skb);
421 +               if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) {
422 +                       queue_index = (u16)((u32)queue_index %
423 +                                               skb->dev->real_num_tx_queues);
424 +                       skb_set_queue_mapping(skb, queue_index);
425 +               }
426 +       } else {
427 +               /* skb->dev was IMQ device itself or NULL, be on safe side and
428 +                * just clear queue mapping.
429 +                */
430 +               skb_set_queue_mapping(skb, 0);
431 +       }
432 +}
433 +
434 +static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
435 +{
436 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
437 +
438 +       skb->nf_queue_entry = NULL;
439 +       dev->trans_start = jiffies;
440 +
441 +       dev->stats.tx_bytes += skb->len;
442 +       dev->stats.tx_packets++;
443 +
444 +       if (unlikely(entry == NULL)) {
445 +               /* We don't know what is going on here.. packet is queued for
446 +                * imq device, but (probably) not by us.
447 +                *
448 +                * If this packet was not sent here by imq_nf_queue(), then
449 +                * skb_save_cb() was not used and skb_free() should not show:
450 +                *   WARNING: IMQ: kfree_skb: skb->cb_next:..
451 +                * and/or
452 +                *   WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
453 +                *
454 +                * However if this message is shown, then IMQ is somehow broken
455 +                * and you should report this to linuximq.net.
456 +                */
457 +
458 +               /* imq_dev_xmit is black hole that eats all packets, report that
459 +                * we eat this packet happily and increase dropped counters.
460 +                */
461 +
462 +               dev->stats.tx_dropped++;
463 +               dev_kfree_skb(skb);
464 +
465 +               return NETDEV_TX_OK;
466 +       }
467 +
468 +       skb_restore_cb(skb); /* restore skb->cb */
469 +
470 +       skb->imq_flags = 0;
471 +       skb->destructor = NULL;
472 +
473 +       imq_done_check_queue_mapping(skb, dev);
474 +
475 +       nf_reinject(entry, NF_ACCEPT);
476 +
477 +       return NETDEV_TX_OK;
478 +}
479 +
480 +static struct net_device *get_imq_device_by_index(int index)
481 +{
482 +       struct net_device *dev = NULL;
483 +       struct net *net;
484 +       char buf[8];
485 +
486 +       /* look the device up by its "imqN" name and cache the result */
487 +       snprintf(buf, sizeof(buf), "imq%d", index);
488 +       /* Search all namespaces; walk the list under RCU as it may change. */
489 +       rcu_read_lock();
490 +       for_each_net_rcu(net) {
491 +               dev = dev_get_by_name(net, buf);
492 +               if (dev)
493 +                       break;
494 +       }
495 +       rcu_read_unlock();
496 +       if (WARN_ON_ONCE(dev == NULL)) {
497 +               /* IMQ device not found. Exotic config? */
498 +               return ERR_PTR(-ENODEV);
499 +       }
500 +
501 +       imq_devs_cache[index] = dev;
502 +       dev_put(dev);
503 +
504 +       return dev;
505 +}
506 +
507 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
508 +{
509 +       /* Hand entry->skb to the qdisc of the imq device picked by imq_flags. */
510 +       struct net_device *dev;
511 +       struct sk_buff *skb_orig, *skb, *skb_shared;
512 +       struct Qdisc *q;
513 +       struct netdev_queue *txq;
514 +       spinlock_t *root_lock;
515 +       int users, index;
516 +       int retval = -EINVAL;
517 +       unsigned int orig_queue_index;
518 +
519 +       index = entry->skb->imq_flags & IMQ_F_IFMASK;
520 +       if (unlikely(index > numdevs - 1)) {
521 +               if (net_ratelimit())
522 +                       printk(KERN_WARNING
523 +                              "IMQ: invalid device specified, highest is %d\n",
524 +                              numdevs - 1);
525 +               goto out; /* retval is still -EINVAL */
526 +       }
527 +
528 +       /* check for imq device by index from cache */
529 +       dev = imq_devs_cache[index];
530 +       if (unlikely(!dev)) {
531 +               dev = get_imq_device_by_index(index);
532 +               if (IS_ERR(dev)) {
533 +                       retval = PTR_ERR(dev);
534 +                       goto out;
535 +               }
536 +       }
537 +
538 +       if (unlikely(!(dev->flags & IFF_UP))) {
539 +               entry->skb->imq_flags = 0;
540 +               nf_reinject(entry, NF_ACCEPT);
541 +               retval = 0;
542 +               goto out;
543 +       }
544 +       dev->last_rx = jiffies;
545 +
546 +       skb = entry->skb;
547 +       skb_orig = NULL;
548 +
549 +       /* skb has owner? => make clone */
550 +       if (unlikely(skb->destructor)) {
551 +               skb_orig = skb;
552 +               skb = skb_clone(skb, GFP_ATOMIC);
553 +               if (unlikely(!skb)) {
554 +                       retval = -ENOMEM;
555 +                       goto out;
556 +               }
557 +               entry->skb = skb;
558 +       }
559 +
560 +       skb->nf_queue_entry = entry;
561 +
562 +       dev->stats.rx_bytes += skb->len;
563 +       dev->stats.rx_packets++;
564 +
565 +       if (!skb->dev) {
566 +               /* skb->dev == NULL causes problems, try to find the cause. */
567 +               if (net_ratelimit()) {
568 +                       dev_warn(&dev->dev,
569 +                                "received packet with skb->dev == NULL\n");
570 +                       dump_stack();
571 +               }
572 +
573 +               skb->dev = dev;
574 +       }
575 +
576 +       /* Disables softirqs for lock below */
577 +       rcu_read_lock_bh();
578 +
579 +       /* Multi-queue selection */
580 +       orig_queue_index = skb_get_queue_mapping(skb);
581 +       txq = imq_select_queue(dev, skb);
582 +
583 +       q = rcu_dereference(txq->qdisc);
584 +       if (unlikely(!q->enqueue))
585 +               goto packet_not_eaten_by_imq_dev;
586 +
587 +       root_lock = qdisc_lock(q);
588 +       spin_lock(root_lock);
589 +
590 +       users = atomic_read(&skb->users);
591 +
592 +       skb_shared = skb_get(skb); /* increase reference count by one */
593 +       skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
594 +                                       overwrite it */
595 +       qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
596 +
597 +       if (likely(atomic_read(&skb_shared->users) == users + 1)) {
598 +               kfree_skb(skb_shared); /* decrease reference count by one */
599 +
600 +               skb->destructor = &imq_skb_destructor;
601 +
602 +               /* cloned? */
603 +               if (unlikely(skb_orig))
604 +                       kfree_skb(skb_orig); /* free original */
605 +
606 +               spin_unlock(root_lock);
607 +               rcu_read_unlock_bh();
608 +
609 +               /* schedule qdisc dequeue */
610 +               __netif_schedule(q);
611 +
612 +               retval = 0;
613 +               goto out;
614 +       } else {
615 +               skb_restore_cb(skb_shared); /* restore skb->cb */
616 +               /* qdisc dropped the packet and already decreased the skb
617 +                * reference count, so we must not free it again here as that
618 +                * would actually destroy the skb. */
619 +               spin_unlock(root_lock);
620 +               goto packet_not_eaten_by_imq_dev;
621 +       }
622 +
623 +packet_not_eaten_by_imq_dev:
624 +       skb->nf_queue_entry = NULL; /* not queued: drop the stale pointer */
625 +       skb_set_queue_mapping(skb, orig_queue_index);
626 +       rcu_read_unlock_bh();
627 +
628 +       /* cloned? restore original */
629 +       if (unlikely(skb_orig)) {
630 +               kfree_skb(skb);
631 +               entry->skb = skb_orig;
632 +       }
633 +       retval = -1;
634 +out:
635 +       return retval;
636 +}
637 +
638 +static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
639 +                               const struct net_device *indev,
640 +                               const struct net_device *outdev,
641 +                               int (*okfn)(struct sk_buff *))
642 +{
643 +       return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
644 +}
645 +
646 +static int imq_close(struct net_device *dev)
647 +{
648 +       netif_stop_queue(dev);
649 +       return 0;
650 +}
651 +
652 +static int imq_open(struct net_device *dev)
653 +{
654 +       netif_start_queue(dev);
655 +       return 0;
656 +}
657 +
658 +static const struct net_device_ops imq_netdev_ops = {
659 +       .ndo_open               = imq_open,
660 +       .ndo_stop               = imq_close,
661 +       .ndo_start_xmit         = imq_dev_xmit,
662 +       .ndo_get_stats          = imq_get_stats,
663 +};
664 +
665 +static void imq_setup(struct net_device *dev)
666 +{
667 +       dev->netdev_ops         = &imq_netdev_ops;
668 +       dev->type               = ARPHRD_VOID;
669 +       dev->mtu                = 16000; /* too small? */
670 +       dev->tx_queue_len       = 11000; /* too big? */
671 +       dev->flags              = IFF_NOARP;
672 +       dev->features           = NETIF_F_SG | NETIF_F_FRAGLIST |
673 +                                 NETIF_F_GSO | NETIF_F_HW_CSUM |
674 +                                 NETIF_F_HIGHDMA;
675 +       dev->priv_flags         &= ~(IFF_XMIT_DST_RELEASE |
676 +                                    IFF_TX_SKB_SHARING);
677 +}
678 +
679 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
680 +{
681 +       int ret = 0;
682 +
683 +       if (tb[IFLA_ADDRESS]) {
684 +               if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
685 +                       ret = -EINVAL;
686 +                       goto end;
687 +               }
688 +               if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
689 +                       ret = -EADDRNOTAVAIL;
690 +                       goto end;
691 +               }
692 +       }
693 +       return 0;
694 +end:
695 +       printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
696 +       return ret;
697 +}
698 +
699 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
700 +       .kind           = "imq",
701 +       .priv_size      = 0,
702 +       .setup          = imq_setup,
703 +       .validate       = imq_validate,
704 +};
705 +
706 +static const struct nf_queue_handler imq_nfqh = {
707 +       .name  = "imq",
708 +       .outfn = imq_nf_queue,
709 +};
710 +
711 +static int __init imq_init_hooks(void)
712 +{
713 +       int ret;
714 +
715 +       nf_register_queue_imq_handler(&imq_nfqh);
716 +
717 +       ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
718 +       if (ret < 0)
719 +               nf_unregister_queue_imq_handler();
720 +
721 +       return ret;
722 +}
723 +
724 +static int __init imq_init_one(int index)
725 +{
726 +       struct net_device *dev;
727 +       int ret;
728 +
729 +       dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
730 +       if (!dev)
731 +               return -ENOMEM;
732 +
733 +       ret = dev_alloc_name(dev, dev->name);
734 +       if (ret < 0)
735 +               goto fail;
736 +
737 +       dev->rtnl_link_ops = &imq_link_ops;
738 +       ret = register_netdevice(dev);
739 +       if (ret < 0)
740 +               goto fail;
741 +
742 +       return 0;
743 +fail:
744 +       free_netdev(dev);
745 +       return ret;
746 +}
747 +
748 +static int __init imq_init_devs(void)
749 +{
750 +       int err, i;
751 +       /* Validate module parameters, then register link ops and devices. */
752 +       if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
753 +               printk(KERN_ERR "IMQ: numdevs has to be between 1 and %u\n",
754 +                      IMQ_MAX_DEVS);
755 +               return -EINVAL;
756 +       }
757 +
758 +       if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
759 +               printk(KERN_ERR "IMQ: numqueues has to be between 1 and %u\n",
760 +                      IMQ_MAX_QUEUES);
761 +               return -EINVAL;
762 +       }
763 +       /* random seed mixed into the flow hash computed by imq_hash() */
764 +       get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
765 +
766 +       rtnl_lock();
767 +       err = __rtnl_link_register(&imq_link_ops);
768 +
769 +       for (i = 0; i < numdevs && !err; i++)
770 +               err = imq_init_one(i);
771 +
772 +       if (err) {
773 +               __rtnl_link_unregister(&imq_link_ops);
774 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
775 +       }
776 +       rtnl_unlock();
777 +
778 +       return err;
779 +}
780 +
781 +static int __init imq_init_module(void)
782 +{
783 +       int err;
784 +
785 +#if defined(CONFIG_IMQ_NUM_DEVS)
786 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
787 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
788 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
789 +#endif
790 +
791 +       err = imq_init_devs();
792 +       if (err) {
793 +               printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
794 +               return err;
795 +       }
796 +
797 +       err = imq_init_hooks();
798 +       if (err) {
799 +               printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
800 +               rtnl_link_unregister(&imq_link_ops);
801 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
802 +               return err;
803 +       }
804 +
805 +       printk(KERN_INFO "IMQ driver loaded successfully. "
806 +               "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues);
807 +
808 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
809 +       printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
810 +#else
811 +       printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
812 +#endif
813 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
814 +       printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
815 +#else
816 +       printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
817 +#endif
818 +
819 +       return 0;
820 +}
821 +
822 +static void __exit imq_unhook(void)
823 +{
824 +       nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
825 +       nf_unregister_queue_imq_handler();
826 +}
827 +
828 +static void __exit imq_cleanup_devs(void)
829 +{
830 +       rtnl_link_unregister(&imq_link_ops);
831 +       memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
832 +}
833 +
834 +static void __exit imq_exit_module(void)
835 +{
836 +       imq_unhook();
837 +       imq_cleanup_devs();
838 +       printk(KERN_INFO "IMQ driver unloaded successfully.\n");
839 +}
840 +
841 +module_init(imq_init_module);
842 +module_exit(imq_exit_module);
843 +
844 +module_param(numdevs, int, 0);
845 +module_param(numqueues, int, 0);
846 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
847 +                       "be created)");
848 +MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
849 +MODULE_AUTHOR("http://www.linuximq.net");
850 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
851 +                       "http://www.linuximq.net/ for more information.");
852 +MODULE_LICENSE("GPL");
853 +MODULE_ALIAS_RTNL_LINK("imq");
854 +
855 diff -uNr linux-3.1/drivers/net/Kconfig linux-3.1-imq/drivers/net/Kconfig
856 --- linux-3.1/drivers/net/Kconfig       2011-10-24 10:10:05.000000000 +0300
857 +++ linux-3.1-imq/drivers/net/Kconfig   2011-11-04 11:12:52.106390309 +0200
858 @@ -124,6 +124,125 @@
859           To compile this driver as a module, choose M here: the module
860           will be called eql.  If unsure, say N.
861  
862 +config IMQ
863 +       tristate "IMQ (intermediate queueing device) support"
864 +       depends on NETDEVICES && NETFILTER
865 +       ---help---
866 +         IMQ devices are used as placeholders for QoS queueing
867 +         disciplines. Every packet entering/leaving the IP stack can be
868 +         directed through the IMQ device where it's enqueued/dequeued to the
869 +         attached qdisc. This allows you to treat network devices as classes
870 +         and distribute bandwidth among them. Iptables is used to specify
871 +         through which IMQ device, if any, packets travel.
872 +
873 +         More information at: http://www.linuximq.net/
874 +
875 +         To compile this driver as a module, choose M here: the module
876 +         will be called imq.  If unsure, say N.
877 +
878 +choice
879 +       prompt "IMQ behavior (PRE/POSTROUTING)"
880 +       depends on IMQ
881 +       default IMQ_BEHAVIOR_AB
882 +       help
883 +         This setting defines how IMQ behaves in respect to its
884 +         hooking in PREROUTING and POSTROUTING.
885 +
886 +         IMQ can work in any of the following ways:
887 +
888 +             PREROUTING   |      POSTROUTING
889 +         -----------------|-------------------
890 +         #1  After NAT    |      After NAT
891 +         #2  After NAT    |      Before NAT
892 +         #3  Before NAT   |      After NAT
893 +         #4  Before NAT   |      Before NAT
894 +
895 +         The default behavior (IMQ AB) is to hook after NAT on PREROUTING
896 +         and before NAT on POSTROUTING (#2).
897 +
898 +         These settings are especially useful when trying to use IMQ
899 +         to shape NATed clients.
900 +
901 +         More information can be found at: www.linuximq.net
902 +
903 +         If not sure leave the default settings alone.
904 +
905 +config IMQ_BEHAVIOR_AA
906 +       bool "IMQ AA"
907 +       help
908 +         This setting defines how IMQ behaves in respect to its
909 +         hooking in PREROUTING and POSTROUTING.
910 +
911 +         Choosing this option will make IMQ hook like this:
912 +
913 +         PREROUTING:   After NAT
914 +         POSTROUTING:  After NAT
915 +
916 +         More information can be found at: www.linuximq.net
917 +
918 +         If not sure leave the default settings alone.
919 +
920 +config IMQ_BEHAVIOR_AB
921 +       bool "IMQ AB"
922 +       help
923 +         This setting defines how IMQ behaves in respect to its
924 +         hooking in PREROUTING and POSTROUTING.
925 +
926 +         Choosing this option will make IMQ hook like this:
927 +
928 +         PREROUTING:   After NAT
929 +         POSTROUTING:  Before NAT
930 +
931 +         More information can be found at: www.linuximq.net
932 +
933 +         If not sure leave the default settings alone.
934 +
935 +config IMQ_BEHAVIOR_BA
936 +       bool "IMQ BA"
937 +       help
938 +         This setting defines how IMQ behaves in respect to its
939 +         hooking in PREROUTING and POSTROUTING.
940 +
941 +         Choosing this option will make IMQ hook like this:
942 +
943 +         PREROUTING:   Before NAT
944 +         POSTROUTING:  After NAT
945 +
946 +         More information can be found at: www.linuximq.net
947 +
948 +         If not sure leave the default settings alone.
949 +
950 +config IMQ_BEHAVIOR_BB
951 +       bool "IMQ BB"
952 +       help
953 +         This setting defines how IMQ behaves in respect to its
954 +         hooking in PREROUTING and POSTROUTING.
955 +
956 +         Choosing this option will make IMQ hook like this:
957 +
958 +         PREROUTING:   Before NAT
959 +         POSTROUTING:  Before NAT
960 +
961 +         More information can be found at: www.linuximq.net
962 +
963 +         If not sure leave the default settings alone.
964 +
965 +endchoice
966 +
967 +config IMQ_NUM_DEVS
968 +       int "Number of IMQ devices"
969 +       range 2 16
970 +       depends on IMQ
971 +       default "16"
972 +       help
973 +         This setting defines how many IMQ devices will be created.
974 +
975 +         The default value is 16.
976 +
977 +         More information can be found at: www.linuximq.net
978 +
979 +         If not sure leave the default settings alone.
980 +
981  config TUN
982         tristate "Universal TUN/TAP device driver support"
983         select CRC32
984 diff -uNr linux-3.1/drivers/net/Makefile linux-3.1-imq/drivers/net/Makefile
985 --- linux-3.1/drivers/net/Makefile      2011-10-24 10:10:05.000000000 +0300
986 +++ linux-3.1-imq/drivers/net/Makefile  2011-11-04 11:12:52.106390309 +0200
987 @@ -7,6 +7,7 @@
988  #
989  obj-$(CONFIG_BONDING) += bonding/
990  obj-$(CONFIG_DUMMY) += dummy.o
991 +obj-$(CONFIG_IMQ) += imq.o
992  obj-$(CONFIG_EQUALIZER) += eql.o
993  obj-$(CONFIG_IFB) += ifb.o
994  obj-$(CONFIG_MACVLAN) += macvlan.o
995 diff -uNr linux-3.1/include/linux/imq.h linux-3.1-imq/include/linux/imq.h
996 --- linux-3.1/include/linux/imq.h       1970-01-01 02:00:00.000000000 +0200
997 +++ linux-3.1-imq/include/linux/imq.h   2011-11-04 11:12:52.109723710 +0200
998 @@ -0,0 +1,13 @@
999 +#ifndef _IMQ_H
1000 +#define _IMQ_H
1001 +
1002 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
1003 +#define IMQ_F_BITS     5 /* width of the sk_buff imq_flags bitfield */
1004 +
1005 +#define IMQ_F_IFMASK   0x0f /* bits 0-3: index of the target imq device */
1006 +#define IMQ_F_ENQUEUE  0x10 /* bit 4: set by the IMQ target on marked packets */
1007 +
1008 +#define IMQ_MAX_DEVS   (IMQ_F_IFMASK + 1) /* 16: one per encodable index */
1009 +
1010 +#endif /* _IMQ_H */
1011 +
1012 diff -uNr linux-3.1/include/linux/netfilter/xt_IMQ.h linux-3.1-imq/include/linux/netfilter/xt_IMQ.h
1013 --- linux-3.1/include/linux/netfilter/xt_IMQ.h  1970-01-01 02:00:00.000000000 +0200
1014 +++ linux-3.1-imq/include/linux/netfilter/xt_IMQ.h      2011-11-04 11:12:52.109723710 +0200
1015 @@ -0,0 +1,9 @@
1016 +#ifndef _XT_IMQ_H
1017 +#define _XT_IMQ_H
1018 +
1019 +struct xt_imq_info {
1020 +       unsigned int todev;     /* target imq device */
1021 +};
1022 +
1023 +#endif /* _XT_IMQ_H */
1024 +
1025 diff -uNr linux-3.1/include/linux/netfilter.h linux-3.1-imq/include/linux/netfilter.h
1026 --- linux-3.1/include/linux/netfilter.h 2011-10-24 10:10:05.000000000 +0300
1027 +++ linux-3.1-imq/include/linux/netfilter.h     2011-11-04 11:12:52.109723710 +0200
1028 @@ -22,7 +22,8 @@
1029  #define NF_QUEUE 3
1030  #define NF_REPEAT 4
1031  #define NF_STOP 5
1032 -#define NF_MAX_VERDICT NF_STOP
1033 +#define NF_IMQ_QUEUE 6
1034 +#define NF_MAX_VERDICT NF_IMQ_QUEUE
1035  
1036  /* we overload the higher bits for encoding auxiliary data such as the queue
1037   * number or errno values. Not nice, but better than additional function
1038 diff -uNr linux-3.1/include/linux/netfilter_ipv4/ipt_IMQ.h linux-3.1-imq/include/linux/netfilter_ipv4/ipt_IMQ.h
1039 --- linux-3.1/include/linux/netfilter_ipv4/ipt_IMQ.h    1970-01-01 02:00:00.000000000 +0200
1040 +++ linux-3.1-imq/include/linux/netfilter_ipv4/ipt_IMQ.h        2011-11-04 11:12:52.109723710 +0200
1041 @@ -0,0 +1,10 @@
1042 +#ifndef _IPT_IMQ_H
1043 +#define _IPT_IMQ_H
1044 +
1045 +/* Backwards compatibility for old userspace */
1046 +#include <linux/netfilter/xt_IMQ.h>
1047 +
1048 +#define ipt_imq_info xt_imq_info
1049 +
1050 +#endif /* _IPT_IMQ_H */
1051 +
1052 diff -uNr linux-3.1/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-3.1-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h
1053 --- linux-3.1/include/linux/netfilter_ipv6/ip6t_IMQ.h   1970-01-01 02:00:00.000000000 +0200
1054 +++ linux-3.1-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h       2011-11-04 11:12:52.113057113 +0200
1055 @@ -0,0 +1,10 @@
1056 +#ifndef _IP6T_IMQ_H
1057 +#define _IP6T_IMQ_H
1058 +
1059 +/* Backwards compatibility for old userspace */
1060 +#include <linux/netfilter/xt_IMQ.h>
1061 +
1062 +#define ip6t_imq_info xt_imq_info
1063 +
1064 +#endif /* _IP6T_IMQ_H */
1065 +
1066 diff -uNr linux-3.1/include/linux/skbuff.h linux-3.1-imq/include/linux/skbuff.h
1067 --- linux-3.1/include/linux/skbuff.h    2011-10-24 10:10:05.000000000 +0300
1068 +++ linux-3.1-imq/include/linux/skbuff.h        2011-11-04 11:12:52.116390515 +0200
1069 @@ -29,6 +29,9 @@
1070  #include <linux/hrtimer.h>
1071  #include <linux/dma-mapping.h>
1072  #include <linux/netdev_features.h>
1073 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1074 +#include <linux/imq.h>
1075 +#endif
1076  
1077  /* Don't change this without changing skb_csum_unnecessary! */
1078  #define CHECKSUM_NONE 0
1079 @@ -356,6 +359,9 @@
1080          * first. This is owned by whoever has the skb queued ATM.
1081          */
1082         char                    cb[48] __aligned(8);
1083 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1084 +       void                    *cb_next;
1085 +#endif
1086  
1087         unsigned long           _skb_refdst;
1088  #ifdef CONFIG_XFRM
1089 @@ -394,6 +400,9 @@
1090  #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
1091         struct sk_buff          *nfct_reasm;
1092  #endif
1093 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1094 +       struct nf_queue_entry   *nf_queue_entry;
1095 +#endif
1096  #ifdef CONFIG_BRIDGE_NETFILTER
1097         struct nf_bridge_info   *nf_bridge;
1098  #endif
1099 @@ -418,6 +427,10 @@
1100  
1101         /* 0/13 bit hole */
1102  
1103 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1104 +       __u8                    imq_flags:IMQ_F_BITS;
1105 +#endif
1106 +
1107  #ifdef CONFIG_NET_DMA
1108         dma_cookie_t            dma_cookie;
1109  #endif
1110 @@ -504,6 +517,12 @@
1111         return (struct rtable *)skb_dst(skb);
1112  }
1113  
1114 +
1115 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1116 +extern int skb_save_cb(struct sk_buff *skb);
1117 +extern int skb_restore_cb(struct sk_buff *skb);
1118 +#endif
1119 +
1120  extern void kfree_skb(struct sk_buff *skb);
1121  extern void consume_skb(struct sk_buff *skb);
1122  extern void           __kfree_skb(struct sk_buff *skb);
1123 @@ -2157,6 +2176,10 @@
1124         dst->nfct_reasm = src->nfct_reasm;
1125         nf_conntrack_get_reasm(src->nfct_reasm);
1126  #endif
1127 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1128 +       dst->imq_flags = src->imq_flags;
1129 +       dst->nf_queue_entry = src->nf_queue_entry;
1130 +#endif
1131  #ifdef CONFIG_BRIDGE_NETFILTER
1132         dst->nf_bridge  = src->nf_bridge;
1133         nf_bridge_get(src->nf_bridge);
1134 diff -uNr linux-3.1/include/net/netfilter/nf_queue.h linux-3.1-imq/include/net/netfilter/nf_queue.h
1135 --- linux-3.1/include/net/netfilter/nf_queue.h  2011-10-24 10:10:05.000000000 +0300
1136 +++ linux-3.1-imq/include/net/netfilter/nf_queue.h      2011-11-04 11:12:52.116390515 +0200
1137 @@ -30,5 +30,11 @@
1138                                        const struct nf_queue_handler *qh);
1139  extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
1140  extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
1141 +extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1142 +
1143 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1144 +extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
1145 +extern void nf_unregister_queue_imq_handler(void);
1146 +#endif
1147  
1148  #endif /* _NF_QUEUE_H */
1149 diff -uNr linux-3.1/net/core/dev.c linux-3.1-imq/net/core/dev.c
1150 --- linux-3.1/net/core/dev.c    2011-10-24 10:10:05.000000000 +0300
1151 +++ linux-3.1-imq/net/core/dev.c        2011-11-04 11:12:52.119723915 +0200
1152 @@ -98,6 +98,9 @@
1153  #include <net/net_namespace.h>
1154  #include <net/sock.h>
1155  #include <linux/rtnetlink.h>
1156 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1157 +#include <linux/imq.h>
1158 +#endif
1159  #include <linux/proc_fs.h>
1160  #include <linux/seq_file.h>
1161  #include <linux/stat.h>
1162 @@ -2126,7 +2129,12 @@
1163                 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1164                         skb_dst_drop(skb);
1165  
1166 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1167 +               if (!list_empty(&ptype_all) &&
1168 +                                       !(skb->imq_flags & IMQ_F_ENQUEUE))
1169 +#else
1170                 if (!list_empty(&ptype_all))
1171 +#endif
1172                         dev_queue_xmit_nit(skb, dev);
1173  
1174                 skb_orphan_try(skb);
1175 diff -uNr linux-3.1/net/core/skbuff.c linux-3.1-imq/net/core/skbuff.c
1176 --- linux-3.1/net/core/skbuff.c 2011-10-24 10:10:05.000000000 +0300
1177 +++ linux-3.1-imq/net/core/skbuff.c     2011-11-04 11:12:52.123057315 +0200
1178 @@ -73,6 +73,9 @@
1179  
1180  static struct kmem_cache *skbuff_head_cache __read_mostly;
1181  static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1182 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1183 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
1184 +#endif
1185  
1186  static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
1187                                   struct pipe_buffer *buf)
1188 @@ -92,6 +95,82 @@
1189         return 1;
1190  }
1191  
1192 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1193 +/* Control buffer save/restore for IMQ devices */
1194 +struct skb_cb_table { /* one saved copy of an skb control buffer */
1195 +       char                    cb[48] __aligned(8); /* snapshot of skb->cb */
1196 +       void                    *cb_next; /* previously saved entry, or NULL */
1197 +       atomic_t                refcnt; /* 1 at save; +1 per skb copy sharing it */
1198 +};
1199 +
1200 +static DEFINE_SPINLOCK(skb_cb_store_lock);
1201 +
1202 +int skb_save_cb(struct sk_buff *skb) /* push skb->cb onto its backup chain */
1203 +{
1204 +       struct skb_cb_table *next;
1205 +
1206 +       next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
1207 +       if (!next)
1208 +               return -ENOMEM; /* skb is left untouched on allocation failure */
1209 +
1210 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1211 +
1212 +       memcpy(next->cb, skb->cb, sizeof(skb->cb));
1213 +       next->cb_next = skb->cb_next; /* link to the previously saved entry */
1214 +
1215 +       atomic_set(&next->refcnt, 1); /* referenced by this skb only, for now */
1216 +
1217 +       skb->cb_next = next; /* new entry becomes the head of the chain */
1218 +       return 0; /* 0 on success */
1219 +}
1220 +EXPORT_SYMBOL(skb_save_cb);
1221 +
1222 +int skb_restore_cb(struct sk_buff *skb) /* pop the newest saved cb into skb */
1223 +{
1224 +       struct skb_cb_table *next;
1225 +
1226 +       if (!skb->cb_next)
1227 +               return 0; /* nothing saved: skb->cb is left as it is */
1228 +
1229 +       next = skb->cb_next;
1230 +
1231 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1232 +
1233 +       memcpy(skb->cb, next->cb, sizeof(skb->cb)); /* NOTE(review): read unlocked */
1234 +       skb->cb_next = next->cb_next; /* chain head moves to the older entry */
1235 +
1236 +       spin_lock(&skb_cb_store_lock);
1237 +
1238 +       if (atomic_dec_and_test(&next->refcnt)) /* last reference frees the entry */
1239 +               kmem_cache_free(skbuff_cb_store_cache, next);
1240 +
1241 +       spin_unlock(&skb_cb_store_lock);
1242 +
1243 +       return 0; /* always 0; there is no failure path */
1244 +}
1245 +EXPORT_SYMBOL(skb_restore_cb);
1246 +
1247 +static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
1248 +{ /* make new share __old's saved-cb chain head; entries are not duplicated */
1249 +       struct skb_cb_table *next;
1250 +       struct sk_buff *old;
1251 +
1252 +       if (!__old->cb_next) {
1253 +               new->cb_next = NULL; /* no saved cb to share */
1254 +               return;
1255 +       }
1256 +
1257 +       spin_lock(&skb_cb_store_lock);
1258 +
1259 +       old = (struct sk_buff *)__old; /* drops const; *old is not written below */
1260 +
1261 +       next = old->cb_next;
1262 +       atomic_inc(&next->refcnt); /* new holds its own reference on the head */
1263 +       new->cb_next = next;
1264 +
1265 +       spin_unlock(&skb_cb_store_lock);
1266 +}
1267 +#endif
1268  
1269  /* Pipe buffer operations for a socket. */
1270  static const struct pipe_buf_operations sock_pipe_buf_ops = {
1271 @@ -392,6 +471,26 @@
1272                 WARN_ON(in_irq());
1273                 skb->destructor(skb);
1274         }
1275 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1276 +       /* This should not happen. When it does, avoid a memleak by unwinding
1277 +        * the chain of cb-backups. %p keeps the pointer intact on 64-bit. */
1278 +       while (skb->cb_next != NULL) {
1279 +               if (net_ratelimit())
1280 +                       printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: "
1281 +                               "%p\n", skb->cb_next);
1282 +
1283 +               skb_restore_cb(skb);
1284 +       }
1285 +       /* This should not happen either, nf_queue_entry is nullified in
1286 +        * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are
1287 +        * leaking entry pointers, maybe memory. We don't know if this is
1288 +        * pointer to already freed memory, or should this be freed.
1289 +        * If this happens we need to add refcounting, etc for nf_queue_entry.
1290 +        */
1291 +       if (skb->nf_queue_entry && net_ratelimit())
1292 +               printk(KERN_WARNING
1293 +                               "IMQ: kfree_skb: skb->nf_queue_entry != NULL\n");
1294 +#endif
1295  #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1296         nf_conntrack_put(skb->nfct);
1297  #endif
1298 @@ -533,6 +632,9 @@
1299         new->sp                 = secpath_get(old->sp);
1300  #endif
1301         memcpy(new->cb, old->cb, sizeof(old->cb));
1302 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1303 +       skb_copy_stored_cb(new, old);
1304 +#endif
1305         new->csum               = old->csum;
1306         new->local_df           = old->local_df;
1307         new->pkt_type           = old->pkt_type;
1308 @@ -2888,6 +2990,13 @@
1309                                                 0,
1310                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1311                                                 NULL);
1312 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1313 +       skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1314 +                                                 sizeof(struct skb_cb_table),
1315 +                                                 0,
1316 +                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1317 +                                                 NULL);
1318 +#endif
1319  }
1320  
1321  /**
1322 diff -uNr linux-3.1/net/ipv6/ip6_output.c linux-3.1-imq/net/ipv6/ip6_output.c
1323 --- linux-3.1/net/ipv6/ip6_output.c     2011-10-24 10:10:05.000000000 +0300
1324 +++ linux-3.1-imq/net/ipv6/ip6_output.c 2011-11-04 11:12:52.123057315 +0200
1325 @@ -102,9 +102,6 @@
1326         struct net_device *dev = dst->dev;
1327         struct neighbour *neigh;
1328  
1329 -       skb->protocol = htons(ETH_P_IPV6);
1330 -       skb->dev = dev;
1331 -
1332         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1333                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1334  
1335 @@ -170,6 +167,11 @@
1336                 return 0;
1337         }
1338  
1339 +       /* IMQ-patch: moved setting skb->dev and skb->protocol from
1340 +        * ip6_finish_output2 to fix crashing at netif_skb_features(). */
1341 +       skb->protocol = htons(ETH_P_IPV6);
1342 +       skb->dev = dev;
1343 +
1344         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
1345                             ip6_finish_output,
1346                             !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1347 diff -uNr linux-3.1/net/netfilter/core.c linux-3.1-imq/net/netfilter/core.c
1348 --- linux-3.1/net/netfilter/core.c      2011-10-24 10:10:05.000000000 +0300
1349 +++ linux-3.1-imq/net/netfilter/core.c  2011-11-04 11:12:52.123057315 +0200
1350 @@ -179,9 +179,11 @@
1351                 ret = NF_DROP_GETERR(verdict);
1352                 if (ret == 0)
1353                         ret = -EPERM;
1354 -       } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
1355 +       } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
1356 +                  (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
1357                 int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1358 -                                               verdict >> NF_VERDICT_QBITS);
1359 +                                               verdict >> NF_VERDICT_QBITS,
1360 +                                               verdict & NF_VERDICT_MASK);
1361                 if (err < 0) {
1362                         if (err == -ECANCELED)
1363                                 goto next_hook;
1364 diff -uNr linux-3.1/net/netfilter/Kconfig linux-3.1-imq/net/netfilter/Kconfig
1365 --- linux-3.1/net/netfilter/Kconfig     2011-10-24 10:10:05.000000000 +0300
1366 +++ linux-3.1-imq/net/netfilter/Kconfig 2011-11-04 11:12:52.123057315 +0200
1367 @@ -507,6 +507,18 @@
1368           For more information on the LEDs available on your system, see
1369           Documentation/leds-class.txt
1370  
1371 +config NETFILTER_XT_TARGET_IMQ
1372 +        tristate '"IMQ" target support'
1373 +       depends on NETFILTER_XTABLES
1374 +       depends on IP_NF_MANGLE || IP6_NF_MANGLE
1375 +       select IMQ
1376 +       default m if NETFILTER_ADVANCED=n
1377 +        help
1378 +          This option adds an `IMQ' target which is used to specify if and
1379 +          to which imq device packets should get enqueued/dequeued.
1380 +
1381 +          To compile it as a module, choose M here.  If unsure, say N.
1382 +
1383  config NETFILTER_XT_TARGET_MARK
1384         tristate '"MARK" target support'
1385         depends on NETFILTER_ADVANCED
1386 diff -uNr linux-3.1/net/netfilter/Makefile linux-3.1-imq/net/netfilter/Makefile
1387 --- linux-3.1/net/netfilter/Makefile    2011-10-24 10:10:05.000000000 +0300
1388 +++ linux-3.1-imq/net/netfilter/Makefile        2011-11-04 11:12:52.123057315 +0200
1389 @@ -61,6 +61,7 @@
1390  obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
1391  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1392  obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
1393 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1394  obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
1395  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
1396  obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
1397 diff -uNr linux-3.1/net/netfilter/nf_internals.h linux-3.1-imq/net/netfilter/nf_internals.h
1398 --- linux-3.1/net/netfilter/nf_internals.h      2011-10-24 10:10:05.000000000 +0300
1399 +++ linux-3.1-imq/net/netfilter/nf_internals.h  2011-11-04 11:12:52.123057315 +0200
1400 @@ -29,7 +29,7 @@
1401                     struct net_device *indev,
1402                     struct net_device *outdev,
1403                     int (*okfn)(struct sk_buff *),
1404 -                   unsigned int queuenum);
1405 +                   unsigned int queuenum, unsigned int queuetype);
1406  extern int __init netfilter_queue_init(void);
1407  
1408  /* nf_log.c */
1409 diff -uNr linux-3.1/net/netfilter/nf_queue.c linux-3.1-imq/net/netfilter/nf_queue.c
1410 --- linux-3.1/net/netfilter/nf_queue.c  2011-10-24 10:10:05.000000000 +0300
1411 +++ linux-3.1-imq/net/netfilter/nf_queue.c      2011-11-04 11:12:52.123057315 +0200
1412 @@ -22,6 +22,26 @@
1413  
1414  static DEFINE_MUTEX(queue_handler_mutex);
1415  
1416 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1417 +static const struct nf_queue_handler *queue_imq_handler;
1418 +
1419 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1420 +{ /* install qh as the handler used for NF_IMQ_QUEUE verdicts */
1421 +       mutex_lock(&queue_handler_mutex);
1422 +       rcu_assign_pointer(queue_imq_handler, qh); /* overwrites any previous one */
1423 +       mutex_unlock(&queue_handler_mutex);
1424 +}
1425 +EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
1426 +
1427 +void nf_unregister_queue_imq_handler(void)
1428 +{ /* clear the IMQ handler; NOTE(review): no synchronize_rcu() here - confirm */
1429 +       mutex_lock(&queue_handler_mutex);
1430 +       rcu_assign_pointer(queue_imq_handler, NULL);
1431 +       mutex_unlock(&queue_handler_mutex);
1432 +}
1433 +EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
1434 +#endif
1435 +
1436  /* return EBUSY when somebody else is registered, return EEXIST if the
1437   * same handler is registered, return 0 in case of success. */
1438  int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
1439 @@ -92,7 +112,7 @@
1440  }
1441  EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1442  
1443 -static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1444 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1445  {
1446         /* Release those devices we held, or Alexey will kill me. */
1447         if (entry->indev)
1448 @@ -112,6 +132,7 @@
1449         /* Drop reference to owner of hook which queued us. */
1450         module_put(entry->elem->owner);
1451  }
1452 +EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1453  
1454  /*
1455   * Any packet that leaves via this function must come back
1456 @@ -123,7 +144,8 @@
1457                       struct net_device *indev,
1458                       struct net_device *outdev,
1459                       int (*okfn)(struct sk_buff *),
1460 -                     unsigned int queuenum)
1461 +                     unsigned int queuenum,
1462 +                     unsigned int queuetype)
1463  {
1464         int status = -ENOENT;
1465         struct nf_queue_entry *entry = NULL;
1466 @@ -137,7 +159,17 @@
1467         /* QUEUE == DROP if no one is waiting, to be safe. */
1468         rcu_read_lock();
1469  
1470 -       qh = rcu_dereference(queue_handler[pf]);
1471 +       if (queuetype == NF_IMQ_QUEUE) {
1472 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1473 +               qh = rcu_dereference(queue_imq_handler);
1474 +#else
1475 +               BUG();
1476 +               goto err_unlock;
1477 +#endif
1478 +       } else {
1479 +               qh = rcu_dereference(queue_handler[pf]);
1480 +       }
1481 +
1482         if (!qh) {
1483                 status = -ESRCH;
1484                 goto err_unlock;
1485 @@ -209,7 +241,8 @@
1486              struct net_device *indev,
1487              struct net_device *outdev,
1488              int (*okfn)(struct sk_buff *),
1489 -            unsigned int queuenum)
1490 +            unsigned int queuenum,
1491 +            unsigned int queuetype)
1492  {
1493         struct sk_buff *segs;
1494         int err;
1495 @@ -217,7 +250,7 @@
1496  
1497         if (!skb_is_gso(skb))
1498                 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1499 -                                 queuenum);
1500 +                                 queuenum, queuetype);
1501  
1502         switch (pf) {
1503         case NFPROTO_IPV4:
1504 @@ -299,7 +299,7 @@ int nf_queue(struct sk_buff *skb,
1505                 if (err == 0) {
1506                         nf_bridge_adjust_segmented_data(segs);
1507                         err = __nf_queue(segs, elem, pf, hook, indev,
1508 -                                          outdev, okfn, queuenum);
1509 +                                          outdev, okfn, queuenum, queuetype);
1510                 }
1511                 if (err == 0)
1512                         queued++;
1513 @@ -299,9 +332,11 @@
1514                 local_bh_enable();
1515                 break;
1516         case NF_QUEUE:
1517 +       case NF_IMQ_QUEUE:
1518                 err = __nf_queue(skb, elem, entry->pf, entry->hook,
1519                                  entry->indev, entry->outdev, entry->okfn,
1520 -                                verdict >> NF_VERDICT_QBITS);
1521 +                                verdict >> NF_VERDICT_QBITS,
1522 +                                verdict & NF_VERDICT_MASK);
1523                 if (err < 0) {
1524                         if (err == -ECANCELED)
1525                                 goto next_hook;
1526 diff -uNr linux-3.1/net/netfilter/xt_IMQ.c linux-3.1-imq/net/netfilter/xt_IMQ.c
1527 --- linux-3.1/net/netfilter/xt_IMQ.c    1970-01-01 02:00:00.000000000 +0200
1528 +++ linux-3.1-imq/net/netfilter/xt_IMQ.c        2011-11-04 11:12:52.123057315 +0200
1529 @@ -0,0 +1,74 @@
1530 +/*
1531 + * This target marks packets to be enqueued to an imq device
1532 + */
1533 +#include <linux/module.h>
1534 +#include <linux/skbuff.h>
1535 +#include <linux/netfilter/x_tables.h>
1536 +#include <linux/netfilter/xt_IMQ.h>
1537 +#include <linux/imq.h>
1538 +
1539 +static unsigned int imq_target(struct sk_buff *pskb,
1540 +                               const struct xt_action_param *par)
1541 +{ /* tag the packet with its imq device index and the enqueue flag */
1542 +       const struct xt_imq_info *mr = par->targinfo;
1543 +
1544 +       pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
1545 +
1546 +       return XT_CONTINUE; /* the only return value of this target */
1547 +}
1548 +
1549 +static int imq_checkentry(const struct xt_tgchk_param *par) /* rule check */
1550 +{
1551 +       const struct xt_imq_info *mr = par->targinfo; /* only read here */
1552 +
1553 +       if (mr->todev > IMQ_MAX_DEVS - 1) { /* valid indexes: 0..IMQ_MAX_DEVS-1 */
1554 +               printk(KERN_WARNING
1555 +                      "IMQ: invalid device specified, highest is %d\n",
1556 +                      IMQ_MAX_DEVS - 1); /* int expression, hence %d */
1557 +               return -EINVAL;
1558 +       }
1559 +
1560 +       return 0; /* 0 accepts the rule, -EINVAL rejects it */
1561 +}
1562 +
1563 +static struct xt_target xt_imq_reg[] __read_mostly = {
1564 +       { /* IPv4 registration */
1565 +               .name           = "IMQ",
1566 +               .family         = AF_INET,
1567 +               .checkentry     = imq_checkentry,
1568 +               .target         = imq_target,
1569 +               .targetsize     = sizeof(struct xt_imq_info),
1570 +               .table          = "mangle",
1571 +               .me             = THIS_MODULE
1572 +       },
1573 +       { /* IPv6 registration: identical apart from .family */
1574 +               .name           = "IMQ",
1575 +               .family         = AF_INET6,
1576 +               .checkentry     = imq_checkentry,
1577 +               .target         = imq_target,
1578 +               .targetsize     = sizeof(struct xt_imq_info),
1579 +               .table          = "mangle",
1580 +               .me             = THIS_MODULE
1581 +       },
1582 +};
1583 +
1584 +static int __init imq_init(void) /* module_init(): register both targets */
1585 +{
1586 +       return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1587 +}
1588 +
1589 +static void __exit imq_fini(void) /* module_exit(): unregister both targets */
1590 +{
1591 +       xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1592 +}
1593 +
1594 +module_init(imq_init);
1595 +module_exit(imq_fini);
1596 +
1597 +MODULE_AUTHOR("http://www.linuximq.net");
1598 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. "
1599 +                  "See http://www.linuximq.net/ for more information.");
1600 +MODULE_LICENSE("GPL");
1601 +MODULE_ALIAS("ipt_IMQ");
1602 +MODULE_ALIAS("ip6t_IMQ");
1603 +
This page took 0.383136 seconds and 4 git commands to generate.