]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-imq.patch
- fixed next error (7 to go)
[packages/kernel.git] / kernel-imq.patch
CommitLineData
a168f21d
AM
1diff -uNr linux-3.0/drivers/net/imq.c linux-3.0-imq/drivers/net/imq.c
2--- linux-3.0/drivers/net/imq.c 1970-01-01 02:00:00.000000000 +0200
3+++ linux-3.0-imq/drivers/net/imq.c 2011-07-26 07:24:09.843279145 +0300
4@@ -0,0 +1,820 @@
2380c486
JR
5+/*
6+ * Pseudo-driver for the intermediate queue device.
7+ *
8+ * This program is free software; you can redistribute it and/or
9+ * modify it under the terms of the GNU General Public License
10+ * as published by the Free Software Foundation; either version
11+ * 2 of the License, or (at your option) any later version.
12+ *
13+ * Authors: Patrick McHardy, <kaber@trash.net>
14+ *
15+ * The first version was written by Martin Devera, <devik@cdi.cz>
16+ *
17+ * Credits: Jan Rafaj <imq2t@cedric.vabo.cz>
18+ * - Update patch to 2.4.21
19+ * Sebastian Strollo <sstrollo@nortelnetworks.com>
20+ * - Fix "Dead-loop on netdevice imq"-issue
21+ * Marcel Sebek <sebek64@post.cz>
22+ * - Update to 2.6.2-rc1
23+ *
24+ * After some time of inactivity there is a group taking care
25+ * of IMQ again: http://www.linuximq.net
26+ *
27+ *
28+ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7
29+ * including the following changes:
30+ *
31+ * - Correction of ipv6 support "+"s issue (Hasso Tepper)
32+ * - Correction of imq_init_devs() issue that resulted in
33+ * kernel OOPS unloading IMQ as module (Norbert Buchmuller)
34+ * - Addition of functionality to choose number of IMQ devices
35+ * during kernel config (Andre Correa)
36+ * - Addition of functionality to choose how IMQ hooks on
37+ * PRE and POSTROUTING (after or before NAT) (Andre Correa)
38+ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
39+ *
40+ *
41+ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
42+ * released with almost no problems. 2.6.14-x was released
43+ * with some important changes: nfcache was removed; After
44+ * some weeks of trouble we figured out that some IMQ fields
45+ * in skb were missing in skbuff.c - skb_clone and copy_skb_header.
46+ * These functions are correctly patched by this new patch version.
47+ *
48+ * Thanks for all who helped to figure out all the problems with
49+ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
50+ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
51+ * I didn't forget anybody). I apologize again for my lack of time.
52+ *
53+ *
f6396b7e 54+ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
7f07242b 55+ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid
2380c486
JR
56+ * recursive locking. New initialization routines to fix 'rmmod' not
57+ * working anymore. Used code from ifb.c. (Jussi Kivilinna)
7f07242b 58+ *
59+ * 2008/08/06 - 2.6.26 - (JK)
60+ * - Replaced tasklet with 'netif_schedule()'.
61+ * - Cleaned up and added comments for imq_nf_queue().
62+ *
63+ * 2009/04/12
64+ * - Add skb_save_cb/skb_restore_cb helper functions for backing up
65+ * control buffer. This is needed because qdisc-layer on kernels
66+ * 2.6.27 and newer overwrite control buffer. (Jussi Kivilinna)
67+ * - Add better locking for IMQ device. Hopefully this will solve
68+ * SMP issues. (Jussi Kivilinna)
69+ * - Port to 2.6.27
70+ * - Port to 2.6.28
71+ * - Port to 2.6.29 + fix rmmod not working
72+ *
73+ * 2009/04/20 - (Jussi Kivilinna)
74+ * - Use netdevice feature flags to avoid extra packet handling
75+ * by core networking layer and possibly increase performance.
76+ *
14f08cd0 77+ * 2009/09/26 - (Jussi Kivilinna)
78+ * - Add imq_nf_reinject_lockless to fix deadlock with
79+ * imq_nf_queue/imq_nf_reinject.
80+ *
81+ * 2009/12/08 - (Jussi Kivilinna)
82+ * - Port to 2.6.32
83+ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit()
84+ * - Also add better error checking for skb->nf_queue_entry usage
85+ *
76514441
AM
86+ * 2010/02/25 - (Jussi Kivilinna)
87+ * - Port to 2.6.33
88+ *
f6396b7e
AM
89+ * 2010/08/15 - (Jussi Kivilinna)
90+ * - Port to 2.6.35
91+ * - Simplify hook registration by using nf_register_hooks.
92+ * - nf_reinject doesn't need spinlock around it, therefore remove
93+ * imq_nf_reinject function. Other nf_reinject users protect
94+ * their own data with spinlock. With IMQ however all data
95+ * needed is stored per skbuff, so no locking is needed.
96+ * - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of
97+ * NF_QUEUE, this allows working coexistence of IMQ and other
98+ * NF_QUEUE users.
99+ * - Make IMQ multi-queue. Number of IMQ device queues can be
100+ * increased with 'numqueues' module parameters. Default number
101+ * of queues is 1, in other words by default IMQ works as
7af23471 102+ * single-queue device. Multi-queue selection is based on
f6396b7e
AM
103+ * IFB multi-queue patch by Changli Gao <xiaosuo@gmail.com>.
104+ *
7af23471
JR
105+ * 2011/03/18 - (Jussi Kivilinna)
106+ * - Port to 2.6.38
107+ *
a168f21d
AM
108+ * 2011/07/12 - (syoder89@gmail.com)
109+ * - Crash fix that happens when the receiving interface has more
110+ * than one queue (add missing skb_set_queue_mapping in
111+ * imq_select_queue).
112+ *
113+ * 2011/07/26 - (Jussi Kivilinna)
114+ * - Add queue mapping checks for packets exiting IMQ.
115+ * - Port to 3.0
116+ *
2380c486
JR
117+ * Also, many thanks to Pablo Sebastian Greco for making the initial
118+ * patch and to those who helped the testing.
119+ *
120+ * More info at: http://www.linuximq.net/ (Andre Correa)
121+ */
122+
123+#include <linux/module.h>
124+#include <linux/kernel.h>
125+#include <linux/moduleparam.h>
7f07242b 126+#include <linux/list.h>
2380c486
JR
127+#include <linux/skbuff.h>
128+#include <linux/netdevice.h>
7f07242b 129+#include <linux/etherdevice.h>
2380c486
JR
130+#include <linux/rtnetlink.h>
131+#include <linux/if_arp.h>
132+#include <linux/netfilter.h>
133+#include <linux/netfilter_ipv4.h>
134+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
135+ #include <linux/netfilter_ipv6.h>
136+#endif
137+#include <linux/imq.h>
138+#include <net/pkt_sched.h>
139+#include <net/netfilter/nf_queue.h>
f6396b7e
AM
140+#include <net/sock.h>
141+#include <linux/ip.h>
142+#include <linux/ipv6.h>
143+#include <linux/if_vlan.h>
144+#include <linux/if_pppox.h>
145+#include <net/ip.h>
146+#include <net/ipv6.h>
147+
148+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
2380c486 149+
2380c486
JR
150+static nf_hookfn imq_nf_hook;
151+
f6396b7e
AM
152+static struct nf_hook_ops imq_ops[] = {
153+ {
154+ /* imq_ingress_ipv4 */
155+ .hook = imq_nf_hook,
156+ .owner = THIS_MODULE,
157+ .pf = PF_INET,
158+ .hooknum = NF_INET_PRE_ROUTING,
2380c486 159+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
f6396b7e 160+ .priority = NF_IP_PRI_MANGLE + 1,
2380c486 161+#else
f6396b7e 162+ .priority = NF_IP_PRI_NAT_DST + 1,
2380c486 163+#endif
f6396b7e
AM
164+ },
165+ {
166+ /* imq_egress_ipv4 */
167+ .hook = imq_nf_hook,
168+ .owner = THIS_MODULE,
169+ .pf = PF_INET,
170+ .hooknum = NF_INET_POST_ROUTING,
2380c486 171+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
f6396b7e 172+ .priority = NF_IP_PRI_LAST,
2380c486 173+#else
f6396b7e 174+ .priority = NF_IP_PRI_NAT_SRC - 1,
2380c486 175+#endif
f6396b7e 176+ },
2380c486 177+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
f6396b7e
AM
178+ {
179+ /* imq_ingress_ipv6 */
180+ .hook = imq_nf_hook,
181+ .owner = THIS_MODULE,
182+ .pf = PF_INET6,
183+ .hooknum = NF_INET_PRE_ROUTING,
2380c486 184+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
f6396b7e 185+ .priority = NF_IP6_PRI_MANGLE + 1,
2380c486 186+#else
f6396b7e 187+ .priority = NF_IP6_PRI_NAT_DST + 1,
2380c486 188+#endif
f6396b7e
AM
189+ },
190+ {
191+ /* imq_egress_ipv6 */
192+ .hook = imq_nf_hook,
193+ .owner = THIS_MODULE,
194+ .pf = PF_INET6,
195+ .hooknum = NF_INET_POST_ROUTING,
2380c486 196+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
f6396b7e 197+ .priority = NF_IP6_PRI_LAST,
2380c486 198+#else
f6396b7e 199+ .priority = NF_IP6_PRI_NAT_SRC - 1,
2380c486 200+#endif
f6396b7e 201+ },
2380c486 202+#endif
f6396b7e 203+};
2380c486
JR
204+
205+#if defined(CONFIG_IMQ_NUM_DEVS)
f6396b7e 206+static int numdevs = CONFIG_IMQ_NUM_DEVS;
2380c486 207+#else
f6396b7e 208+static int numdevs = IMQ_MAX_DEVS;
2380c486
JR
209+#endif
210+
211+static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
212+
a168f21d
AM
213+#define IMQ_MAX_QUEUES 32
214+static int numqueues = 1;
f6396b7e
AM
215+static u32 imq_hashrnd;
216+
217+static inline __be16 pppoe_proto(const struct sk_buff *skb)
218+{
219+ return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
220+ sizeof(struct pppoe_hdr)));
221+}
222+
223+static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
224+{
225+ unsigned int pull_len;
226+ u16 protocol = skb->protocol;
227+ u32 addr1, addr2;
228+ u32 hash, ihl = 0;
229+ union {
230+ u16 in16[2];
231+ u32 in32;
232+ } ports;
233+ u8 ip_proto;
234+
235+ pull_len = 0;
236+
237+recheck:
238+ switch (protocol) {
239+ case htons(ETH_P_8021Q): {
240+ if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
241+ goto other;
242+
243+ pull_len += VLAN_HLEN;
244+ skb->network_header += VLAN_HLEN;
245+
246+ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
247+ goto recheck;
248+ }
249+
250+ case htons(ETH_P_PPP_SES): {
251+ if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
252+ goto other;
253+
254+ pull_len += PPPOE_SES_HLEN;
255+ skb->network_header += PPPOE_SES_HLEN;
256+
257+ protocol = pppoe_proto(skb);
258+ goto recheck;
259+ }
260+
261+ case htons(ETH_P_IP): {
262+ const struct iphdr *iph = ip_hdr(skb);
263+
264+ if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
265+ goto other;
266+
267+ addr1 = iph->daddr;
268+ addr2 = iph->saddr;
269+
270+ ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
271+ iph->protocol : 0;
272+ ihl = ip_hdrlen(skb);
273+
274+ break;
275+ }
276+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
277+ case htons(ETH_P_IPV6): {
278+ const struct ipv6hdr *iph = ipv6_hdr(skb);
279+
280+ if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
281+ goto other;
282+
283+ addr1 = iph->daddr.s6_addr32[3];
284+ addr2 = iph->saddr.s6_addr32[3];
285+ ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto);
286+ if (unlikely(ihl < 0))
287+ goto other;
288+
289+ break;
290+ }
291+#endif
292+ default:
293+other:
294+ if (pull_len != 0) {
295+ skb_push(skb, pull_len);
296+ skb->network_header -= pull_len;
297+ }
298+
299+ return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
300+ }
301+
302+ if (addr1 > addr2)
303+ swap(addr1, addr2);
304+
305+ switch (ip_proto) {
306+ case IPPROTO_TCP:
307+ case IPPROTO_UDP:
308+ case IPPROTO_DCCP:
309+ case IPPROTO_ESP:
310+ case IPPROTO_AH:
311+ case IPPROTO_SCTP:
312+ case IPPROTO_UDPLITE: {
313+ if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
314+ if (ports.in16[0] > ports.in16[1])
315+ swap(ports.in16[0], ports.in16[1]);
316+ break;
317+ }
318+ /* fall-through */
319+ }
320+ default:
321+ ports.in32 = 0;
322+ break;
323+ }
324+
325+ if (pull_len != 0) {
326+ skb_push(skb, pull_len);
327+ skb->network_header -= pull_len;
328+ }
329+
330+ hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
331+
332+ return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
333+}
334+
335+static inline bool sk_tx_queue_recorded(struct sock *sk)
336+{
337+ return (sk_tx_queue_get(sk) >= 0);
338+}
339+
340+static struct netdev_queue *imq_select_queue(struct net_device *dev,
341+ struct sk_buff *skb)
342+{
343+ u16 queue_index = 0;
344+ u32 hash;
345+
346+ if (likely(dev->real_num_tx_queues == 1))
347+ goto out;
348+
349+ /* IMQ can be receiving ingress or egress packets. */
350+
351+ /* First check whether rx_queue is set */
352+ if (skb_rx_queue_recorded(skb)) {
353+ queue_index = skb_get_rx_queue(skb);
354+ goto out;
355+ }
356+
357+ /* Check if socket has tx_queue set */
358+ if (sk_tx_queue_recorded(skb->sk)) {
359+ queue_index = sk_tx_queue_get(skb->sk);
360+ goto out;
361+ }
362+
363+ /* Try to use socket hash */
364+ if (skb->sk && skb->sk->sk_hash) {
365+ hash = skb->sk->sk_hash;
366+ queue_index =
367+ (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
368+ goto out;
369+ }
370+
371+ /* Generate hash from packet data */
372+ queue_index = imq_hash(dev, skb);
373+
374+out:
375+ if (unlikely(queue_index >= dev->real_num_tx_queues))
376+ queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
377+
a168f21d 378+ skb_set_queue_mapping(skb, queue_index);
f6396b7e
AM
379+ return netdev_get_tx_queue(dev, queue_index);
380+}
381+
a168f21d
AM
382+static struct net_device_stats *imq_get_stats(struct net_device *dev)
383+{
384+ return &dev->stats;
385+}
386+
387+/* called for packets kfree'd in qdiscs at places other than enqueue */
388+static void imq_skb_destructor(struct sk_buff *skb)
389+{
390+ struct nf_queue_entry *entry = skb->nf_queue_entry;
391+
392+ skb->nf_queue_entry = NULL;
393+
394+ if (entry) {
395+ nf_queue_entry_release_refs(entry);
396+ kfree(entry);
397+ }
398+
399+ skb_restore_cb(skb); /* kfree backup */
400+}
401+
402+static void imq_done_check_queue_mapping(struct sk_buff *skb,
403+ struct net_device *dev)
404+{
405+ unsigned int queue_index;
406+
407+ /* Don't let queue_mapping be left too large after exiting IMQ */
408+ if (likely(skb->dev != dev && skb->dev != NULL)) {
409+ queue_index = skb_get_queue_mapping(skb);
410+ if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) {
411+ queue_index = (u16)((u32)queue_index %
412+ skb->dev->real_num_tx_queues);
413+ skb_set_queue_mapping(skb, queue_index);
414+ }
415+ } else {
416+ /* skb->dev was IMQ device itself or NULL, be on safe side and
417+ * just clear queue mapping.
418+ */
419+ skb_set_queue_mapping(skb, 0);
420+ }
421+}
422+
423+static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
424+{
425+ struct nf_queue_entry *entry = skb->nf_queue_entry;
426+
427+ skb->nf_queue_entry = NULL;
428+ dev->trans_start = jiffies;
429+
430+ dev->stats.tx_bytes += skb->len;
431+ dev->stats.tx_packets++;
432+
433+ if (unlikely(entry == NULL)) {
434+ /* We don't know what is going on here.. packet is queued for
435+ * imq device, but (probably) not by us.
436+ *
437+ * If this packet was not sent here by imq_nf_queue(), then
438+ * skb_save_cb() was not used and skb_free() should not show:
439+ * WARNING: IMQ: kfree_skb: skb->cb_next:..
440+ * and/or
441+ * WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
442+ *
443+ * However if this message is shown, then IMQ is somehow broken
444+ * and you should report this to linuximq.net.
445+ */
446+
447+ /* imq_dev_xmit is black hole that eats all packets, report that
448+ * we eat this packet happily and increase dropped counters.
449+ */
450+
451+ dev->stats.tx_dropped++;
452+ dev_kfree_skb(skb);
453+
454+ return NETDEV_TX_OK;
455+ }
456+
457+ skb_restore_cb(skb); /* restore skb->cb */
458+
459+ skb->imq_flags = 0;
460+ skb->destructor = NULL;
461+
462+ imq_done_check_queue_mapping(skb, dev);
463+
464+ nf_reinject(entry, NF_ACCEPT);
465+
466+ return NETDEV_TX_OK;
467+}
468+
2380c486
JR
469+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
470+{
471+ struct net_device *dev;
7f07242b 472+ struct sk_buff *skb_orig, *skb, *skb_shared;
2380c486 473+ struct Qdisc *q;
7f07242b 474+ struct netdev_queue *txq;
f6396b7e 475+ spinlock_t *root_lock;
7f07242b 476+ int users, index;
477+ int retval = -EINVAL;
a168f21d 478+ unsigned int orig_queue_index;
7f07242b 479+
480+ index = entry->skb->imq_flags & IMQ_F_IFMASK;
481+ if (unlikely(index > numdevs - 1)) {
482+ if (net_ratelimit())
483+ printk(KERN_WARNING
484+ "IMQ: invalid device specified, highest is %u\n",
485+ numdevs - 1);
486+ retval = -EINVAL;
487+ goto out;
488+ }
2380c486
JR
489+
490+ /* check for imq device by index from cache */
491+ dev = imq_devs_cache[index];
7f07242b 492+ if (unlikely(!dev)) {
2380c486
JR
493+ char buf[8];
494+
495+ /* get device by name and cache result */
496+ snprintf(buf, sizeof(buf), "imq%d", index);
497+ dev = dev_get_by_name(&init_net, buf);
f6396b7e 498+ if (unlikely(!dev)) {
2380c486
JR
499+ /* not found ?!*/
500+ BUG();
7f07242b 501+ retval = -ENODEV;
502+ goto out;
2380c486
JR
503+ }
504+
505+ imq_devs_cache[index] = dev;
7f07242b 506+ dev_put(dev);
2380c486
JR
507+ }
508+
7f07242b 509+ if (unlikely(!(dev->flags & IFF_UP))) {
2380c486 510+ entry->skb->imq_flags = 0;
f6396b7e 511+ nf_reinject(entry, NF_ACCEPT);
7f07242b 512+ retval = 0;
513+ goto out;
2380c486
JR
514+ }
515+ dev->last_rx = jiffies;
516+
7f07242b 517+ skb = entry->skb;
518+ skb_orig = NULL;
519+
520+ /* skb has owner? => make clone */
521+ if (unlikely(skb->destructor)) {
522+ skb_orig = skb;
523+ skb = skb_clone(skb, GFP_ATOMIC);
f6396b7e 524+ if (unlikely(!skb)) {
7f07242b 525+ retval = -ENOMEM;
526+ goto out;
527+ }
528+ entry->skb = skb;
2380c486 529+ }
2380c486 530+
7f07242b 531+ skb->nf_queue_entry = entry;
532+
533+ dev->stats.rx_bytes += skb->len;
2380c486
JR
534+ dev->stats.rx_packets++;
535+
a168f21d
AM
536+ if (!skb->dev) {
537+ /* skb->dev == NULL causes problems, try to find the cause. */
538+ if (net_ratelimit()) {
539+ dev_warn(&dev->dev,
540+ "received packet with skb->dev == NULL\n");
541+ dump_stack();
542+ }
543+
544+ skb->dev = dev;
545+ }
546+
f6396b7e
AM
547+ /* Disables softirqs for lock below */
548+ rcu_read_lock_bh();
549+
550+ /* Multi-queue selection */
a168f21d 551+ orig_queue_index = skb_get_queue_mapping(skb);
f6396b7e 552+ txq = imq_select_queue(dev, skb);
2380c486 553+
7f07242b 554+ q = rcu_dereference(txq->qdisc);
555+ if (unlikely(!q->enqueue))
556+ goto packet_not_eaten_by_imq_dev;
2380c486 557+
f6396b7e
AM
558+ root_lock = qdisc_lock(q);
559+ spin_lock(root_lock);
7f07242b 560+
561+ users = atomic_read(&skb->users);
562+
563+ skb_shared = skb_get(skb); /* increase reference count by one */
564+ skb_save_cb(skb_shared); /* backup skb->cb, as qdisc layer will
565+ overwrite it */
566+ qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
567+
568+ if (likely(atomic_read(&skb_shared->users) == users + 1)) {
569+ kfree_skb(skb_shared); /* decrease reference count by one */
570+
571+ skb->destructor = &imq_skb_destructor;
572+
573+ /* cloned? */
f6396b7e 574+ if (unlikely(skb_orig))
7f07242b 575+ kfree_skb(skb_orig); /* free original */
576+
f6396b7e
AM
577+ spin_unlock(root_lock);
578+ rcu_read_unlock_bh();
7f07242b 579+
580+ /* schedule qdisc dequeue */
581+ __netif_schedule(q);
582+
583+ retval = 0;
584+ goto out;
585+ } else {
586+ skb_restore_cb(skb_shared); /* restore skb->cb */
14f08cd0 587+ skb->nf_queue_entry = NULL;
7f07242b 588+ /* qdisc dropped packet and decreased skb reference count of
589+ * skb, so we must not try to free it again here, as that would
590+ * actually destroy the skb. */
f6396b7e 591+ spin_unlock(root_lock);
7f07242b 592+ goto packet_not_eaten_by_imq_dev;
593+ }
594+
595+packet_not_eaten_by_imq_dev:
a168f21d 596+ skb_set_queue_mapping(skb, orig_queue_index);
f6396b7e
AM
597+ rcu_read_unlock_bh();
598+
7f07242b 599+ /* cloned? restore original */
f6396b7e 600+ if (unlikely(skb_orig)) {
7f07242b 601+ kfree_skb(skb);
602+ entry->skb = skb_orig;
603+ }
604+ retval = -1;
605+out:
606+ return retval;
2380c486
JR
607+}
608+
2380c486
JR
609+static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
610+ const struct net_device *indev,
611+ const struct net_device *outdev,
612+ int (*okfn)(struct sk_buff *))
613+{
f6396b7e 614+ return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
2380c486
JR
615+}
616+
617+static int imq_close(struct net_device *dev)
618+{
2380c486 619+ netif_stop_queue(dev);
2380c486
JR
620+ return 0;
621+}
622+
623+static int imq_open(struct net_device *dev)
624+{
2380c486 625+ netif_start_queue(dev);
2380c486
JR
626+ return 0;
627+}
628+
7f07242b 629+static const struct net_device_ops imq_netdev_ops = {
630+ .ndo_open = imq_open,
631+ .ndo_stop = imq_close,
632+ .ndo_start_xmit = imq_dev_xmit,
633+ .ndo_get_stats = imq_get_stats,
634+};
635+
2380c486
JR
636+static void imq_setup(struct net_device *dev)
637+{
7f07242b 638+ dev->netdev_ops = &imq_netdev_ops;
2380c486
JR
639+ dev->type = ARPHRD_VOID;
640+ dev->mtu = 16000;
641+ dev->tx_queue_len = 11000;
642+ dev->flags = IFF_NOARP;
7f07242b 643+ dev->features = NETIF_F_SG | NETIF_F_FRAGLIST |
644+ NETIF_F_GSO | NETIF_F_HW_CSUM |
645+ NETIF_F_HIGHDMA;
14f08cd0 646+ dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
7f07242b 647+}
648+
649+static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
650+{
651+ int ret = 0;
652+
653+ if (tb[IFLA_ADDRESS]) {
654+ if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
655+ ret = -EINVAL;
656+ goto end;
657+ }
658+ if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
659+ ret = -EADDRNOTAVAIL;
660+ goto end;
661+ }
662+ }
663+ return 0;
664+end:
665+ printk(KERN_WARNING "IMQ: imq_validate failed (%d)\n", ret);
666+ return ret;
2380c486
JR
667+}
668+
669+static struct rtnl_link_ops imq_link_ops __read_mostly = {
670+ .kind = "imq",
7f07242b 671+ .priv_size = 0,
2380c486 672+ .setup = imq_setup,
7f07242b 673+ .validate = imq_validate,
2380c486
JR
674+};
675+
f6396b7e
AM
676+static const struct nf_queue_handler imq_nfqh = {
677+ .name = "imq",
678+ .outfn = imq_nf_queue,
679+};
680+
2380c486
JR
681+static int __init imq_init_hooks(void)
682+{
f6396b7e 683+ int ret;
2380c486 684+
f6396b7e 685+ nf_register_queue_imq_handler(&imq_nfqh);
2380c486 686+
f6396b7e
AM
687+ ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
688+ if (ret < 0)
689+ nf_unregister_queue_imq_handler();
2380c486 690+
f6396b7e 691+ return ret;
2380c486
JR
692+}
693+
694+static int __init imq_init_one(int index)
695+{
696+ struct net_device *dev;
697+ int ret;
698+
f6396b7e 699+ dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
2380c486
JR
700+ if (!dev)
701+ return -ENOMEM;
702+
703+ ret = dev_alloc_name(dev, dev->name);
704+ if (ret < 0)
705+ goto fail;
706+
707+ dev->rtnl_link_ops = &imq_link_ops;
708+ ret = register_netdevice(dev);
709+ if (ret < 0)
710+ goto fail;
711+
712+ return 0;
713+fail:
714+ free_netdev(dev);
715+ return ret;
716+}
717+
718+static int __init imq_init_devs(void)
719+{
720+ int err, i;
721+
7f07242b 722+ if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
2380c486
JR
723+ printk(KERN_ERR "IMQ: numdevs has to be betweed 1 and %u\n",
724+ IMQ_MAX_DEVS);
725+ return -EINVAL;
726+ }
727+
f6396b7e
AM
728+ if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
729+ printk(KERN_ERR "IMQ: numqueues has to be betweed 1 and %u\n",
730+ IMQ_MAX_QUEUES);
731+ return -EINVAL;
732+ }
733+
734+ get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
735+
2380c486
JR
736+ rtnl_lock();
737+ err = __rtnl_link_register(&imq_link_ops);
738+
739+ for (i = 0; i < numdevs && !err; i++)
740+ err = imq_init_one(i);
741+
742+ if (err) {
743+ __rtnl_link_unregister(&imq_link_ops);
744+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
745+ }
746+ rtnl_unlock();
747+
748+ return err;
749+}
750+
751+static int __init imq_init_module(void)
752+{
753+ int err;
754+
7f07242b 755+#if defined(CONFIG_IMQ_NUM_DEVS)
756+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
757+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
758+ BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
759+#endif
760+
2380c486
JR
761+ err = imq_init_devs();
762+ if (err) {
763+ printk(KERN_ERR "IMQ: Error trying imq_init_devs(net)\n");
764+ return err;
765+ }
766+
767+ err = imq_init_hooks();
768+ if (err) {
769+ printk(KERN_ERR "IMQ: Error trying imq_init_hooks()\n");
770+ rtnl_link_unregister(&imq_link_ops);
771+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
772+ return err;
773+ }
774+
f6396b7e
AM
775+ printk(KERN_INFO "IMQ driver loaded successfully. "
776+ "(numdevs = %d, numqueues = %d)\n", numdevs, numqueues);
2380c486
JR
777+
778+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
779+ printk(KERN_INFO "\tHooking IMQ before NAT on PREROUTING.\n");
780+#else
781+ printk(KERN_INFO "\tHooking IMQ after NAT on PREROUTING.\n");
782+#endif
783+#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
784+ printk(KERN_INFO "\tHooking IMQ before NAT on POSTROUTING.\n");
785+#else
786+ printk(KERN_INFO "\tHooking IMQ after NAT on POSTROUTING.\n");
787+#endif
788+
789+ return 0;
790+}
791+
792+static void __exit imq_unhook(void)
793+{
f6396b7e 794+ nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
7f07242b 795+ nf_unregister_queue_imq_handler();
2380c486
JR
796+}
797+
798+static void __exit imq_cleanup_devs(void)
799+{
800+ rtnl_link_unregister(&imq_link_ops);
801+ memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
802+}
803+
804+static void __exit imq_exit_module(void)
805+{
806+ imq_unhook();
807+ imq_cleanup_devs();
808+ printk(KERN_INFO "IMQ driver unloaded successfully.\n");
809+}
810+
811+module_init(imq_init_module);
812+module_exit(imq_exit_module);
813+
814+module_param(numdevs, int, 0);
f6396b7e 815+module_param(numqueues, int, 0);
2380c486
JR
816+MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will "
817+ "be created)");
f6396b7e 818+MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
2380c486
JR
819+MODULE_AUTHOR("http://www.linuximq.net");
820+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See "
821+ "http://www.linuximq.net/ for more information.");
822+MODULE_LICENSE("GPL");
823+MODULE_ALIAS_RTNL_LINK("imq");
824+
a168f21d
AM
825diff -uNr linux-3.0/drivers/net/Kconfig linux-3.0-imq/drivers/net/Kconfig
826--- linux-3.0/drivers/net/Kconfig 2011-07-22 05:17:23.000000000 +0300
827+++ linux-3.0-imq/drivers/net/Kconfig 2011-07-26 06:31:36.176747906 +0300
828@@ -124,6 +124,125 @@
2380c486
JR
829 To compile this driver as a module, choose M here: the module
830 will be called eql. If unsure, say N.
831
832+config IMQ
833+ tristate "IMQ (intermediate queueing device) support"
834+ depends on NETDEVICES && NETFILTER
835+ ---help---
836+ The IMQ device(s) is used as placeholder for QoS queueing
837+ disciplines. Every packet entering/leaving the IP stack can be
838+ directed through the IMQ device where it's enqueued/dequeued to the
839+ attached qdisc. This allows you to treat network devices as classes
840+ and distribute bandwidth among them. Iptables is used to specify
841+ through which IMQ device, if any, packets travel.
842+
843+ More information at: http://www.linuximq.net/
844+
845+ To compile this driver as a module, choose M here: the module
846+ will be called imq. If unsure, say N.
847+
848+choice
849+ prompt "IMQ behavior (PRE/POSTROUTING)"
850+ depends on IMQ
851+ default IMQ_BEHAVIOR_AB
852+ help
a168f21d
AM
853+ This setting defines how IMQ behaves in respect to its
854+ hooking in PREROUTING and POSTROUTING.
2380c486 855+
a168f21d 856+ IMQ can work in any of the following ways:
2380c486 857+
a168f21d
AM
858+ PREROUTING | POSTROUTING
859+ -----------------|-------------------
860+ #1 After NAT | After NAT
861+ #2 After NAT | Before NAT
862+ #3 Before NAT | After NAT
863+ #4 Before NAT | Before NAT
2380c486 864+
a168f21d
AM
865+ The default behavior is to hook after NAT on PREROUTING
866+ and before NAT on POSTROUTING (#2).
2380c486 867+
a168f21d
AM
868+ These settings are especially useful when trying to use IMQ
869+ to shape NATed clients.
2380c486 870+
a168f21d 871+ More information can be found at: www.linuximq.net
2380c486 872+
a168f21d 873+ If not sure leave the default settings alone.
2380c486
JR
874+
875+config IMQ_BEHAVIOR_AA
876+ bool "IMQ AA"
877+ help
a168f21d
AM
878+ This setting defines how IMQ behaves in respect to its
879+ hooking in PREROUTING and POSTROUTING.
2380c486 880+
a168f21d 881+ Choosing this option will make IMQ hook like this:
2380c486 882+
a168f21d
AM
883+ PREROUTING: After NAT
884+ POSTROUTING: After NAT
2380c486 885+
a168f21d 886+ More information can be found at: www.linuximq.net
2380c486 887+
a168f21d 888+ If not sure leave the default settings alone.
2380c486
JR
889+
890+config IMQ_BEHAVIOR_AB
891+ bool "IMQ AB"
892+ help
a168f21d
AM
893+ This setting defines how IMQ behaves in respect to its
894+ hooking in PREROUTING and POSTROUTING.
2380c486 895+
a168f21d 896+ Choosing this option will make IMQ hook like this:
2380c486 897+
a168f21d
AM
898+ PREROUTING: After NAT
899+ POSTROUTING: Before NAT
2380c486 900+
a168f21d 901+ More information can be found at: www.linuximq.net
2380c486 902+
a168f21d 903+ If not sure leave the default settings alone.
2380c486
JR
904+
905+config IMQ_BEHAVIOR_BA
906+ bool "IMQ BA"
907+ help
a168f21d
AM
908+ This setting defines how IMQ behaves in respect to its
909+ hooking in PREROUTING and POSTROUTING.
2380c486 910+
a168f21d 911+ Choosing this option will make IMQ hook like this:
2380c486 912+
a168f21d
AM
913+ PREROUTING: Before NAT
914+ POSTROUTING: After NAT
2380c486 915+
a168f21d 916+ More information can be found at: www.linuximq.net
2380c486 917+
a168f21d 918+ If not sure leave the default settings alone.
2380c486
JR
919+
920+config IMQ_BEHAVIOR_BB
921+ bool "IMQ BB"
922+ help
a168f21d
AM
923+ This setting defines how IMQ behaves in respect to its
924+ hooking in PREROUTING and POSTROUTING.
2380c486 925+
a168f21d 926+ Choosing this option will make IMQ hook like this:
2380c486 927+
a168f21d
AM
928+ PREROUTING: Before NAT
929+ POSTROUTING: Before NAT
2380c486 930+
a168f21d 931+ More information can be found at: www.linuximq.net
2380c486 932+
a168f21d 933+ If not sure leave the default settings alone.
2380c486
JR
934+
935+endchoice
936+
937+config IMQ_NUM_DEVS
2380c486
JR
938+ int "Number of IMQ devices"
939+ range 2 16
940+ depends on IMQ
941+ default "16"
942+ help
a168f21d 943+ This setting defines how many IMQ devices will be created.
2380c486 944+
a168f21d 945+ The default value is 16.
2380c486 946+
a168f21d 947+ More information can be found at: www.linuximq.net
2380c486 948+
a168f21d 949+ If not sure leave the default settings alone.
2380c486
JR
950+
951 config TUN
952 tristate "Universal TUN/TAP device driver support"
953 select CRC32
a168f21d
AM
954diff -uNr linux-3.0/drivers/net/Makefile linux-3.0-imq/drivers/net/Makefile
955--- linux-3.0/drivers/net/Makefile 2011-07-22 05:17:23.000000000 +0300
956+++ linux-3.0-imq/drivers/net/Makefile 2011-07-24 12:06:25.922003276 +0300
957@@ -176,6 +176,7 @@
7af23471 958 obj-$(CONFIG_XEN_NETDEV_BACKEND) += xen-netback/
2380c486
JR
959
960 obj-$(CONFIG_DUMMY) += dummy.o
961+obj-$(CONFIG_IMQ) += imq.o
962 obj-$(CONFIG_IFB) += ifb.o
963 obj-$(CONFIG_MACVLAN) += macvlan.o
d031c9d6 964 obj-$(CONFIG_MACVTAP) += macvtap.o
a168f21d
AM
965diff -uNr linux-3.0/include/linux/imq.h linux-3.0-imq/include/linux/imq.h
966--- linux-3.0/include/linux/imq.h 1970-01-01 02:00:00.000000000 +0200
967+++ linux-3.0-imq/include/linux/imq.h 2011-07-24 12:06:25.932003270 +0300
7f07242b 968@@ -0,0 +1,13 @@
2380c486
JR
969+#ifndef _IMQ_H
970+#define _IMQ_H
971+
7f07242b 972+/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
973+#define IMQ_F_BITS 5
974+
975+#define IMQ_F_IFMASK 0x0f
976+#define IMQ_F_ENQUEUE 0x10
2380c486 977+
7f07242b 978+#define IMQ_MAX_DEVS (IMQ_F_IFMASK + 1)
2380c486
JR
979+
980+#endif /* _IMQ_H */
2380c486 981+
a168f21d
AM
982diff -uNr linux-3.0/include/linux/netfilter/xt_IMQ.h linux-3.0-imq/include/linux/netfilter/xt_IMQ.h
983--- linux-3.0/include/linux/netfilter/xt_IMQ.h 1970-01-01 02:00:00.000000000 +0200
984+++ linux-3.0-imq/include/linux/netfilter/xt_IMQ.h 2011-07-24 12:06:25.932003270 +0300
7f07242b 985@@ -0,0 +1,9 @@
986+#ifndef _XT_IMQ_H
987+#define _XT_IMQ_H
988+
989+struct xt_imq_info {
2380c486
JR
990+ unsigned int todev; /* target imq device */
991+};
992+
7f07242b 993+#endif /* _XT_IMQ_H */
994+
a168f21d
AM
995diff -uNr linux-3.0/include/linux/netfilter.h linux-3.0-imq/include/linux/netfilter.h
996--- linux-3.0/include/linux/netfilter.h 2011-07-22 05:17:23.000000000 +0300
997+++ linux-3.0-imq/include/linux/netfilter.h 2011-07-24 12:06:25.955336605 +0300
998@@ -22,7 +22,8 @@
f6396b7e
AM
999 #define NF_QUEUE 3
1000 #define NF_REPEAT 4
1001 #define NF_STOP 5
1002-#define NF_MAX_VERDICT NF_STOP
1003+#define NF_IMQ_QUEUE 6
1004+#define NF_MAX_VERDICT NF_IMQ_QUEUE
1005
1006 /* we overload the higher bits for encoding auxiliary data such as the queue
7af23471 1007 * number or errno values. Not nice, but better than additional function
a168f21d
AM
1008diff -uNr linux-3.0/include/linux/netfilter_ipv4/ipt_IMQ.h linux-3.0-imq/include/linux/netfilter_ipv4/ipt_IMQ.h
1009--- linux-3.0/include/linux/netfilter_ipv4/ipt_IMQ.h 1970-01-01 02:00:00.000000000 +0200
1010+++ linux-3.0-imq/include/linux/netfilter_ipv4/ipt_IMQ.h 2011-07-24 12:06:25.955336605 +0300
7f07242b 1011@@ -0,0 +1,10 @@
1012+#ifndef _IPT_IMQ_H
1013+#define _IPT_IMQ_H
1014+
1015+/* Backwards compatibility for old userspace */
1016+#include <linux/netfilter/xt_IMQ.h>
1017+
1018+#define ipt_imq_info xt_imq_info
1019+
2380c486 1020+#endif /* _IPT_IMQ_H */
7f07242b 1021+
a168f21d
AM
1022diff -uNr linux-3.0/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-3.0-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h
1023--- linux-3.0/include/linux/netfilter_ipv6/ip6t_IMQ.h 1970-01-01 02:00:00.000000000 +0200
1024+++ linux-3.0-imq/include/linux/netfilter_ipv6/ip6t_IMQ.h 2011-07-24 12:06:25.955336605 +0300
7f07242b 1025@@ -0,0 +1,10 @@
2380c486
JR
1026+#ifndef _IP6T_IMQ_H
1027+#define _IP6T_IMQ_H
1028+
7f07242b 1029+/* Backwards compatibility for old userspace */
1030+#include <linux/netfilter/xt_IMQ.h>
1031+
1032+#define ip6t_imq_info xt_imq_info
2380c486
JR
1033+
1034+#endif /* _IP6T_IMQ_H */
7f07242b 1035+
a168f21d
AM
1036diff -uNr linux-3.0/include/linux/skbuff.h linux-3.0-imq/include/linux/skbuff.h
1037--- linux-3.0/include/linux/skbuff.h 2011-07-22 05:17:23.000000000 +0300
1038+++ linux-3.0-imq/include/linux/skbuff.h 2011-07-24 12:06:25.968669945 +0300
76514441 1039@@ -29,6 +29,9 @@
7f07242b 1040 #include <linux/rcupdate.h>
1041 #include <linux/dmaengine.h>
1042 #include <linux/hrtimer.h>
1043+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1044+#include <linux/imq.h>
1045+#endif
1046
14f08cd0 1047 /* Don't change this without changing skb_csum_unnecessary! */
1048 #define CHECKSUM_NONE 0
7af23471 1049@@ -339,6 +342,9 @@
7f07242b 1050 * first. This is owned by whoever has the skb queued ATM.
1051 */
ca0faea1 1052 char cb[48] __aligned(8);
7f07242b 1053+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1054+ void *cb_next;
1055+#endif
1056
f6396b7e 1057 unsigned long _skb_refdst;
ca0faea1 1058 #ifdef CONFIG_XFRM
7af23471
JR
1059@@ -377,6 +383,9 @@
1060 #ifdef NET_SKBUFF_NF_DEFRAG_NEEDED
2380c486
JR
1061 struct sk_buff *nfct_reasm;
1062 #endif
1063+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
2380c486
JR
1064+ struct nf_queue_entry *nf_queue_entry;
1065+#endif
1066 #ifdef CONFIG_BRIDGE_NETFILTER
1067 struct nf_bridge_info *nf_bridge;
1068 #endif
7af23471 1069@@ -401,6 +410,10 @@
14f08cd0 1070
7af23471 1071 /* 0/13 bit hole */
14f08cd0 1072
7f07242b 1073+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1074+ __u8 imq_flags:IMQ_F_BITS;
1075+#endif
14f08cd0 1076+
7f07242b 1077 #ifdef CONFIG_NET_DMA
1078 dma_cookie_t dma_cookie;
7f07242b 1079 #endif
f6396b7e 1080@@ -487,6 +500,12 @@
14f08cd0 1081 return (struct rtable *)skb_dst(skb);
1082 }
7f07242b 1083
1084+
1085+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1086+extern int skb_save_cb(struct sk_buff *skb);
1087+extern int skb_restore_cb(struct sk_buff *skb);
1088+#endif
1089+
1090 extern void kfree_skb(struct sk_buff *skb);
14f08cd0 1091 extern void consume_skb(struct sk_buff *skb);
7f07242b 1092 extern void __kfree_skb(struct sk_buff *skb);
a168f21d 1093@@ -2134,6 +2153,10 @@
2380c486
JR
1094 dst->nfct_reasm = src->nfct_reasm;
1095 nf_conntrack_get_reasm(src->nfct_reasm);
1096 #endif
1097+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1098+ dst->imq_flags = src->imq_flags;
1099+ dst->nf_queue_entry = src->nf_queue_entry;
1100+#endif
1101 #ifdef CONFIG_BRIDGE_NETFILTER
1102 dst->nf_bridge = src->nf_bridge;
1103 nf_bridge_get(src->nf_bridge);
a168f21d
AM
1104diff -uNr linux-3.0/include/net/netfilter/nf_queue.h linux-3.0-imq/include/net/netfilter/nf_queue.h
1105--- linux-3.0/include/net/netfilter/nf_queue.h 2011-07-22 05:17:23.000000000 +0300
1106+++ linux-3.0-imq/include/net/netfilter/nf_queue.h 2011-07-24 12:06:25.975336612 +0300
f6396b7e 1107@@ -30,5 +30,11 @@
7f07242b 1108 const struct nf_queue_handler *qh);
1109 extern void nf_unregister_queue_handlers(const struct nf_queue_handler *qh);
1110 extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
1111+extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1112+
1113+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1114+extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
1115+extern void nf_unregister_queue_imq_handler(void);
1116+#endif
1117
1118 #endif /* _NF_QUEUE_H */
a168f21d
AM
1119diff -uNr linux-3.0/net/core/dev.c linux-3.0-imq/net/core/dev.c
1120--- linux-3.0/net/core/dev.c 2011-07-22 05:17:23.000000000 +0300
1121+++ linux-3.0-imq/net/core/dev.c 2011-07-26 07:52:00.513207402 +0300
76514441 1122@@ -98,6 +98,9 @@
2380c486
JR
1123 #include <net/net_namespace.h>
1124 #include <net/sock.h>
1125 #include <linux/rtnetlink.h>
1126+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1127+#include <linux/imq.h>
1128+#endif
1129 #include <linux/proc_fs.h>
1130 #include <linux/seq_file.h>
1131 #include <linux/stat.h>
a168f21d 1132@@ -2108,7 +2111,12 @@
e933b04e
AM
1133 if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
1134 skb_dst_drop(skb);
7f07242b 1135
2380c486 1136+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
7af23471
JR
1137+ if (!list_empty(&ptype_all) &&
1138+ !(skb->imq_flags & IMQ_F_ENQUEUE))
1139+#else
1140 if (!list_empty(&ptype_all))
2380c486 1141+#endif
2380c486
JR
1142 dev_queue_xmit_nit(skb, dev);
1143
e933b04e 1144 skb_orphan_try(skb);
a168f21d
AM
1145diff -uNr linux-3.0/net/core/skbuff.c linux-3.0-imq/net/core/skbuff.c
1146--- linux-3.0/net/core/skbuff.c 2011-07-22 05:17:23.000000000 +0300
1147+++ linux-3.0-imq/net/core/skbuff.c 2011-07-24 12:06:26.008669943 +0300
1148@@ -73,6 +73,9 @@
7f07242b 1149
1150 static struct kmem_cache *skbuff_head_cache __read_mostly;
1151 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1152+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1153+static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
1154+#endif
1155
1156 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
1157 struct pipe_buffer *buf)
a168f21d 1158@@ -92,6 +95,82 @@
7f07242b 1159 return 1;
1160 }
1161
1162+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1163+/* Control buffer save/restore for IMQ devices */
1164+struct skb_cb_table {
f6396b7e 1165+ char cb[48] __aligned(8);
7f07242b 1166+ void *cb_next;
1167+ atomic_t refcnt;
7f07242b 1168+};
2380c486 1169+
7f07242b 1170+static DEFINE_SPINLOCK(skb_cb_store_lock);
1171+
1172+int skb_save_cb(struct sk_buff *skb)
2380c486 1173+{
7f07242b 1174+ struct skb_cb_table *next;
1175+
1176+ next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
1177+ if (!next)
1178+ return -ENOMEM;
2380c486 1179+
7f07242b 1180+ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
2380c486 1181+
7f07242b 1182+ memcpy(next->cb, skb->cb, sizeof(skb->cb));
1183+ next->cb_next = skb->cb_next;
1184+
1185+ atomic_set(&next->refcnt, 1);
1186+
1187+ skb->cb_next = next;
1188+ return 0;
2380c486 1189+}
7f07242b 1190+EXPORT_SYMBOL(skb_save_cb);
2380c486 1191+
7f07242b 1192+int skb_restore_cb(struct sk_buff *skb)
2380c486 1193+{
7f07242b 1194+ struct skb_cb_table *next;
2380c486 1195+
7f07242b 1196+ if (!skb->cb_next)
2380c486 1197+ return 0;
7f07242b 1198+
1199+ next = skb->cb_next;
1200+
1201+ BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1202+
1203+ memcpy(skb->cb, next->cb, sizeof(skb->cb));
1204+ skb->cb_next = next->cb_next;
1205+
1206+ spin_lock(&skb_cb_store_lock);
1207+
f6396b7e 1208+ if (atomic_dec_and_test(&next->refcnt))
7f07242b 1209+ kmem_cache_free(skbuff_cb_store_cache, next);
2380c486 1210+
7f07242b 1211+ spin_unlock(&skb_cb_store_lock);
1212+
1213+ return 0;
2380c486 1214+}
7f07242b 1215+EXPORT_SYMBOL(skb_restore_cb);
2380c486 1216+
14f08cd0 1217+static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
7f07242b 1218+{
1219+ struct skb_cb_table *next;
14f08cd0 1220+ struct sk_buff *old;
7f07242b 1221+
14f08cd0 1222+ if (!__old->cb_next) {
1223+ new->cb_next = NULL;
7f07242b 1224+ return;
1225+ }
1226+
1227+ spin_lock(&skb_cb_store_lock);
1228+
14f08cd0 1229+ old = (struct sk_buff *)__old;
1230+
7f07242b 1231+ next = old->cb_next;
1232+ atomic_inc(&next->refcnt);
1233+ new->cb_next = next;
1234+
1235+ spin_unlock(&skb_cb_store_lock);
1236+}
1237+#endif
1238
1239 /* Pipe buffer operations for a socket. */
5379d87d 1240 static const struct pipe_buf_operations sock_pipe_buf_ops = {
a168f21d 1241@@ -380,6 +459,26 @@
7f07242b 1242 WARN_ON(in_irq());
1243 skb->destructor(skb);
1244 }
1245+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1246+ /* This should not happen. If it does, avoid a memory leak by restoring
1247+ the chain of cb backups. */
f6396b7e 1248+ while (skb->cb_next != NULL) {
14f08cd0 1249+ if (net_ratelimit())
1250+ printk(KERN_WARNING "IMQ: kfree_skb: skb->cb_next: "
1251+ "%08x\n", (unsigned int)skb->cb_next);
1252+
7f07242b 1253+ skb_restore_cb(skb);
1254+ }
14f08cd0 1255+ /* This should not happen either; nf_queue_entry is nullified in
1256+ * imq_dev_xmit(). If we have a non-NULL nf_queue_entry then we are
1257+ * leaking entry pointers, and maybe memory. We don't know whether this
1258+ * is a pointer to already freed memory or whether it should be freed.
1259+ * If this happens we need to add refcounting, etc. for nf_queue_entry.
1260+ */
1261+ if (skb->nf_queue_entry && net_ratelimit())
1262+ printk(KERN_WARNING
1263+ "IMQ: kfree_skb: skb->nf_queue_entry != NULL");
7f07242b 1264+#endif
1265 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1266 nf_conntrack_put(skb->nfct);
7af23471 1267 #endif
a168f21d 1268@@ -518,6 +617,9 @@
7f07242b 1269 new->sp = secpath_get(old->sp);
1270 #endif
1271 memcpy(new->cb, old->cb, sizeof(old->cb));
1272+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1273+ skb_copy_stored_cb(new, old);
1274+#endif
13e5c3b1 1275 new->csum = old->csum;
7f07242b 1276 new->local_df = old->local_df;
13e5c3b1 1277 new->pkt_type = old->pkt_type;
a168f21d 1278@@ -2781,6 +2883,13 @@
7f07242b 1279 0,
1280 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1281 NULL);
1282+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1283+ skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1284+ sizeof(struct skb_cb_table),
1285+ 0,
1286+ SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1287+ NULL);
1288+#endif
1289 }
1290
1291 /**
a168f21d
AM
1292diff -uNr linux-3.0/net/ipv6/ip6_output.c linux-3.0-imq/net/ipv6/ip6_output.c
1293--- linux-3.0/net/ipv6/ip6_output.c 2011-07-22 05:17:23.000000000 +0300
1294+++ linux-3.0-imq/net/ipv6/ip6_output.c 2011-07-24 16:46:04.789482257 +0300
1295@@ -101,9 +101,6 @@
1296 struct dst_entry *dst = skb_dst(skb);
1297 struct net_device *dev = dst->dev;
1298
1299- skb->protocol = htons(ETH_P_IPV6);
1300- skb->dev = dev;
1301-
1302 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1303 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1304
1305@@ -165,6 +162,11 @@
1306 return 0;
f6396b7e 1307 }
a168f21d
AM
1308
1309+ /* IMQ-patch: moved setting skb->dev and skb->protocol from
1310+ * ip6_finish_output2 to fix crashing at netif_skb_features(). */
1311+ skb->protocol = htons(ETH_P_IPV6);
1312+ skb->dev = dev;
1313+
1314 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
1315 ip6_finish_output,
1316 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
1317diff -uNr linux-3.0/net/netfilter/core.c linux-3.0-imq/net/netfilter/core.c
1318--- linux-3.0/net/netfilter/core.c 2011-07-22 05:17:23.000000000 +0300
1319+++ linux-3.0-imq/net/netfilter/core.c 2011-07-24 12:53:52.972141108 +0300
1320@@ -179,9 +179,11 @@
1321 ret = NF_DROP_GETERR(verdict);
1322 if (ret == 0)
1323 ret = -EPERM;
1324- } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
1325+ } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
1326+ (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
1327 ret = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1328- verdict >> NF_VERDICT_QBITS);
1329+ verdict >> NF_VERDICT_QBITS,
1330+ verdict & NF_VERDICT_MASK);
1331 if (ret < 0) {
1332 if (ret == -ECANCELED)
1333 goto next_hook;
1334diff -uNr linux-3.0/net/netfilter/Kconfig linux-3.0-imq/net/netfilter/Kconfig
1335--- linux-3.0/net/netfilter/Kconfig 2011-07-22 05:17:23.000000000 +0300
1336+++ linux-3.0-imq/net/netfilter/Kconfig 2011-07-24 12:06:26.035336611 +0300
7af23471 1337@@ -507,6 +507,18 @@
14f08cd0 1338 For more information on the LEDs available on your system, see
1339 Documentation/leds-class.txt
7f07242b 1340
1341+config NETFILTER_XT_TARGET_IMQ
1342+ tristate '"IMQ" target support'
1343+ depends on NETFILTER_XTABLES
1344+ depends on IP_NF_MANGLE || IP6_NF_MANGLE
1345+ select IMQ
1346+ default m if NETFILTER_ADVANCED=n
1347+ help
1348+ This option adds an `IMQ' target which is used to specify whether and
1349+ to which imq device packets should get enqueued/dequeued.
2380c486 1350+
7f07242b 1351+ To compile it as a module, choose M here. If unsure, say N.
1352+
1353 config NETFILTER_XT_TARGET_MARK
1354 tristate '"MARK" target support'
f6396b7e 1355 depends on NETFILTER_ADVANCED
a168f21d
AM
1356diff -uNr linux-3.0/net/netfilter/Makefile linux-3.0-imq/net/netfilter/Makefile
1357--- linux-3.0/net/netfilter/Makefile 2011-07-22 05:17:23.000000000 +0300
1358+++ linux-3.0-imq/net/netfilter/Makefile 2011-07-24 12:06:26.042003277 +0300
7af23471 1359@@ -56,6 +56,7 @@
d031c9d6 1360 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
7f07242b 1361 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1362 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
7f07242b 1363+obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
14f08cd0 1364 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
7f07242b 1365 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
f6396b7e 1366 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
a168f21d
AM
1367diff -uNr linux-3.0/net/netfilter/nf_internals.h linux-3.0-imq/net/netfilter/nf_internals.h
1368--- linux-3.0/net/netfilter/nf_internals.h 2011-07-22 05:17:23.000000000 +0300
1369+++ linux-3.0-imq/net/netfilter/nf_internals.h 2011-07-24 12:54:17.615475634 +0300
1370@@ -29,7 +29,7 @@
1371 struct net_device *indev,
f6396b7e
AM
1372 struct net_device *outdev,
1373 int (*okfn)(struct sk_buff *),
a168f21d
AM
1374- unsigned int queuenum);
1375+ unsigned int queuenum, unsigned int queuetype);
f6396b7e
AM
1376 extern int __init netfilter_queue_init(void);
1377
1378 /* nf_log.c */
a168f21d
AM
1379diff -uNr linux-3.0/net/netfilter/nf_queue.c linux-3.0-imq/net/netfilter/nf_queue.c
1380--- linux-3.0/net/netfilter/nf_queue.c 2011-07-22 05:17:23.000000000 +0300
1381+++ linux-3.0-imq/net/netfilter/nf_queue.c 2011-07-24 13:05:00.682173434 +0300
7af23471 1382@@ -22,6 +22,26 @@
7f07242b 1383
1384 static DEFINE_MUTEX(queue_handler_mutex);
1385
1386+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1387+static const struct nf_queue_handler *queue_imq_handler;
1388+
1389+void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
2380c486 1390+{
7f07242b 1391+ mutex_lock(&queue_handler_mutex);
1392+ rcu_assign_pointer(queue_imq_handler, qh);
1393+ mutex_unlock(&queue_handler_mutex);
2380c486 1394+}
f6396b7e 1395+EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
2380c486 1396+
7f07242b 1397+void nf_unregister_queue_imq_handler(void)
2380c486 1398+{
7f07242b 1399+ mutex_lock(&queue_handler_mutex);
1400+ rcu_assign_pointer(queue_imq_handler, NULL);
1401+ mutex_unlock(&queue_handler_mutex);
2380c486 1402+}
f6396b7e 1403+EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
7f07242b 1404+#endif
2380c486 1405+
7f07242b 1406 /* return EBUSY when somebody else is registered, return EEXIST if the
1407 * same handler is registered, return 0 in case of success. */
1408 int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)
7af23471 1409@@ -92,7 +112,7 @@
7f07242b 1410 }
1411 EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers);
1412
1413-static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1414+void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
1415 {
1416 /* Release those devices we held, or Alexey will kill me. */
1417 if (entry->indev)
7af23471 1418@@ -112,6 +132,7 @@
7f07242b 1419 /* Drop reference to owner of hook which queued us. */
1420 module_put(entry->elem->owner);
1421 }
1422+EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs);
1423
1424 /*
1425 * Any packet that leaves via this function must come back
7af23471 1426@@ -123,7 +144,8 @@
f6396b7e
AM
1427 struct net_device *indev,
1428 struct net_device *outdev,
1429 int (*okfn)(struct sk_buff *),
1430- unsigned int queuenum)
1431+ unsigned int queuenum,
a168f21d 1432+ unsigned int queuetype)
f6396b7e 1433 {
7af23471 1434 int status = -ENOENT;
f6396b7e 1435 struct nf_queue_entry *entry = NULL;
a168f21d 1436@@ -137,7 +159,17 @@
7af23471 1437 /* QUEUE == DROP if no one is waiting, to be safe. */
7f07242b 1438 rcu_read_lock();
1439
7af23471 1440- qh = rcu_dereference(queue_handler[pf]);
a168f21d 1441+ if (queuetype == NF_IMQ_QUEUE) {
7f07242b 1442+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
f6396b7e 1443+ qh = rcu_dereference(queue_imq_handler);
7af23471 1444+#else
a168f21d
AM
1445+ BUG();
1446+ goto err_unlock;
7f07242b 1447+#endif
a168f21d
AM
1448+ } else {
1449+ qh = rcu_dereference(queue_handler[pf]);
1450+ }
1451+
7af23471
JR
1452 if (!qh) {
1453 status = -ESRCH;
7f07242b 1454 goto err_unlock;
a168f21d
AM
1455@@ -209,7 +241,8 @@
1456 struct net_device *indev,
1457 struct net_device *outdev,
1458 int (*okfn)(struct sk_buff *),
f6396b7e 1459- unsigned int queuenum)
a168f21d
AM
1460+ unsigned int queuenum,
1461+ unsigned int queuetype)
f6396b7e
AM
1462 {
1463 struct sk_buff *segs;
7af23471 1464 int err;
a168f21d 1465@@ -217,7 +250,7 @@
2380c486 1466
f6396b7e
AM
1467 if (!skb_is_gso(skb))
1468 return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
1469- queuenum);
a168f21d 1470+ queuenum, queuetype);
f6396b7e
AM
1471
1472 switch (pf) {
1473 case NFPROTO_IPV4:
a168f21d 1474@@ -244,7 +277,7 @@
f6396b7e 1475 segs->next = NULL;
7af23471
JR
1476 if (err == 0)
1477 err = __nf_queue(segs, elem, pf, hook, indev,
1478- outdev, okfn, queuenum);
a168f21d 1479+ outdev, okfn, queuenum, queuetype);
7af23471
JR
1480 if (err == 0)
1481 queued++;
1482 else
a168f21d
AM
1483@@ -299,9 +332,11 @@
1484 local_bh_enable();
1485 break;
f6396b7e 1486 case NF_QUEUE:
a168f21d 1487+ case NF_IMQ_QUEUE:
7af23471
JR
1488 err = __nf_queue(skb, elem, entry->pf, entry->hook,
1489 entry->indev, entry->outdev, entry->okfn,
1490- verdict >> NF_VERDICT_QBITS);
a168f21d
AM
1491+ verdict >> NF_VERDICT_QBITS,
1492+ verdict & NF_VERDICT_MASK);
7af23471
JR
1493 if (err < 0) {
1494 if (err == -ECANCELED)
1495 goto next_hook;
a168f21d
AM
1496diff -uNr linux-3.0/net/netfilter/xt_IMQ.c linux-3.0-imq/net/netfilter/xt_IMQ.c
1497--- linux-3.0/net/netfilter/xt_IMQ.c 1970-01-01 02:00:00.000000000 +0200
1498+++ linux-3.0-imq/net/netfilter/xt_IMQ.c 2011-07-24 12:06:26.062003279 +0300
f6396b7e 1499@@ -0,0 +1,74 @@
2380c486
JR
1500+/*
1501+ * This target marks packets to be enqueued to an imq device
1502+ */
1503+#include <linux/module.h>
1504+#include <linux/skbuff.h>
7f07242b 1505+#include <linux/netfilter/x_tables.h>
1506+#include <linux/netfilter/xt_IMQ.h>
2380c486
JR
1507+#include <linux/imq.h>
1508+
1509+static unsigned int imq_target(struct sk_buff *pskb,
f6396b7e 1510+ const struct xt_action_param *par)
2380c486 1511+{
7f07242b 1512+ const struct xt_imq_info *mr = par->targinfo;
2380c486 1513+
7f07242b 1514+ pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
2380c486
JR
1515+
1516+ return XT_CONTINUE;
1517+}
1518+
f6396b7e 1519+static int imq_checkentry(const struct xt_tgchk_param *par)
2380c486 1520+{
7f07242b 1521+ struct xt_imq_info *mr = par->targinfo;
2380c486 1522+
7f07242b 1523+ if (mr->todev > IMQ_MAX_DEVS - 1) {
2380c486
JR
1524+ printk(KERN_WARNING
1525+ "IMQ: invalid device specified, highest is %u\n",
7f07242b 1526+ IMQ_MAX_DEVS - 1);
f6396b7e 1527+ return -EINVAL;
2380c486
JR
1528+ }
1529+
f6396b7e 1530+ return 0;
2380c486
JR
1531+}
1532+
7f07242b 1533+static struct xt_target xt_imq_reg[] __read_mostly = {
1534+ {
1535+ .name = "IMQ",
1536+ .family = AF_INET,
1537+ .checkentry = imq_checkentry,
1538+ .target = imq_target,
1539+ .targetsize = sizeof(struct xt_imq_info),
1540+ .table = "mangle",
1541+ .me = THIS_MODULE
1542+ },
1543+ {
1544+ .name = "IMQ",
1545+ .family = AF_INET6,
1546+ .checkentry = imq_checkentry,
1547+ .target = imq_target,
1548+ .targetsize = sizeof(struct xt_imq_info),
1549+ .table = "mangle",
1550+ .me = THIS_MODULE
1551+ },
2380c486
JR
1552+};
1553+
7f07242b 1554+static int __init imq_init(void)
2380c486 1555+{
7f07242b 1556+ return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
2380c486
JR
1557+}
1558+
7f07242b 1559+static void __exit imq_fini(void)
2380c486 1560+{
7f07242b 1561+ xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
2380c486
JR
1562+}
1563+
7f07242b 1564+module_init(imq_init);
1565+module_exit(imq_fini);
2380c486
JR
1566+
1567+MODULE_AUTHOR("http://www.linuximq.net");
f6396b7e
AM
1568+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. "
1569+ "See http://www.linuximq.net/ for more information.");
2380c486 1570+MODULE_LICENSE("GPL");
7f07242b 1571+MODULE_ALIAS("ipt_IMQ");
1572+MODULE_ALIAS("ip6t_IMQ");
2380c486 1573+
This page took 0.294804 seconds and 4 git commands to generate.