net: add Intermediate Queueing Device (imq)

From: Jussi Kivilinna <jussi.kivilinna@iki.fi>

This patch is for kernel version 3.12.4+.

See: http://linuximq.net/

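Typical usage is to mark packets with the IMQ target and shape them on
the corresponding imq device. A minimal sketch (interface name, device
number and rates are illustrative only, not part of this patch):

    modprobe imq numdevs=1 numqueues=1
    iptables -t mangle -A PREROUTING -i eth0 -j IMQ --todev 0
    ip link set imq0 up
    tc qdisc add dev imq0 root handle 1: htb default 10
    tc class add dev imq0 parent 1: classid 1:10 htb rate 2mbit
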
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
---
 drivers/net/Kconfig                     |  119 ++++
 drivers/net/Makefile                    |    1 
 drivers/net/imq.c                       | 1007 +++++++++++++++++++++++++++++++
 include/linux/imq.h                     |   13 
 include/linux/netfilter/xt_IMQ.h        |    9 
 include/linux/netfilter_ipv4/ipt_IMQ.h  |   10 
 include/linux/netfilter_ipv6/ip6t_IMQ.h |   10 
 include/linux/skbuff.h                  |   22 +
 include/net/netfilter/nf_queue.h        |    6 
 include/uapi/linux/netfilter.h          |    3 
 net/core/dev.c                          |    8 
 net/core/skbuff.c                       |  112 +++
 net/ipv6/ip6_output.c                   |   10 
 net/netfilter/Kconfig                   |   12 
 net/netfilter/Makefile                  |    1 
 net/netfilter/core.c                    |    6 
 net/netfilter/nf_internals.h            |    2 
 net/netfilter/nf_queue.c                |   36 +
 net/netfilter/xt_IMQ.c                  |   72 ++
 19 files changed, 1449 insertions(+), 10 deletions(-)
 create mode 100644 drivers/net/imq.c
 create mode 100644 include/linux/imq.h
 create mode 100644 include/linux/netfilter/xt_IMQ.h
 create mode 100644 include/linux/netfilter_ipv4/ipt_IMQ.h
 create mode 100644 include/linux/netfilter_ipv6/ip6t_IMQ.h
 create mode 100644 net/netfilter/xt_IMQ.c

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b45b240..5a20da0 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -203,6 +203,125 @@ config RIONET_RX_SIZE
 	depends on RIONET
 	default "128"
 
+config IMQ
+	tristate "IMQ (intermediate queueing device) support"
+	depends on NETDEVICES && NETFILTER
+	---help---
+	  The IMQ devices are used as placeholders for QoS queueing
+	  disciplines. Every packet entering/leaving the IP stack can be
+	  directed through the IMQ device where it's enqueued/dequeued to the
+	  attached qdisc. This allows you to treat network devices as classes
+	  and distribute bandwidth among them. Iptables is used to specify
+	  through which IMQ device, if any, packets travel.
+
+	  More information at: http://www.linuximq.net/
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called imq.  If unsure, say N.
+
+choice
+	prompt "IMQ behavior (PRE/POSTROUTING)"
+	depends on IMQ
+	default IMQ_BEHAVIOR_AB
+	help
+	  This setting defines how IMQ behaves with respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  IMQ can work in any of the following ways:
+
+	      PREROUTING   |      POSTROUTING
+	  -----------------|-------------------
+	  #1  After NAT    |      After NAT
+	  #2  After NAT    |      Before NAT
+	  #3  Before NAT   |      After NAT
+	  #4  Before NAT   |      Before NAT
+
+	  The default behavior is to hook after NAT on PREROUTING
+	  and before NAT on POSTROUTING (#2).
+
+	  These settings are especially useful when trying to use IMQ
+	  to shape NATed clients.
+
+	  More information can be found at: www.linuximq.net
+
+	  If not sure, leave the default settings alone.
+
+config IMQ_BEHAVIOR_AA
+	bool "IMQ AA"
+	help
+	  This setting defines how IMQ behaves with respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   After NAT
+	  POSTROUTING:  After NAT
+
+	  More information can be found at: www.linuximq.net
+
+	  If not sure, leave the default settings alone.
+
+config IMQ_BEHAVIOR_AB
+	bool "IMQ AB"
+	help
+	  This setting defines how IMQ behaves with respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   After NAT
+	  POSTROUTING:  Before NAT
+
+	  More information can be found at: www.linuximq.net
+
+	  If not sure, leave the default settings alone.
+
+config IMQ_BEHAVIOR_BA
+	bool "IMQ BA"
+	help
+	  This setting defines how IMQ behaves with respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   Before NAT
+	  POSTROUTING:  After NAT
+
+	  More information can be found at: www.linuximq.net
+
+	  If not sure, leave the default settings alone.
+
+config IMQ_BEHAVIOR_BB
+	bool "IMQ BB"
+	help
+	  This setting defines how IMQ behaves with respect to its
+	  hooking in PREROUTING and POSTROUTING.
+
+	  Choosing this option will make IMQ hook like this:
+
+	  PREROUTING:   Before NAT
+	  POSTROUTING:  Before NAT
+
+	  More information can be found at: www.linuximq.net
+
+	  If not sure, leave the default settings alone.
+
+endchoice
+
+config IMQ_NUM_DEVS
+	int "Number of IMQ devices"
+	range 2 16
+	depends on IMQ
+	default "16"
+	help
+	  This setting defines how many IMQ devices will be created.
+
+	  The default value is 16.
+
+	  More information can be found at: www.linuximq.net
+
+	  If not sure, leave the default settings alone.
+
 config TUN
 	tristate "Universal TUN/TAP device driver support"
 	select CRC32
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 3fef8a8..12dafc0 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -9,6 +9,7 @@ obj-$(CONFIG_BONDING) += bonding/
 obj-$(CONFIG_DUMMY) += dummy.o
 obj-$(CONFIG_EQUALIZER) += eql.o
 obj-$(CONFIG_IFB) += ifb.o
+obj-$(CONFIG_IMQ) += imq.o
 obj-$(CONFIG_MACVLAN) += macvlan.o
 obj-$(CONFIG_MACVTAP) += macvtap.o
 obj-$(CONFIG_MII) += mii.o
diff --git a/drivers/net/imq.c b/drivers/net/imq.c
new file mode 100644
index 0000000..801bc8c
--- /dev/null
+++ b/drivers/net/imq.c
@@ -0,0 +1,1007 @@
+/*
+ * Pseudo-driver for the intermediate queue device.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * Authors:	Patrick McHardy, <kaber@trash.net>
+ *
+ * The first version was written by Martin Devera, <devik@cdi.cz>
+ *
+ * Credits:	Jan Rafaj <imq2t@cedric.vabo.cz>
+ *		- Update patch to 2.4.21
+ *		Sebastian Strollo <sstrollo@nortelnetworks.com>
+ *		- Fix "Dead-loop on netdevice imq"-issue
+ *		Marcel Sebek <sebek64@post.cz>
+ *		- Update to 2.6.2-rc1
+ *
+ * After some time of inactivity there is a group taking care
+ * of IMQ again: http://www.linuximq.net
+ *
+ *
+ * 2004/06/30 - New version of IMQ patch to kernels <=2.6.7
+ * including the following changes:
+ *
+ * - Correction of ipv6 support "+"s issue (Hasso Tepper)
+ * - Correction of imq_init_devs() issue that resulted in
+ * kernel OOPS unloading IMQ as module (Norbert Buchmuller)
+ * - Addition of functionality to choose number of IMQ devices
+ * during kernel config (Andre Correa)
+ * - Addition of functionality to choose how IMQ hooks on
+ * PRE and POSTROUTING (after or before NAT) (Andre Correa)
+ * - Cosmetic corrections (Norbert Buchmuller) (Andre Correa)
+ *
+ *
+ * 2005/12/16 - IMQ versions between 2.6.7 and 2.6.13 were
+ * released with almost no problems. 2.6.14-x was released
+ * with some important changes: nfcache was removed; after
+ * some weeks of trouble we figured out that some IMQ fields
+ * in skb were missing in skbuff.c - skb_clone and copy_skb_header.
+ * These functions are correctly patched by this new patch version.
+ *
+ * Thanks for all who helped to figure out all the problems with
+ * 2.6.14.x: Patrick McHardy, Rune Kock, VeNoMouS, Max CtRiX,
+ * Kevin Shanahan, Richard Lucassen, Valery Dachev (hopefully
+ * I didn't forget anybody). I apologize again for my lack of time.
+ *
+ *
+ * 2008/06/17 - 2.6.25 - Changed imq.c to use qdisc_run() instead
+ * of qdisc_restart() and moved qdisc_run() to tasklet to avoid
+ * recursive locking. New initialization routines to fix 'rmmod' not
+ * working anymore. Used code from ifb.c. (Jussi Kivilinna)
+ *
+ * 2008/08/06 - 2.6.26 - (JK)
+ * - Replaced tasklet with 'netif_schedule()'.
+ * - Cleaned up and added comments for imq_nf_queue().
+ *
+ * 2009/04/12
+ * - Add skb_save_cb/skb_restore_cb helper functions for backing up
+ * the control buffer. This is needed because the qdisc layer on kernels
+ * 2.6.27 and newer overwrites the control buffer. (Jussi Kivilinna)
+ * - Add better locking for IMQ device. Hopefully this will solve
+ * SMP issues. (Jussi Kivilinna)
+ * - Port to 2.6.27
+ * - Port to 2.6.28
+ * - Port to 2.6.29 + fix rmmod not working
+ *
+ * 2009/04/20 - (Jussi Kivilinna)
+ * - Use netdevice feature flags to avoid extra packet handling
+ * by core networking layer and possibly increase performance.
+ *
+ * 2009/09/26 - (Jussi Kivilinna)
+ * - Add imq_nf_reinject_lockless to fix deadlock with
+ * imq_nf_queue/imq_nf_reinject.
+ *
+ * 2009/12/08 - (Jussi Kivilinna)
+ * - Port to 2.6.32
+ * - Add check for skb->nf_queue_entry==NULL in imq_dev_xmit()
+ * - Also add better error checking for skb->nf_queue_entry usage
+ *
+ * 2010/02/25 - (Jussi Kivilinna)
+ * - Port to 2.6.33
+ *
+ * 2010/08/15 - (Jussi Kivilinna)
+ * - Port to 2.6.35
+ * - Simplify hook registration by using nf_register_hooks.
+ * - nf_reinject doesn't need spinlock around it, therefore remove
+ * imq_nf_reinject function. Other nf_reinject users protect
+ * their own data with spinlock. With IMQ however all data
+ * needed is stored per skbuff, so no locking is needed.
+ * - Changed IMQ to use 'separate' NF_IMQ_QUEUE instead of
+ * NF_QUEUE, this allows working coexistence of IMQ and other
+ * NF_QUEUE users.
+ * - Make IMQ multi-queue. Number of IMQ device queues can be
+ * increased with 'numqueues' module parameter. Default number
+ * of queues is 1, in other words by default IMQ works as
+ * single-queue device. Multi-queue selection is based on
+ * IFB multi-queue patch by Changli Gao <xiaosuo@gmail.com>.
+ *
+ * 2011/03/18 - (Jussi Kivilinna)
+ * - Port to 2.6.38
+ *
+ * 2011/07/12 - (syoder89@gmail.com)
+ * - Crash fix that happens when the receiving interface has more
+ * than one queue (add missing skb_set_queue_mapping in
+ * imq_select_queue).
+ *
+ * 2011/07/26 - (Jussi Kivilinna)
+ * - Add queue mapping checks for packets exiting IMQ.
+ * - Port to 3.0
+ *
+ * 2011/08/16 - (Jussi Kivilinna)
+ * - Clear IFF_TX_SKB_SHARING flag that was added for linux 3.0.2
+ *
+ * 2011/11/03 - Germano Michel <germanomichel@gmail.com>
+ * - Fix IMQ for net namespaces
+ *
+ * 2011/11/04 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * - Port to 3.1
+ * - Clean-up, move 'get imq device pointer by imqX name' to
+ * separate function from imq_nf_queue().
+ *
+ * 2012/01/05 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * - Port to 3.2
+ *
+ * 2012/03/19 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * - Port to 3.3
+ *
+ * 2012/12/12 - Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
+ * - Port to 3.7
+ * - Fix checkpatch.pl warnings
+ *
+ * 2013/09/10 - Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * - Fixed GSO handling for 3.10, see imq_nf_queue() for comments.
+ * - Don't copy skb->cb_next when copying or cloning skbuffs.
+ *
+ * 2013/09/16 - Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * - Port to 3.11
+ *
+ * 2013/11/12 - Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * - Port to 3.12
+ *
+ * Also, many thanks to Pablo Sebastian Greco for making the initial
+ * patch and to those who helped with the testing.
+ *
+ * More info at: http://www.linuximq.net/ (Andre Correa)
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_arp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	#include <linux/netfilter_ipv6.h>
+#endif
+#include <linux/imq.h>
+#include <net/pkt_sched.h>
+#include <net/netfilter/nf_queue.h>
+#include <net/sock.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/if_vlan.h>
+#include <linux/if_pppox.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+
+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
+
+static nf_hookfn imq_nf_hook;
+
+static struct nf_hook_ops imq_ops[] = {
+	{
+	/* imq_ingress_ipv4 */
+		.hook		= imq_nf_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_PRE_ROUTING,
+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
+		.priority	= NF_IP_PRI_MANGLE + 1,
+#else
+		.priority	= NF_IP_PRI_NAT_DST + 1,
+#endif
+	},
+	{
+	/* imq_egress_ipv4 */
+		.hook		= imq_nf_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET,
+		.hooknum	= NF_INET_POST_ROUTING,
+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
+		.priority	= NF_IP_PRI_LAST,
+#else
+		.priority	= NF_IP_PRI_NAT_SRC - 1,
+#endif
+	},
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	{
+	/* imq_ingress_ipv6 */
+		.hook		= imq_nf_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_INET_PRE_ROUTING,
+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
+		.priority	= NF_IP6_PRI_MANGLE + 1,
+#else
+		.priority	= NF_IP6_PRI_NAT_DST + 1,
+#endif
+	},
+	{
+	/* imq_egress_ipv6 */
+		.hook		= imq_nf_hook,
+		.owner		= THIS_MODULE,
+		.pf		= PF_INET6,
+		.hooknum	= NF_INET_POST_ROUTING,
+#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
+		.priority	= NF_IP6_PRI_LAST,
+#else
+		.priority	= NF_IP6_PRI_NAT_SRC - 1,
+#endif
+	},
+#endif
+};
+
+#if defined(CONFIG_IMQ_NUM_DEVS)
+static int numdevs = CONFIG_IMQ_NUM_DEVS;
+#else
+static int numdevs = IMQ_MAX_DEVS;
+#endif
+
+static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
+
+#define IMQ_MAX_QUEUES 32
+static int numqueues = 1;
+static u32 imq_hashrnd;
+
+static inline __be16 pppoe_proto(const struct sk_buff *skb)
+{
+	return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
+			sizeof(struct pppoe_hdr)));
+}
+
+static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
+{
+	unsigned int pull_len;
+	u16 protocol = skb->protocol;
+	u32 addr1, addr2;
+	u32 hash, ihl = 0;
+	union {
+		u16 in16[2];
+		u32 in32;
+	} ports;
+	u8 ip_proto;
+
+	pull_len = 0;
+
+recheck:
+	switch (protocol) {
+	case htons(ETH_P_8021Q): {
+		if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
+			goto other;
+
+		pull_len += VLAN_HLEN;
+		skb->network_header += VLAN_HLEN;
+
+		protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+		goto recheck;
+	}
+
+	case htons(ETH_P_PPP_SES): {
+		if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
+			goto other;
+
+		pull_len += PPPOE_SES_HLEN;
+		skb->network_header += PPPOE_SES_HLEN;
+
+		protocol = pppoe_proto(skb);
+		goto recheck;
+	}
+
+	case htons(ETH_P_IP): {
+		const struct iphdr *iph = ip_hdr(skb);
+
+		if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
+			goto other;
+
+		addr1 = iph->daddr;
+		addr2 = iph->saddr;
+
+		ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
+				 iph->protocol : 0;
+		ihl = ip_hdrlen(skb);
+
+		break;
+	}
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	case htons(ETH_P_IPV6): {
+		const struct ipv6hdr *iph = ipv6_hdr(skb);
+		__be16 fo = 0;
+
+		if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
+			goto other;
+
+		addr1 = iph->daddr.s6_addr32[3];
+		addr2 = iph->saddr.s6_addr32[3];
+		ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto,
+				       &fo);
+		if (unlikely((int)ihl < 0))
+			goto other;
+
+		break;
+	}
+#endif
+	default:
+other:
+		if (pull_len != 0) {
+			skb_push(skb, pull_len);
+			skb->network_header -= pull_len;
+		}
+
+		return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
+	}
+
+	if (addr1 > addr2)
+		swap(addr1, addr2);
+
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE: {
+		if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
+			if (ports.in16[0] > ports.in16[1])
+				swap(ports.in16[0], ports.in16[1]);
+			break;
+		}
+		/* fall-through */
+	}
+	default:
+		ports.in32 = 0;
+		break;
+	}
+
+	if (pull_len != 0) {
+		skb_push(skb, pull_len);
+		skb->network_header -= pull_len;
+	}
+
+	hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
+
+	return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
+}
+
+static inline bool sk_tx_queue_recorded(struct sock *sk)
+{
+	return (sk_tx_queue_get(sk) >= 0);
+}
+
+static struct netdev_queue *imq_select_queue(struct net_device *dev,
+						struct sk_buff *skb)
+{
+	u16 queue_index = 0;
+	u32 hash;
+
+	if (likely(dev->real_num_tx_queues == 1))
+		goto out;
+
+	/* IMQ can be receiving ingress or egress packets. */
+
+	/* Check first for if rx_queue is set */
+	if (skb_rx_queue_recorded(skb)) {
+		queue_index = skb_get_rx_queue(skb);
+		goto out;
+	}
+
+	/* Check if socket has tx_queue set */
+	if (sk_tx_queue_recorded(skb->sk)) {
+		queue_index = sk_tx_queue_get(skb->sk);
+		goto out;
+	}
+
+	/* Try to use socket hash */
+	if (skb->sk && skb->sk->sk_hash) {
+		hash = skb->sk->sk_hash;
+		queue_index =
+			(u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
+		goto out;
+	}
+
+	/* Generate hash from packet data */
+	queue_index = imq_hash(dev, skb);
+
+out:
+	if (unlikely(queue_index >= dev->real_num_tx_queues))
+		queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
+
+	skb_set_queue_mapping(skb, queue_index);
+	return netdev_get_tx_queue(dev, queue_index);
+}
+
+static struct net_device_stats *imq_get_stats(struct net_device *dev)
+{
+	return &dev->stats;
+}
+
+/* called for packets kfree'd in qdiscs at places other than enqueue */
+static void imq_skb_destructor(struct sk_buff *skb)
+{
+	struct nf_queue_entry *entry = skb->nf_queue_entry;
+
+	skb->nf_queue_entry = NULL;
+
+	if (entry) {
+		nf_queue_entry_release_refs(entry);
+		kfree(entry);
+	}
+
+	skb_restore_cb(skb); /* kfree backup */
+}
+
+static void imq_done_check_queue_mapping(struct sk_buff *skb,
+					 struct net_device *dev)
+{
+	unsigned int queue_index;
+
+	/* Don't let queue_mapping be left too large after exiting IMQ */
+	if (likely(skb->dev != dev && skb->dev != NULL)) {
+		queue_index = skb_get_queue_mapping(skb);
+		if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) {
+			queue_index = (u16)((u32)queue_index %
+						skb->dev->real_num_tx_queues);
+			skb_set_queue_mapping(skb, queue_index);
+		}
+	} else {
+		/* skb->dev was IMQ device itself or NULL, be on safe side and
+		 * just clear queue mapping.
+		 */
+		skb_set_queue_mapping(skb, 0);
+	}
+}
+
+static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nf_queue_entry *entry = skb->nf_queue_entry;
+
+	skb->nf_queue_entry = NULL;
+	dev->trans_start = jiffies;
+
+	dev->stats.tx_bytes += skb->len;
+	dev->stats.tx_packets++;
+
+	if (unlikely(entry == NULL)) {
+		/* We don't know what is going on here.. packet is queued for
+		 * imq device, but (probably) not by us.
+		 *
+		 * If this packet was not sent here by imq_nf_queue(), then
+		 * skb_save_cb() was not used and kfree_skb() should not show:
+		 *   WARNING: IMQ: kfree_skb: skb->cb_next:..
+		 * and/or
+		 *   WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
+		 *
+		 * However if this message is shown, then IMQ is somehow broken
+		 * and you should report this to linuximq.net.
+		 */
+
+		/* imq_dev_xmit is a black hole that eats all packets, report
+		 * that we eat this packet happily and increase dropped counters.
+		 */
+
+		dev->stats.tx_dropped++;
+		dev_kfree_skb(skb);
+
+		return NETDEV_TX_OK;
+	}
+
+	skb_restore_cb(skb); /* restore skb->cb */
+
+	skb->imq_flags = 0;
+	skb->destructor = NULL;
+
+	imq_done_check_queue_mapping(skb, dev);
+
+	nf_reinject(entry, NF_ACCEPT);
+
+	return NETDEV_TX_OK;
+}
+
+static struct net_device *get_imq_device_by_index(int index)
+{
+	struct net_device *dev = NULL;
+	struct net *net;
+	char buf[8];
+
+	/* get device by name and cache result */
+	snprintf(buf, sizeof(buf), "imq%d", index);
+
+	/* Search device from all namespaces. */
+	for_each_net(net) {
+		dev = dev_get_by_name(net, buf);
+		if (dev)
+			break;
+	}
+
+	if (WARN_ON_ONCE(dev == NULL)) {
+		/* IMQ device not found. Exotic config? */
+		return ERR_PTR(-ENODEV);
+	}
+
+	imq_devs_cache[index] = dev;
+	dev_put(dev);
+
+	return dev;
+}
+
+static struct nf_queue_entry *nf_queue_entry_dup(struct nf_queue_entry *e)
+{
+	struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
+	if (entry) {
+		if (nf_queue_entry_get_refs(entry))
+			return entry;
+		kfree(entry);
+	}
+	return NULL;
+}
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+/* When called from bridge netfilter, skb->data must point to MAC header
+ * before calling skb_gso_segment(). Else, original MAC header is lost
+ * and segmented skbs will be sent to wrong destination.
+ */
+static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
+{
+	if (skb->nf_bridge)
+		__skb_push(skb, skb->network_header - skb->mac_header);
+}
+
+static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
+{
+	if (skb->nf_bridge)
+		__skb_pull(skb, skb->network_header - skb->mac_header);
+}
+#else
+#define nf_bridge_adjust_skb_data(s) do {} while (0)
+#define nf_bridge_adjust_segmented_data(s) do {} while (0)
+#endif
+
+static void free_entry(struct nf_queue_entry *entry)
+{
+	nf_queue_entry_release_refs(entry);
+	kfree(entry);
+}
+
+static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev);
+
+static int __imq_nf_queue_gso(struct nf_queue_entry *entry,
+			      struct net_device *dev, struct sk_buff *skb)
+{
+	int ret = -ENOMEM;
+	struct nf_queue_entry *entry_seg;
+
+	nf_bridge_adjust_segmented_data(skb);
+
+	if (skb->next == NULL) { /* last packet, no need to copy entry */
+		struct sk_buff *gso_skb = entry->skb;
+		entry->skb = skb;
+		ret = __imq_nf_queue(entry, dev);
+		if (ret)
+			entry->skb = gso_skb;
+		return ret;
+	}
+
+	skb->next = NULL;
+
+	entry_seg = nf_queue_entry_dup(entry);
+	if (entry_seg) {
+		entry_seg->skb = skb;
+		ret = __imq_nf_queue(entry_seg, dev);
+		if (ret)
+			free_entry(entry_seg);
+	}
+	return ret;
+}
+
+static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
+{
+	struct sk_buff *skb, *segs;
+	struct net_device *dev;
+	unsigned int queued;
+	int index, retval, err;
+
+	index = entry->skb->imq_flags & IMQ_F_IFMASK;
+	if (unlikely(index > numdevs - 1)) {
+		if (net_ratelimit())
+			pr_warn("IMQ: invalid device specified, highest is %u\n",
+				numdevs - 1);
+		retval = -EINVAL;
+		goto out_no_dev;
+	}
+
+	/* check for imq device by index from cache */
+	dev = imq_devs_cache[index];
+	if (unlikely(!dev)) {
+		dev = get_imq_device_by_index(index);
+		if (IS_ERR(dev)) {
+			retval = PTR_ERR(dev);
+			goto out_no_dev;
+		}
+	}
+
+	if (unlikely(!(dev->flags & IFF_UP))) {
+		entry->skb->imq_flags = 0;
+		retval = -ECANCELED;
+		goto out_no_dev;
+	}
+
+	if (!skb_is_gso(entry->skb))
+		return __imq_nf_queue(entry, dev);
+
+	/* Since 3.10.x, GSO handling moved here as result of upstream commit
+	 * a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (netfilter: move
+	 * skb_gso_segment into nfnetlink_queue module).
+	 *
+	 * Following code replicates the gso handling from
+	 * 'net/netfilter/nfnetlink_queue_core.c':nfqnl_enqueue_packet().
+	 */
+
+	skb = entry->skb;
+
+	switch (entry->pf) {
+	case NFPROTO_IPV4:
+		skb->protocol = htons(ETH_P_IP);
+		break;
+	case NFPROTO_IPV6:
+		skb->protocol = htons(ETH_P_IPV6);
+		break;
+	}
+
+	nf_bridge_adjust_skb_data(skb);
+	segs = skb_gso_segment(skb, 0);
+	/* Does not use PTR_ERR to limit the number of error codes that can be
+	 * returned by nf_queue. For instance, callers rely on -ECANCELED to
+	 * mean 'ignore this hook'.
+	 */
+	err = -ENOBUFS;
+	if (IS_ERR(segs))
+		goto out_err;
+	queued = 0;
+	err = 0;
+	do {
+		struct sk_buff *nskb = segs->next;
+		if (nskb && nskb->next)
+			nskb->cb_next = NULL;
+		if (err == 0)
+			err = __imq_nf_queue_gso(entry, dev, segs);
+		if (err == 0)
+			queued++;
+		else
+			kfree_skb(segs);
+		segs = nskb;
+	} while (segs);
+
+	if (queued) {
+		if (err) /* some segments are already queued */
+			free_entry(entry);
+		kfree_skb(skb);
+		return 0;
+	}
+
+out_err:
+	nf_bridge_adjust_segmented_data(skb);
+	retval = err;
+out_no_dev:
+	return retval;
+}
+
+static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev)
+{
+	struct sk_buff *skb_orig, *skb, *skb_shared;
+	struct Qdisc *q;
+	struct netdev_queue *txq;
+	spinlock_t *root_lock;
+	int users;
+	int retval = -EINVAL;
+	unsigned int orig_queue_index;
+
+	dev->last_rx = jiffies;
+
+	skb = entry->skb;
+	skb_orig = NULL;
+
+	/* skb has owner? => make clone */
+	if (unlikely(skb->destructor)) {
+		skb_orig = skb;
+		skb = skb_clone(skb, GFP_ATOMIC);
+		if (unlikely(!skb)) {
+			retval = -ENOMEM;
+			goto out;
+		}
+		skb->cb_next = NULL;
+		entry->skb = skb;
+	}
+
+	skb->nf_queue_entry = entry;
+
+	dev->stats.rx_bytes += skb->len;
+	dev->stats.rx_packets++;
+
+	if (!skb->dev) {
+		/* skb->dev == NULL causes problems, try to find the cause. */
+		if (net_ratelimit()) {
+			dev_warn(&dev->dev,
+				 "received packet with skb->dev == NULL\n");
+			dump_stack();
+		}
+
+		skb->dev = dev;
+	}
+
+	/* Disables softirqs for lock below */
+	rcu_read_lock_bh();
+
+	/* Multi-queue selection */
+	orig_queue_index = skb_get_queue_mapping(skb);
+	txq = imq_select_queue(dev, skb);
+
+	q = rcu_dereference(txq->qdisc);
+	if (unlikely(!q->enqueue))
+		goto packet_not_eaten_by_imq_dev;
+
+	root_lock = qdisc_lock(q);
+	spin_lock(root_lock);
+
+	users = atomic_read(&skb->users);
+
+	skb_shared = skb_get(skb); /* increase reference count by one */
+
+	/* backup skb->cb, as qdisc layer will overwrite it */
+	skb_save_cb(skb_shared);
+	qdisc_enqueue_root(skb_shared, q); /* might kfree_skb */
+
+	if (likely(atomic_read(&skb_shared->users) == users + 1)) {
+		kfree_skb(skb_shared); /* decrease reference count by one */
+
+		skb->destructor = &imq_skb_destructor;
+
+		/* cloned? */
+		if (unlikely(skb_orig))
+			kfree_skb(skb_orig); /* free original */
+
+		spin_unlock(root_lock);
+		rcu_read_unlock_bh();
+
+		/* schedule qdisc dequeue */
+		__netif_schedule(q);
+
+		retval = 0;
+		goto out;
+	} else {
+		skb_restore_cb(skb_shared); /* restore skb->cb */
+		skb->nf_queue_entry = NULL;
+		/*
+		 * qdisc dropped packet and decreased skb reference count of
+		 * skb, so we don't really want to try to free it again, as
+		 * that would actually destroy the skb.
+		 */
+		spin_unlock(root_lock);
+		goto packet_not_eaten_by_imq_dev;
+	}
+
+packet_not_eaten_by_imq_dev:
+	skb_set_queue_mapping(skb, orig_queue_index);
+	rcu_read_unlock_bh();
+
+	/* cloned? restore original */
+	if (unlikely(skb_orig)) {
+		kfree_skb(skb);
+		entry->skb = skb_orig;
+	}
+	retval = -1;
+out:
+	return retval;
+}
+
+static unsigned int imq_nf_hook(unsigned int hook, struct sk_buff *pskb,
+				const struct net_device *indev,
+				const struct net_device *outdev,
+				int (*okfn)(struct sk_buff *))
+{
+	return (pskb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
+}
+
+static int imq_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+static int imq_open(struct net_device *dev)
+{
+	netif_start_queue(dev);
+	return 0;
+}
+
+static const struct net_device_ops imq_netdev_ops = {
+	.ndo_open		= imq_open,
+	.ndo_stop		= imq_close,
+	.ndo_start_xmit		= imq_dev_xmit,
+	.ndo_get_stats		= imq_get_stats,
+};
+
+static void imq_setup(struct net_device *dev)
+{
+	dev->netdev_ops		= &imq_netdev_ops;
+	dev->type		= ARPHRD_VOID;
+	dev->mtu		= 16000; /* too small? */
+	dev->tx_queue_len	= 11000; /* too big? */
+	dev->flags		= IFF_NOARP;
+	dev->features		= NETIF_F_SG | NETIF_F_FRAGLIST |
+				  NETIF_F_GSO | NETIF_F_HW_CSUM |
+				  NETIF_F_HIGHDMA;
+	dev->priv_flags		&= ~(IFF_XMIT_DST_RELEASE |
+				     IFF_TX_SKB_SHARING);
+}
+
+static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	int ret = 0;
+
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
+			ret = -EINVAL;
+			goto end;
+		}
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
+			ret = -EADDRNOTAVAIL;
+			goto end;
+		}
+	}
+	return 0;
+end:
+	pr_warn("IMQ: imq_validate failed (%d)\n", ret);
+	return ret;
+}
+
+static struct rtnl_link_ops imq_link_ops __read_mostly = {
+	.kind		= "imq",
+	.priv_size	= 0,
+	.setup		= imq_setup,
+	.validate	= imq_validate,
+};
+
+static const struct nf_queue_handler imq_nfqh = {
+	.outfn = imq_nf_queue,
+};
+
+static int __init imq_init_hooks(void)
+{
+	int ret;
+
+	nf_register_queue_imq_handler(&imq_nfqh);
+
+	ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
+	if (ret < 0)
+		nf_unregister_queue_imq_handler();
+
+	return ret;
+}
+
+static int __init imq_init_one(int index)
+{
+	struct net_device *dev;
+	int ret;
+
+	dev = alloc_netdev_mq(0, "imq%d", imq_setup, numqueues);
+	if (!dev)
+		return -ENOMEM;
+
+	ret = dev_alloc_name(dev, dev->name);
+	if (ret < 0)
+		goto fail;
+
+	dev->rtnl_link_ops = &imq_link_ops;
+	ret = register_netdevice(dev);
+	if (ret < 0)
+		goto fail;
+
+	return 0;
+fail:
+	free_netdev(dev);
+	return ret;
+}
+
+static int __init imq_init_devs(void)
+{
+	int err, i;
+
+	if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
+		pr_err("IMQ: numdevs has to be between 1 and %u\n",
+		       IMQ_MAX_DEVS);
+		return -EINVAL;
+	}
+
+	if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
+		pr_err("IMQ: numqueues has to be between 1 and %u\n",
+		       IMQ_MAX_QUEUES);
+		return -EINVAL;
+	}
+
+	get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
+
+	rtnl_lock();
+	err = __rtnl_link_register(&imq_link_ops);
+
+	for (i = 0; i < numdevs && !err; i++)
+		err = imq_init_one(i);
+
+	if (err) {
+		__rtnl_link_unregister(&imq_link_ops);
+		memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
+	}
+	rtnl_unlock();
+
+	return err;
+}
+
+static int __init imq_init_module(void)
+{
+	int err;
+
+#if defined(CONFIG_IMQ_NUM_DEVS)
+	BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
+	BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
+	BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
+#endif
+
+	err = imq_init_devs();
+	if (err) {
+		pr_err("IMQ: Error trying imq_init_devs(net)\n");
+		return err;
+	}
+
+	err = imq_init_hooks();
+	if (err) {
+		pr_err("IMQ: Error trying imq_init_hooks()\n");
+		rtnl_link_unregister(&imq_link_ops);
+		memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
+		return err;
+	}
+
+	pr_info("IMQ driver loaded successfully. (numdevs = %d, numqueues = %d)\n",
+		numdevs, numqueues);
+
+#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
+	pr_info("\tHooking IMQ before NAT on PREROUTING.\n");
+#else
+	pr_info("\tHooking IMQ after NAT on PREROUTING.\n");
+#endif
+#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
+	pr_info("\tHooking IMQ before NAT on POSTROUTING.\n");
+#else
+	pr_info("\tHooking IMQ after NAT on POSTROUTING.\n");
+#endif
+
+	return 0;
+}
+
+static void __exit imq_unhook(void)
+{
+	nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
+	nf_unregister_queue_imq_handler();
+}
+
+static void __exit imq_cleanup_devs(void)
+{
+	rtnl_link_unregister(&imq_link_ops);
+	memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
+}
+
+static void __exit imq_exit_module(void)
+{
+	imq_unhook();
+	imq_cleanup_devs();
+	pr_info("IMQ driver unloaded successfully.\n");
+}
+
+module_init(imq_init_module);
+module_exit(imq_exit_module);
+
+module_param(numdevs, int, 0);
+module_param(numqueues, int, 0);
+MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will be created)");
+MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
+MODULE_AUTHOR("http://www.linuximq.net");
+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_RTNL_LINK("imq");
+
diff --git a/include/linux/imq.h b/include/linux/imq.h
new file mode 100644
index 0000000..1babb09
--- /dev/null
+++ b/include/linux/imq.h
@@ -0,0 +1,13 @@
+#ifndef _IMQ_H
+#define _IMQ_H
+
+/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
+#define IMQ_F_BITS	5
+
+#define IMQ_F_IFMASK	0x0f
+#define IMQ_F_ENQUEUE	0x10
+
+#define IMQ_MAX_DEVS	(IMQ_F_IFMASK + 1)
+
+#endif /* _IMQ_H */
+
diff --git a/include/linux/netfilter/xt_IMQ.h b/include/linux/netfilter/xt_IMQ.h
new file mode 100644
index 0000000..9b07230
--- /dev/null
+++ b/include/linux/netfilter/xt_IMQ.h
@@ -0,0 +1,9 @@
+#ifndef _XT_IMQ_H
+#define _XT_IMQ_H
+
+struct xt_imq_info {
+	unsigned int todev;	/* target imq device */
+};
+
+#endif /* _XT_IMQ_H */
+
diff --git a/include/linux/netfilter_ipv4/ipt_IMQ.h b/include/linux/netfilter_ipv4/ipt_IMQ.h
new file mode 100644
index 0000000..7af320f
--- /dev/null
+++ b/include/linux/netfilter_ipv4/ipt_IMQ.h
@@ -0,0 +1,10 @@
+#ifndef _IPT_IMQ_H
+#define _IPT_IMQ_H
+
+/* Backwards compatibility for old userspace */
+#include <linux/netfilter/xt_IMQ.h>
+
+#define ipt_imq_info xt_imq_info
+
+#endif /* _IPT_IMQ_H */
+
diff --git a/include/linux/netfilter_ipv6/ip6t_IMQ.h b/include/linux/netfilter_ipv6/ip6t_IMQ.h
new file mode 100644
index 0000000..198ac01
--- /dev/null
+++ b/include/linux/netfilter_ipv6/ip6t_IMQ.h
@@ -0,0 +1,10 @@
+#ifndef _IP6T_IMQ_H
+#define _IP6T_IMQ_H
+
+/* Backwards compatibility for old userspace */
+#include <linux/netfilter/xt_IMQ.h>
+
+#define ip6t_imq_info xt_imq_info
+
+#endif /* _IP6T_IMQ_H */
+
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index f66f346..d699b19 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -33,6 +33,9 @@
 #include <linux/dma-mapping.h>
 #include <linux/netdev_features.h>
 #include <net/flow_keys.h>
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+#include <linux/imq.h>
+#endif
 
 /* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
@@ -418,6 +421,9 @@ struct sk_buff {
 	 *	first. This is owned by whoever has the skb queued ATM.
 	 */
 	char			cb[48] __aligned(8);
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	void			*cb_next;
+#endif
 
 	unsigned long		_skb_refdst;
 #ifdef CONFIG_XFRM
@@ -453,6 +459,9 @@ struct sk_buff {
 #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
 	struct nf_conntrack	*nfct;
 #endif
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	struct nf_queue_entry	*nf_queue_entry;
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	struct nf_bridge_info	*nf_bridge;
 #endif
@@ -490,6 +499,9 @@ struct sk_buff {
 	 */
 	__u8			encapsulation:1;
 	/* 6/8 bit hole (depending on ndisc_nodetype presence) */
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	__u8			imq_flags:IMQ_F_BITS;
+#endif
 	kmemcheck_bitfield_end(flags2);
 
 #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL
@@ -625,6 +637,12 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 	return (struct rtable *)skb_dst(skb);
 }
 
+
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+extern int skb_save_cb(struct sk_buff *skb);
+extern int skb_restore_cb(struct sk_buff *skb);
+#endif
+
 extern void kfree_skb(struct sk_buff *skb);
 extern void kfree_skb_list(struct sk_buff *segs);
 extern void skb_tx_error(struct sk_buff *skb);
@@ -2635,6 +2653,10 @@ static inline void __nf_copy(struct sk_buff *dst, const struct sk_buff *src)
 	nf_conntrack_get(src->nfct);
 	dst->nfctinfo = src->nfctinfo;
 #endif
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	dst->imq_flags = src->imq_flags;
+	dst->nf_queue_entry = src->nf_queue_entry;
+#endif
 #ifdef CONFIG_BRIDGE_NETFILTER
 	dst->nf_bridge = src->nf_bridge;
 	nf_bridge_get(src->nf_bridge);
diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h
index aaba4bb..f6e92a4 100644
--- a/include/net/netfilter/nf_queue.h
+++ b/include/net/netfilter/nf_queue.h
@@ -29,6 +29,12 @@ struct nf_queue_handler {
 void nf_register_queue_handler(const struct nf_queue_handler *qh);
 void nf_unregister_queue_handler(void);
 extern void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
+extern void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
+
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+extern void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
+extern void nf_unregister_queue_imq_handler(void);
+#endif
 
 bool nf_queue_entry_get_refs(struct nf_queue_entry *entry);
 void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index f7dc0eb..58c46a9 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -13,7 +13,8 @@
 #define NF_QUEUE 3
 #define NF_REPEAT 4
 #define NF_STOP 5
-#define NF_MAX_VERDICT NF_STOP
+#define NF_IMQ_QUEUE 6
+#define NF_MAX_VERDICT NF_IMQ_QUEUE
 
 /* we overload the higher bits for encoding auxiliary data such as the queue
  * number or errno values. Not nice, but better than additional function
diff --git a/net/core/dev.c b/net/core/dev.c
index 3d13874..9842f21 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -131,6 +131,9 @@
 #include <linux/static_key.h>
 #include <linux/hashtable.h>
 #include <linux/vmalloc.h>
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+#include <linux/imq.h>
+#endif
 
 #include "net-sysfs.h"
 
@@ -2595,7 +2598,12 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 			}
 		}
 
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+		if (!list_empty(&ptype_all) &&
+		    !(skb->imq_flags & IMQ_F_ENQUEUE))
+#else
 		if (!list_empty(&ptype_all))
+#endif
 			dev_queue_xmit_nit(skb, dev);
 
 		skb_len = skb->len;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index c28c7fe..a5f1888 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -73,6 +73,9 @@
 
 struct kmem_cache *skbuff_head_cache __read_mostly;
 static struct kmem_cache *skbuff_fclone_cache __read_mostly;
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
+#endif
 
 static void sock_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
@@ -92,6 +95,82 @@ static int sock_pipe_buf_steal(struct pipe_inode_info *pipe,
 	return 1;
 }
 
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+/* Control buffer save/restore for IMQ devices */
+struct skb_cb_table {
+	char			cb[48] __aligned(8);
+	void			*cb_next;
+	atomic_t		refcnt;
+};
+
+static DEFINE_SPINLOCK(skb_cb_store_lock);
+
+int skb_save_cb(struct sk_buff *skb)
+{
+	struct skb_cb_table *next;
+
+	next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
+	if (!next)
+		return -ENOMEM;
+
+	BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
+
+	memcpy(next->cb, skb->cb, sizeof(skb->cb));
+	next->cb_next = skb->cb_next;
+
+	atomic_set(&next->refcnt, 1);
+
+	skb->cb_next = next;
+	return 0;
+}
+EXPORT_SYMBOL(skb_save_cb);
+
+int skb_restore_cb(struct sk_buff *skb)
+{
+	struct skb_cb_table *next;
+
+	if (!skb->cb_next)
+		return 0;
+
+	next = skb->cb_next;
+
+	BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
+
+	memcpy(skb->cb, next->cb, sizeof(skb->cb));
+	skb->cb_next = next->cb_next;
+
+	spin_lock(&skb_cb_store_lock);
+
+	if (atomic_dec_and_test(&next->refcnt))
+		kmem_cache_free(skbuff_cb_store_cache, next);
+
+	spin_unlock(&skb_cb_store_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(skb_restore_cb);
+
+static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
+{
+	struct skb_cb_table *next;
+	struct sk_buff *old;
+
+	if (!__old->cb_next) {
+		new->cb_next = NULL;
+		return;
+	}
+
+	spin_lock(&skb_cb_store_lock);
+
+	old = (struct sk_buff *)__old;
+
+	next = old->cb_next;
+	atomic_inc(&next->refcnt);
+	new->cb_next = next;
+
+	spin_unlock(&skb_cb_store_lock);
+}
+#endif
 
 /* Pipe buffer operations for a socket. */
 static const struct pipe_buf_operations sock_pipe_buf_ops = {
@@ -577,6 +656,28 @@ static void skb_release_head_state(struct sk_buff *skb)
 		WARN_ON(in_irq());
 		skb->destructor(skb);
 	}
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	/*
+	 * This should not happen. When it does, avoid memleak by restoring
+	 * the chain of cb-backups.
+	 */
+	while (skb->cb_next != NULL) {
+		if (net_ratelimit())
+			pr_warn("IMQ: kfree_skb: skb->cb_next: %p\n",
+				skb->cb_next);
+
+		skb_restore_cb(skb);
+	}
+	/*
+	 * This should not happen either, nf_queue_entry is nullified in
+	 * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are
+	 * leaking entry pointers, maybe memory. We don't know if this is
+	 * pointer to already freed memory, or should this be freed.
+	 * If this happens we need to add refcounting, etc for nf_queue_entry.
+	 */
+	if (skb->nf_queue_entry && net_ratelimit())
+		pr_warn("%s\n", "IMQ: kfree_skb: skb->nf_queue_entry != NULL");
+#endif
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	nf_conntrack_put(skb->nfct);
 #endif
@@ -709,6 +810,10 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->sp			= secpath_get(old->sp);
 #endif
 	memcpy(new->cb, old->cb, sizeof(old->cb));
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	new->cb_next = NULL;
+	/*skb_copy_stored_cb(new, old);*/
+#endif
 	new->csum		= old->csum;
 	new->local_df		= old->local_df;
 	new->pkt_type		= old->pkt_type;
@@ -3112,6 +3217,13 @@ void __init skb_init(void)
 						0,
 						SLAB_HWCACHE_ALIGN|SLAB_PANIC,
 						NULL);
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+	skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
+						  sizeof(struct skb_cb_table),
+						  0,
+						  SLAB_HWCACHE_ALIGN|SLAB_PANIC,
+						  NULL);
+#endif
 }
 
 /**
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index b6fa35e..08dcfef 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -64,9 +64,6 @@ static int ip6_finish_output2(struct sk_buff *skb)
 	struct in6_addr *nexthop;
 	int ret;
 
-	skb->protocol = htons(ETH_P_IPV6);
-	skb->dev = dev;
-
 	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
@@ -143,6 +140,13 @@ int ip6_output(struct sk_buff *skb)
 		return 0;
 	}
 
+	/*
+	 * IMQ-patch: moved setting skb->dev and skb->protocol from
+	 * ip6_finish_output2 to fix crashing at netif_skb_features().
+	 */
+	skb->protocol = htons(ETH_P_IPV6);
+	skb->dev = dev;
+
 	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 6e839b6..45ac31c 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -630,6 +630,18 @@ config NETFILTER_XT_TARGET_LOG
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_IMQ
+	tristate '"IMQ" target support'
+	depends on NETFILTER_XTABLES
+	depends on IP_NF_MANGLE || IP6_NF_MANGLE
+	select IMQ
+	default m if NETFILTER_ADVANCED=n
+	help
+	  This option adds an `IMQ' target which is used to specify if and
+	  to which imq device packets should get enqueued/dequeued.
+
+	  To compile it as a module, choose M here.  If unsure, say N.
+
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index c3a0a12..9647f06 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -82,6 +82,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 593b16e..740cd69 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -191,9 +191,11 @@ next_hook:
 		ret = NF_DROP_GETERR(verdict);
 		if (ret == 0)
 			ret = -EPERM;
-	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
+	} else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
+		   (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
 		int err = nf_queue(skb, elem, pf, hook, indev, outdev, okfn,
-						verdict >> NF_VERDICT_QBITS);
+				   verdict >> NF_VERDICT_QBITS,
+				   verdict & NF_VERDICT_MASK);
 		if (err < 0) {
 			if (err == -ECANCELED)
 				goto next_hook;
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index 3deec99..c1a1397 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -29,7 +29,7 @@ extern int nf_queue(struct sk_buff *skb,
 		    struct net_device *indev,
 		    struct net_device *outdev,
 		    int (*okfn)(struct sk_buff *),
-		    unsigned int queuenum);
+		    unsigned int queuenum, unsigned int queuetype);
 extern int __init netfilter_queue_init(void);
 
 /* nf_log.c */
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 5d24b1f..28317dc 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -27,6 +27,23 @@
  */
 static const struct nf_queue_handler __rcu *queue_handler __read_mostly;
 
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+static const struct nf_queue_handler __rcu *queue_imq_handler __read_mostly;
+
+void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
+{
+	rcu_assign_pointer(queue_imq_handler, qh);
+}
+EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
+
+void nf_unregister_queue_imq_handler(void)
+{
+	RCU_INIT_POINTER(queue_imq_handler, NULL);
+	synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
+#endif
+
 /* return EBUSY when somebody else is registered, return EEXIST if the
  * same handler is registered, return 0 in case of success. */
 void nf_register_queue_handler(const struct nf_queue_handler *qh)
@@ -105,7 +122,8 @@ int nf_queue(struct sk_buff *skb,
 	     struct net_device *indev,
 	     struct net_device *outdev,
 	     int (*okfn)(struct sk_buff *),
-	     unsigned int queuenum)
+	     unsigned int queuenum,
+	     unsigned int queuetype)
 {
 	int status = -ENOENT;
 	struct nf_queue_entry *entry = NULL;
@@ -115,7 +133,17 @@ int nf_queue(struct sk_buff *skb,
 	/* QUEUE == DROP if no one is waiting, to be safe. */
 	rcu_read_lock();
 
-	qh = rcu_dereference(queue_handler);
+	if (queuetype == NF_IMQ_QUEUE) {
+#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
+		qh = rcu_dereference(queue_imq_handler);
+#else
+		BUG();
+		goto err_unlock;
+#endif
+	} else {
+		qh = rcu_dereference(queue_handler);
+	}
+
 	if (!qh) {
 		status = -ESRCH;
 		goto err_unlock;
@@ -205,9 +233,11 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict)
 		local_bh_enable();
 		break;
 	case NF_QUEUE:
+	case NF_IMQ_QUEUE:
 		err = nf_queue(skb, elem, entry->pf, entry->hook,
 			       entry->indev, entry->outdev, entry->okfn,
-			       verdict >> NF_VERDICT_QBITS);
+			       verdict >> NF_VERDICT_QBITS,
+			       verdict & NF_VERDICT_MASK);
 		if (err < 0) {
 			if (err == -ECANCELED)
 				goto next_hook;
diff --git a/net/netfilter/xt_IMQ.c b/net/netfilter/xt_IMQ.c
new file mode 100644
index 0000000..1c3cd66
--- /dev/null
+++ b/net/netfilter/xt_IMQ.c
@@ -0,0 +1,72 @@
+/*
+ * This target marks packets to be enqueued to an imq device
+ */
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_IMQ.h>
+#include <linux/imq.h>
+
+static unsigned int imq_target(struct sk_buff *pskb,
+			       const struct xt_action_param *par)
+{
+	const struct xt_imq_info *mr = par->targinfo;
+
+	pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
+
+	return XT_CONTINUE;
+}
+
+static int imq_checkentry(const struct xt_tgchk_param *par)
+{
+	struct xt_imq_info *mr = par->targinfo;
+
+	if (mr->todev > IMQ_MAX_DEVS - 1) {
+		pr_warn("IMQ: invalid device specified, highest is %u\n",
+			IMQ_MAX_DEVS - 1);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static struct xt_target xt_imq_reg[] __read_mostly = {
+	{
+		.name		= "IMQ",
+		.family		= AF_INET,
+		.checkentry	= imq_checkentry,
+		.target		= imq_target,
+		.targetsize	= sizeof(struct xt_imq_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE
+	},
+	{
+		.name		= "IMQ",
+		.family		= AF_INET6,
+		.checkentry	= imq_checkentry,
+		.target		= imq_target,
+		.targetsize	= sizeof(struct xt_imq_info),
+		.table		= "mangle",
+		.me		= THIS_MODULE
+	},
+};
+
+static int __init imq_init(void)
+{
+	return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
+}
+
+static void __exit imq_fini(void)
+{
+	xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
+}
+
+module_init(imq_init);
+module_exit(imq_fini);
+
+MODULE_AUTHOR("http://www.linuximq.net");
+MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See http://www.linuximq.net/ for more information.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_IMQ");
+MODULE_ALIAS("ip6t_IMQ");
+