]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-imq.patch
- unconditional noarch packages
[packages/kernel.git] / kernel-imq.patch
1 diff -urNp -x '*.orig' linux-4.9/drivers/net/Kconfig linux-4.9/drivers/net/Kconfig
2 --- linux-4.9/drivers/net/Kconfig       2021-02-24 15:35:10.207508334 +0100
3 +++ linux-4.9/drivers/net/Kconfig       2021-02-24 15:35:24.097940603 +0100
4 @@ -260,6 +260,125 @@ config RIONET_RX_SIZE
5         depends on RIONET
6         default "128"
7  
8 +config IMQ
9 +       tristate "IMQ (intermediate queueing device) support"
10 +       depends on NETDEVICES && NETFILTER
11 +       ---help---
12 +         The IMQ device(s) is used as placeholder for QoS queueing
13 +         disciplines. Every packet entering/leaving the IP stack can be
14 +         directed through the IMQ device where it's enqueued/dequeued to the
15 +         attached qdisc. This allows you to treat network devices as classes
16 +         and distribute bandwidth among them. Iptables is used to specify
17 +         through which IMQ device, if any, packets travel.
18 +
19 +         More information at: https://github.com/imq/linuximq
20 +
21 +         To compile this driver as a module, choose M here: the module
22 +         will be called imq.  If unsure, say N.
23 +
24 +choice
25 +       prompt "IMQ behavior (PRE/POSTROUTING)"
26 +       depends on IMQ
27 +       default IMQ_BEHAVIOR_AB
28 +       help
29 +         This setting defines how IMQ behaves in respect to its
30 +         hooking in PREROUTING and POSTROUTING.
31 +
32 +         IMQ can work in any of the following ways:
33 +
34 +             PREROUTING   |      POSTROUTING
35 +         -----------------|-------------------
36 +         #1  After NAT    |      After NAT
37 +         #2  After NAT    |      Before NAT
38 +         #3  Before NAT   |      After NAT
39 +         #4  Before NAT   |      Before NAT
40 +
41 +         The default behavior (AB) is to hook after NAT on PREROUTING
42 +         and before NAT on POSTROUTING (#2).
43 +
44 +         These settings are especially useful when trying to use IMQ
45 +         to shape NATed clients.
46 +
47 +         More information can be found at: https://github.com/imq/linuximq
48 +
49 +         If not sure leave the default settings alone.
50 +
51 +config IMQ_BEHAVIOR_AA
52 +       bool "IMQ AA"
53 +       help
54 +         This setting defines how IMQ behaves in respect to its
55 +         hooking in PREROUTING and POSTROUTING.
56 +
57 +         Choosing this option will make IMQ hook like this:
58 +
59 +         PREROUTING:   After NAT
60 +         POSTROUTING:  After NAT
61 +
62 +         More information can be found at: https://github.com/imq/linuximq
63 +
64 +         If not sure leave the default settings alone.
65 +
66 +config IMQ_BEHAVIOR_AB
67 +       bool "IMQ AB"
68 +       help
69 +         This setting defines how IMQ behaves in respect to its
70 +         hooking in PREROUTING and POSTROUTING.
71 +
72 +         Choosing this option will make IMQ hook like this:
73 +
74 +         PREROUTING:   After NAT
75 +         POSTROUTING:  Before NAT
76 +
77 +         More information can be found at: https://github.com/imq/linuximq
78 +
79 +         If not sure leave the default settings alone.
80 +
81 +config IMQ_BEHAVIOR_BA
82 +       bool "IMQ BA"
83 +       help
84 +         This setting defines how IMQ behaves in respect to its
85 +         hooking in PREROUTING and POSTROUTING.
86 +
87 +         Choosing this option will make IMQ hook like this:
88 +
89 +         PREROUTING:   Before NAT
90 +         POSTROUTING:  After NAT
91 +
92 +         More information can be found at: https://github.com/imq/linuximq
93 +
94 +         If not sure leave the default settings alone.
95 +
96 +config IMQ_BEHAVIOR_BB
97 +       bool "IMQ BB"
98 +       help
99 +         This setting defines how IMQ behaves in respect to its
100 +         hooking in PREROUTING and POSTROUTING.
101 +
102 +         Choosing this option will make IMQ hook like this:
103 +
104 +         PREROUTING:   Before NAT
105 +         POSTROUTING:  Before NAT
106 +
107 +         More information can be found at: https://github.com/imq/linuximq
108 +
109 +         If not sure leave the default settings alone.
110 +
111 +endchoice
112 +
113 +config IMQ_NUM_DEVS
114 +       int "Number of IMQ devices"
115 +       range 2 16
116 +       depends on IMQ
117 +       default "16"
118 +       help
119 +         This setting defines how many IMQ devices will be created.
120 +
121 +         The default value is 16.
122 +
123 +         More information can be found at: https://github.com/imq/linuximq
124 +
125 +         If not sure leave the default settings alone.
126 +
127  config TUN
128         tristate "Universal TUN/TAP device driver support"
129         depends on INET
130 diff -urNp -x '*.orig' linux-4.9/drivers/net/Makefile linux-4.9/drivers/net/Makefile
131 --- linux-4.9/drivers/net/Makefile      2016-12-11 20:17:54.000000000 +0100
132 +++ linux-4.9/drivers/net/Makefile      2021-02-24 15:35:24.097940603 +0100
133 @@ -11,6 +11,7 @@ obj-$(CONFIG_DUMMY) += dummy.o
134  obj-$(CONFIG_EQUALIZER) += eql.o
135  obj-$(CONFIG_IFB) += ifb.o
136  obj-$(CONFIG_MACSEC) += macsec.o
137 +obj-$(CONFIG_IMQ) += imq.o
138  obj-$(CONFIG_MACVLAN) += macvlan.o
139  obj-$(CONFIG_MACVTAP) += macvtap.o
140  obj-$(CONFIG_MII) += mii.o
141 diff -urNp -x '*.orig' linux-4.9/drivers/net/imq.c linux-4.9/drivers/net/imq.c
142 --- linux-4.9/drivers/net/imq.c 1970-01-01 01:00:00.000000000 +0100
143 +++ linux-4.9/drivers/net/imq.c 2021-02-24 15:35:24.097940603 +0100
144 @@ -0,0 +1,907 @@
145 +/*
146 + *             Pseudo-driver for the intermediate queue device.
147 + *
148 + *             This program is free software; you can redistribute it and/or
149 + *             modify it under the terms of the GNU General Public License
150 + *             as published by the Free Software Foundation; either version
151 + *             2 of the License, or (at your option) any later version.
152 + *
153 + * Authors:    Patrick McHardy, <kaber@trash.net>
154 + *
155 + *            The first version was written by Martin Devera, <devik@cdi.cz>
156 + *
157 + *                        See Creditis.txt
158 + */
159 +
160 +#include <linux/module.h>
161 +#include <linux/kernel.h>
162 +#include <linux/moduleparam.h>
163 +#include <linux/list.h>
164 +#include <linux/skbuff.h>
165 +#include <linux/netdevice.h>
166 +#include <linux/etherdevice.h>
167 +#include <linux/rtnetlink.h>
168 +#include <linux/if_arp.h>
169 +#include <linux/netfilter.h>
170 +#include <linux/netfilter_ipv4.h>
171 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
172 +       #include <linux/netfilter_ipv6.h>
173 +#endif
174 +#include <linux/imq.h>
175 +#include <net/pkt_sched.h>
176 +#include <net/netfilter/nf_queue.h>
177 +#include <net/sock.h>
178 +#include <linux/ip.h>
179 +#include <linux/ipv6.h>
180 +#include <linux/if_vlan.h>
181 +#include <linux/if_pppox.h>
182 +#include <net/ip.h>
183 +#include <net/ipv6.h>
184 +
185 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num);
186 +
187 +static nf_hookfn imq_nf_hook;
188 +
189 +static struct nf_hook_ops imq_ops[] = {
190 +       {
191 +       /* imq_ingress_ipv4 */
192 +               .hook           = imq_nf_hook,
193 +               .pf             = PF_INET,
194 +               .hooknum        = NF_INET_PRE_ROUTING,
195 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
196 +               .priority       = NF_IP_PRI_MANGLE + 1,
197 +#else
198 +               .priority       = NF_IP_PRI_NAT_DST + 1,
199 +#endif
200 +       },
201 +       {
202 +       /* imq_egress_ipv4 */
203 +               .hook           = imq_nf_hook,
204 +               .pf             = PF_INET,
205 +               .hooknum        = NF_INET_POST_ROUTING,
206 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
207 +               .priority       = NF_IP_PRI_LAST,
208 +#else
209 +               .priority       = NF_IP_PRI_NAT_SRC - 1,
210 +#endif
211 +       },
212 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
213 +       {
214 +       /* imq_ingress_ipv6 */
215 +               .hook           = imq_nf_hook,
216 +               .pf             = PF_INET6,
217 +               .hooknum        = NF_INET_PRE_ROUTING,
218 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
219 +               .priority       = NF_IP6_PRI_MANGLE + 1,
220 +#else
221 +               .priority       = NF_IP6_PRI_NAT_DST + 1,
222 +#endif
223 +       },
224 +       {
225 +       /* imq_egress_ipv6 */
226 +               .hook           = imq_nf_hook,
227 +               .pf             = PF_INET6,
228 +               .hooknum        = NF_INET_POST_ROUTING,
229 +#if defined(CONFIG_IMQ_BEHAVIOR_AA) || defined(CONFIG_IMQ_BEHAVIOR_BA)
230 +               .priority       = NF_IP6_PRI_LAST,
231 +#else
232 +               .priority       = NF_IP6_PRI_NAT_SRC - 1,
233 +#endif
234 +       },
235 +#endif
236 +};
237 +
238 +#if defined(CONFIG_IMQ_NUM_DEVS)
239 +static int numdevs = CONFIG_IMQ_NUM_DEVS;
240 +#else
241 +static int numdevs = IMQ_MAX_DEVS;
242 +#endif
243 +
244 +static struct net_device *imq_devs_cache[IMQ_MAX_DEVS];
245 +
246 +#define IMQ_MAX_QUEUES 32
247 +static int numqueues = 1;
248 +static u32 imq_hashrnd;
249 +static int imq_dev_accurate_stats = 1;
250 +
251 +static inline __be16 pppoe_proto(const struct sk_buff *skb)
252 +{
253 +       return *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
254 +                       sizeof(struct pppoe_hdr)));
255 +}
256 +
257 +static u16 imq_hash(struct net_device *dev, struct sk_buff *skb)
258 +{
259 +       unsigned int pull_len;
260 +       u16 protocol = skb->protocol;
261 +       u32 addr1, addr2;
262 +       u32 hash, ihl = 0;
263 +       union {
264 +               u16 in16[2];
265 +               u32 in32;
266 +       } ports;
267 +       u8 ip_proto;
268 +
269 +       pull_len = 0;
270 +       /* Peel VLAN/PPPoE, hash the flow; skb is restored before return */
271 +recheck:
272 +       switch (protocol) {
273 +       case htons(ETH_P_8021Q): {
274 +               if (unlikely(skb_pull(skb, VLAN_HLEN) == NULL))
275 +                       goto other;
276 +
277 +               pull_len += VLAN_HLEN;
278 +               skb->network_header += VLAN_HLEN;
279 +
280 +               protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
281 +               goto recheck;
282 +       }
283 +
284 +       case htons(ETH_P_PPP_SES): {
285 +               if (unlikely(skb_pull(skb, PPPOE_SES_HLEN) == NULL))
286 +                       goto other;
287 +
288 +               pull_len += PPPOE_SES_HLEN;
289 +               skb->network_header += PPPOE_SES_HLEN;
290 +
291 +               protocol = pppoe_proto(skb);
292 +               goto recheck;
293 +       }
294 +
295 +       case htons(ETH_P_IP): {
296 +               const struct iphdr *iph;
297 +
298 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct iphdr))))
299 +                       goto other;
300 +               iph = ip_hdr(skb); /* only after pull: head may move */
301 +               addr1 = iph->daddr;
302 +               addr2 = iph->saddr;
303 +
304 +               ip_proto = !(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) ?
305 +                                iph->protocol : 0;
306 +               ihl = ip_hdrlen(skb);
307 +
308 +               break;
309 +       }
310 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
311 +       case htons(ETH_P_IPV6): {
312 +               const struct ipv6hdr *iph;
313 +               __be16 fo = 0;
314 +
315 +               if (unlikely(!pskb_may_pull(skb, sizeof(struct ipv6hdr))))
316 +                       goto other;
317 +               iph = ipv6_hdr(skb); /* only after pull: head may move */
318 +               addr1 = iph->daddr.s6_addr32[3];
319 +               addr2 = iph->saddr.s6_addr32[3];
320 +               ihl = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &ip_proto,
321 +                                      &fo);
322 +               if (unlikely((int)ihl < 0)) /* ihl is u32: test the sign */
323 +                       goto other;
324 +
325 +               break;
326 +       }
327 +#endif
328 +       default:
329 +other:
330 +               if (pull_len != 0) {
331 +                       skb_push(skb, pull_len);
332 +                       skb->network_header -= pull_len;
333 +               }
334 +
335 +               return (u16)(ntohs(protocol) % dev->real_num_tx_queues);
336 +       }
337 +
338 +       if (addr1 > addr2)
339 +               swap(addr1, addr2);
340 +
341 +       switch (ip_proto) {
342 +       case IPPROTO_TCP:
343 +       case IPPROTO_UDP:
344 +       case IPPROTO_DCCP:
345 +       case IPPROTO_ESP:
346 +       case IPPROTO_AH:
347 +       case IPPROTO_SCTP:
348 +       case IPPROTO_UDPLITE: {
349 +               if (likely(skb_copy_bits(skb, ihl, &ports.in32, 4) >= 0)) {
350 +                       if (ports.in16[0] > ports.in16[1])
351 +                               swap(ports.in16[0], ports.in16[1]);
352 +                       break;
353 +               }
354 +               /* fall-through */
355 +       }
356 +       default:
357 +               ports.in32 = 0;
358 +               break;
359 +       }
360 +
361 +       if (pull_len != 0) {
362 +               skb_push(skb, pull_len);
363 +               skb->network_header -= pull_len;
364 +       }
365 +
366 +       hash = jhash_3words(addr1, addr2, ports.in32, imq_hashrnd ^ ip_proto);
367 +
368 +       return (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
369 +}
370 +
371 +static inline bool sk_tx_queue_recorded(struct sock *sk)
372 +{
373 +       return (sk_tx_queue_get(sk) >= 0);
374 +}
375 +
376 +static struct netdev_queue *imq_select_queue(struct net_device *dev,
377 +                                               struct sk_buff *skb)
378 +{
379 +       u16 queue_index = 0;
380 +       u32 hash;
381 +
382 +       if (likely(dev->real_num_tx_queues == 1))
383 +               goto out;
384 +
385 +       /* IMQ can be receiving ingress or egress packets. */
386 +
387 +       /* First check whether rx_queue is set */
388 +       if (skb_rx_queue_recorded(skb)) {
389 +               queue_index = skb_get_rx_queue(skb);
390 +               goto out;
391 +       }
392 +
393 +       /* Check if socket has tx_queue set */
394 +       if (sk_tx_queue_recorded(skb->sk)) {
395 +               queue_index = sk_tx_queue_get(skb->sk);
396 +               goto out;
397 +       }
398 +
399 +       /* Try to use the socket hash */
400 +       if (skb->sk && skb->sk->sk_hash) {
401 +               hash = skb->sk->sk_hash;
402 +               queue_index =
403 +                       (u16)(((u64)hash * dev->real_num_tx_queues) >> 32);
404 +               goto out;
405 +       }
406 +
407 +       /* Generate hash from packet data */
408 +       queue_index = imq_hash(dev, skb);
409 +
410 +out:
411 +       if (unlikely(queue_index >= dev->real_num_tx_queues))
412 +               queue_index = (u16)((u32)queue_index % dev->real_num_tx_queues);
413 +
414 +       skb_set_queue_mapping(skb, queue_index);
415 +       return netdev_get_tx_queue(dev, queue_index);
416 +}
417 +
418 +static struct net_device_stats *imq_get_stats(struct net_device *dev)
419 +{
420 +       return &dev->stats;
421 +}
422 +
423 +/* called for packets kfree'd in qdiscs at places other than enqueue */
424 +static void imq_skb_destructor(struct sk_buff *skb)
425 +{
426 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
427 +
428 +       skb->nf_queue_entry = NULL;
429 +
430 +       if (entry) {
431 +               nf_queue_entry_release_refs(entry);
432 +               kfree(entry);
433 +       }
434 +
435 +       skb_restore_cb(skb); /* kfree backup */
436 +}
437 +
438 +static void imq_done_check_queue_mapping(struct sk_buff *skb,
439 +                                        struct net_device *dev)
440 +{
441 +       unsigned int queue_index;
442 +
443 +       /* Don't let queue_mapping be left too large after exiting IMQ */
444 +       if (likely(skb->dev != dev && skb->dev != NULL)) {
445 +               queue_index = skb_get_queue_mapping(skb);
446 +               if (unlikely(queue_index >= skb->dev->real_num_tx_queues)) {
447 +                       queue_index = (u16)((u32)queue_index %
448 +                                               skb->dev->real_num_tx_queues);
449 +                       skb_set_queue_mapping(skb, queue_index);
450 +               }
451 +       } else {
452 +               /* skb->dev was IMQ device itself or NULL, be on safe side and
453 +                * just clear queue mapping.
454 +                */
455 +               skb_set_queue_mapping(skb, 0);
456 +       }
457 +}
458 +
459 +static netdev_tx_t imq_dev_xmit(struct sk_buff *skb, struct net_device *dev)
460 +{
461 +       struct nf_queue_entry *entry = skb->nf_queue_entry;
462 +
463 +       skb->nf_queue_entry = NULL;
464 +       netif_trans_update(dev);
465 +
466 +       dev->stats.tx_bytes += skb->len;
467 +       dev->stats.tx_packets++;
468 +
469 +       if (unlikely(entry == NULL)) {
470 +               /* We don't know what is going on here.. packet is queued for
471 +                * imq device, but (probably) not by us.
472 +                *
473 +                * If this packet was not sent here by imq_nf_queue(), then
474 +                * skb_save_cb() was not used and skb_free() should not show:
475 +                *   WARNING: IMQ: kfree_skb: skb->cb_next:..
476 +                * and/or
477 +                *   WARNING: IMQ: kfree_skb: skb->nf_queue_entry...
478 +                *
479 +                * However if this message is shown, then IMQ is somehow broken
480 +                * and you should report this to linuximq.net.
481 +                */
482 +
483 +               /* imq_dev_xmit is black hole that eats all packets, report that
484 +                * we eat this packet happily and increase dropped counters.
485 +                */
486 +
487 +               dev->stats.tx_dropped++;
488 +               dev_kfree_skb(skb);
489 +
490 +               return NETDEV_TX_OK;
491 +       }
492 +
493 +       skb_restore_cb(skb); /* restore skb->cb */
494 +
495 +       skb->imq_flags = 0;
496 +       skb->destructor = NULL;
497 +
498 +       imq_done_check_queue_mapping(skb, dev);
499 +
500 +       nf_reinject(entry, NF_ACCEPT);
501 +
502 +       return NETDEV_TX_OK;
503 +}
504 +
505 +static struct net_device *get_imq_device_by_index(int index)
506 +{
507 +       struct net_device *dev = NULL;
508 +       struct net *net;
509 +       char buf[8];
510 +
511 +       /* get device by name and cache result */
512 +       snprintf(buf, sizeof(buf), "imq%d", index);
513 +
514 +       /* Search device from all namespaces. */
515 +       for_each_net(net) {
516 +               dev = dev_get_by_name(net, buf);
517 +               if (dev)
518 +                       break;
519 +       }
520 +
521 +       if (WARN_ON_ONCE(dev == NULL)) {
522 +               /* IMQ device not found. Exotic config? */
523 +               return ERR_PTR(-ENODEV);
524 +       }
525 +
526 +       imq_devs_cache[index] = dev;
527 +       dev_put(dev);
528 +
529 +       return dev;
530 +}
531 +
532 +static struct nf_queue_entry *nf_queue_entry_dup(struct nf_queue_entry *e)
533 +{
534 +       struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
535 +       if (entry) {
536 +               nf_queue_entry_get_refs(entry);
537 +                       return entry;
538 +       }
539 +       return NULL;
540 +}
541 +
542 +#ifdef CONFIG_BRIDGE_NETFILTER
543 +/* When called from bridge netfilter, skb->data must point to MAC header
544 + * before calling skb_gso_segment(). Else, original MAC header is lost
545 + * and segmented skbs will be sent to wrong destination.
546 + */
547 +static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
548 +{
549 +       if (skb->nf_bridge)
550 +               __skb_push(skb, skb->network_header - skb->mac_header);
551 +}
552 +
553 +static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
554 +{
555 +       if (skb->nf_bridge)
556 +               __skb_pull(skb, skb->network_header - skb->mac_header);
557 +}
558 +#else
559 +#define nf_bridge_adjust_skb_data(s) do {} while (0)
560 +#define nf_bridge_adjust_segmented_data(s) do {} while (0)
561 +#endif
562 +
563 +static void free_entry(struct nf_queue_entry *entry)
564 +{
565 +       nf_queue_entry_release_refs(entry);
566 +       kfree(entry);
567 +}
568 +
569 +static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev);
570 +
571 +static int __imq_nf_queue_gso(struct nf_queue_entry *entry,
572 +                             struct net_device *dev, struct sk_buff *skb)
573 +{
574 +       int ret = -ENOMEM;
575 +       struct nf_queue_entry *entry_seg;
576 +
577 +       nf_bridge_adjust_segmented_data(skb);
578 +
579 +       if (skb->next == NULL) { /* last packet, no need to copy entry */
580 +               struct sk_buff *gso_skb = entry->skb;
581 +               entry->skb = skb;
582 +               ret = __imq_nf_queue(entry, dev);
583 +               if (ret)
584 +                       entry->skb = gso_skb;
585 +               return ret;
586 +       }
587 +
588 +       skb->next = NULL;
589 +
590 +       entry_seg = nf_queue_entry_dup(entry);
591 +       if (entry_seg) {
592 +               entry_seg->skb = skb;
593 +               ret = __imq_nf_queue(entry_seg, dev);
594 +               if (ret)
595 +                       free_entry(entry_seg);
596 +       }
597 +       return ret;
598 +}
599 +
600 +static int imq_nf_queue(struct nf_queue_entry *entry, unsigned queue_num)
601 +{
602 +       struct sk_buff *skb, *segs;
603 +       struct net_device *dev;
604 +       unsigned int queued;
605 +       int index, retval, err;
606 +
607 +       index = entry->skb->imq_flags & IMQ_F_IFMASK;
608 +       if (unlikely(index > numdevs - 1)) {
609 +               if (net_ratelimit())
610 +                       pr_warn("IMQ: invalid device specified, highest is %u\n",
611 +                               numdevs - 1);
612 +               retval = -EINVAL;
613 +               goto out_no_dev;
614 +       }
615 +
616 +       /* check for imq device by index from cache */
617 +       dev = imq_devs_cache[index];
618 +       if (unlikely(!dev)) {
619 +               dev = get_imq_device_by_index(index);
620 +               if (IS_ERR(dev)) {
621 +                       retval = PTR_ERR(dev);
622 +                       goto out_no_dev;
623 +               }
624 +       }
625 +
626 +       if (unlikely(!(dev->flags & IFF_UP))) {
627 +               entry->skb->imq_flags = 0;
628 +               retval = -ECANCELED;
629 +               goto out_no_dev;
630 +       }
631 +
632 +       /* Since 3.10.x, GSO handling moved here as result of upstream commit
633 +        * a5fedd43d5f6c94c71053a66e4c3d2e35f1731a2 (netfilter: move
634 +        * skb_gso_segment into nfnetlink_queue module).
635 +        *
636 +        * Following code replicates the gso handling from
637 +        * 'net/netfilter/nfnetlink_queue_core.c':nfqnl_enqueue_packet().
638 +        */
639 +
640 +       skb = entry->skb;
641 +
642 +       switch (entry->state.pf) {
643 +       case NFPROTO_IPV4:
644 +               skb->protocol = htons(ETH_P_IP);
645 +               break;
646 +       case NFPROTO_IPV6:
647 +               skb->protocol = htons(ETH_P_IPV6);
648 +               break;
649 +       }
650 +
651 +       if (!skb_is_gso(entry->skb))
652 +               return __imq_nf_queue(entry, dev);
653 +
654 +       nf_bridge_adjust_skb_data(skb);
655 +       segs = skb_gso_segment(skb, 0);
656 +       /* Does not use PTR_ERR to limit the number of error codes that can be
657 +        * returned by nf_queue.  For instance, callers rely on -ECANCELED to
658 +        * mean 'ignore this hook'.
659 +        */
660 +       err = -ENOBUFS;
661 +       if (IS_ERR(segs))
662 +               goto out_err;
663 +       queued = 0;
664 +       err = 0;
665 +       do {
666 +               struct sk_buff *nskb = segs->next;
667 +               if (nskb && nskb->next)
668 +                       nskb->cb_next = NULL;
669 +               if (err == 0)
670 +                       err = __imq_nf_queue_gso(entry, dev, segs);
671 +               if (err == 0)
672 +                       queued++;
673 +               else
674 +                       kfree_skb(segs);
675 +               segs = nskb;
676 +       } while (segs);
677 +
678 +       if (queued) {
679 +               if (err) /* some segments are already queued */
680 +                       free_entry(entry);
681 +               kfree_skb(skb);
682 +               return 0;
683 +       }
684 +
685 +out_err:
686 +       nf_bridge_adjust_segmented_data(skb);
687 +       retval = err;
688 +out_no_dev:
689 +       return retval;
690 +}
691 +
692 +static int __imq_nf_queue(struct nf_queue_entry *entry, struct net_device *dev)
693 +{
694 +       struct sk_buff *skb_orig, *skb, *skb_shared, *skb_popd;
695 +       struct Qdisc *q;
696 +       struct sk_buff *to_free = NULL;
697 +       struct netdev_queue *txq;
698 +       spinlock_t *root_lock;
699 +       int users;
700 +       int retval = -EINVAL;
701 +       unsigned int orig_queue_index;
702 +
703 +       dev->last_rx = jiffies;
704 +
705 +       skb = entry->skb;
706 +       skb_orig = NULL;
707 +
708 +       /* skb has owner? => make clone */
709 +       if (unlikely(skb->destructor)) {
710 +               skb_orig = skb;
711 +               skb = skb_clone(skb, GFP_ATOMIC);
712 +               if (unlikely(!skb)) {
713 +                       retval = -ENOMEM;
714 +                       goto out;
715 +               }
716 +               skb->cb_next = NULL;
717 +               entry->skb = skb;
718 +       }
719 +
720 +       dev->stats.rx_bytes += skb->len;
721 +       dev->stats.rx_packets++;
722 +
723 +       if (!skb->dev) {
724 +               /* skb->dev == NULL causes problems, try to find the cause. */
725 +               if (net_ratelimit()) {
726 +                       dev_warn(&dev->dev,
727 +                                "received packet with skb->dev == NULL\n");
728 +                       dump_stack();
729 +               }
730 +
731 +               skb->dev = dev;
732 +       }
733 +
734 +       /* Disables softirqs for lock below */
735 +       rcu_read_lock_bh();
736 +
737 +       /* Multi-queue selection */
738 +       orig_queue_index = skb_get_queue_mapping(skb);
739 +       txq = imq_select_queue(dev, skb);
740 +
741 +       q = rcu_dereference(txq->qdisc);
742 +       if (unlikely(!q->enqueue))
743 +               goto packet_not_eaten_by_imq_dev;
744 +
745 +       skb->nf_queue_entry = entry;
746 +       root_lock = qdisc_lock(q);
747 +       spin_lock(root_lock);
748 +
749 +       users = atomic_read(&skb->users);
750 +
751 +       skb_shared = skb_get(skb); /* increase reference count by one */
752 +
753 +       /* backup skb->cb, as qdisc layer will overwrite it */
754 +       skb_save_cb(skb_shared);
755 +       qdisc_enqueue_root(skb_shared, q, &to_free); /* might kfree_skb */
756 +       if (likely(atomic_read(&skb_shared->users) == users + 1)) {
757 +               bool validate;
758 +
759 +               kfree_skb(skb_shared); /* decrease reference count by one */
760 +
761 +               skb->destructor = &imq_skb_destructor;
762 +
763 +               skb_popd = qdisc_dequeue_skb(q, &validate);
764 +
765 +               /* cloned? */
766 +               if (unlikely(skb_orig))
767 +                       kfree_skb(skb_orig); /* free original */
768 +
769 +               spin_unlock(root_lock);
770 +
771 +#if 0
772 +               /* schedule qdisc dequeue */
773 +               __netif_schedule(q);
774 +#else
775 +               if (likely(skb_popd)) {
776 +                       /* Note that we validate skb (GSO, checksum, ...) outside of locks */
777 +                       if (validate)
778 +                       skb_popd = validate_xmit_skb_list(skb_popd, dev);
779 +
780 +                       if (skb_popd) {
781 +                               int dummy_ret;
782 +                               int cpu = smp_processor_id(); /* ok because BHs are off */
783 +
784 +                               txq = skb_get_tx_queue(dev, skb_popd);
785 +                               /*
786 +                               IMQ device will not be frozen or stopped, and xmit will always succeed.
787 +                               So we need not check its status and return value, which speeds things up.
788 +                               */
789 +                               if (imq_dev_accurate_stats && txq->xmit_lock_owner != cpu) {
790 +                                       HARD_TX_LOCK(dev, txq, cpu);
791 +                                       if (!netif_xmit_frozen_or_stopped(txq)) {
792 +                                               dev_hard_start_xmit(skb_popd, dev, txq, &dummy_ret);
793 +                                       }
794 +                                       HARD_TX_UNLOCK(dev, txq);
795 +                               } else {
796 +                                       if (!netif_xmit_frozen_or_stopped(txq)) {
797 +                                               dev_hard_start_xmit(skb_popd, dev, txq, &dummy_ret);
798 +                                       }
799 +                               }
800 +                       }
801 +               } else {
802 +                       /* No ready skb, then schedule it */
803 +                       __netif_schedule(q);
804 +               }
805 +#endif
806 +               rcu_read_unlock_bh();
807 +               retval = 0;
808 +               goto out;
809 +       } else {
810 +               skb_restore_cb(skb_shared); /* restore skb->cb */
811 +               skb->nf_queue_entry = NULL;
812 +               /*
813 +                * qdisc dropped the packet and decreased the reference count of
814 +                * skb, so we must not try to free it again as that would
815 +                * actually destroy the skb.
816 +                */
817 +               spin_unlock(root_lock);
818 +               goto packet_not_eaten_by_imq_dev;
819 +       }
820 +
821 +packet_not_eaten_by_imq_dev:
822 +       skb_set_queue_mapping(skb, orig_queue_index);
823 +       rcu_read_unlock_bh();
824 +
825 +       /* cloned? restore original */
826 +       if (unlikely(skb_orig)) {
827 +               kfree_skb(skb);
828 +               entry->skb = skb_orig;
829 +       }
830 +       retval = -1;
831 +out:
832 +       if (unlikely(to_free)) {
833 +               kfree_skb_list(to_free);
834 +       }
835 +       return retval;
836 +}
837 +static unsigned int imq_nf_hook(void *priv,
838 +                               struct sk_buff *skb,
839 +                               const struct nf_hook_state *state)
840 +{
841 +       return (skb->imq_flags & IMQ_F_ENQUEUE) ? NF_IMQ_QUEUE : NF_ACCEPT;
842 +}
843 +
844 +static int imq_close(struct net_device *dev)
845 +{
846 +       netif_stop_queue(dev);
847 +       return 0;
848 +}
849 +
850 +static int imq_open(struct net_device *dev)
851 +{
852 +       netif_start_queue(dev);
853 +       return 0;
854 +}
855 +
856 +static const struct net_device_ops imq_netdev_ops = {
857 +       .ndo_open               = imq_open,
858 +       .ndo_stop               = imq_close,
859 +       .ndo_start_xmit         = imq_dev_xmit,
860 +       .ndo_get_stats          = imq_get_stats,
861 +};
862 +
863 +static void imq_setup(struct net_device *dev)
864 +{
865 +       dev->netdev_ops         = &imq_netdev_ops;
866 +       dev->type               = ARPHRD_VOID;
867 +       dev->mtu                = 16000; /* too small? */
868 +       dev->tx_queue_len       = 11000; /* too big? */
869 +       dev->flags              = IFF_NOARP;
870 +       dev->features           = NETIF_F_SG | NETIF_F_FRAGLIST |
871 +                                 NETIF_F_GSO | NETIF_F_HW_CSUM |
872 +                                 NETIF_F_HIGHDMA;
873 +       dev->priv_flags         &= ~(IFF_XMIT_DST_RELEASE |
874 +                                    IFF_TX_SKB_SHARING);
875 +}
876 +
877 +static int imq_validate(struct nlattr *tb[], struct nlattr *data[])
878 +{
879 +       int ret = 0;
880 +
881 +       if (tb[IFLA_ADDRESS]) {
882 +               if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) {
883 +                       ret = -EINVAL;
884 +                       goto end;
885 +               }
886 +               if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) {
887 +                       ret = -EADDRNOTAVAIL;
888 +                       goto end;
889 +               }
890 +       }
891 +       return 0;
892 +end:
893 +       pr_warn("IMQ: imq_validate failed (%d)\n", ret);
894 +       return ret;
895 +}
896 +
897 +static struct rtnl_link_ops imq_link_ops __read_mostly = {
898 +       .kind           = "imq",
899 +       .priv_size      = 0,
900 +       .setup          = imq_setup,
901 +       .validate       = imq_validate,
902 +};
903 +
904 +static const struct nf_queue_handler imq_nfqh = {
905 +       .outfn = imq_nf_queue,
906 +};
907 +
908 +static int __init imq_init_hooks(void)
909 +{
910 +       int ret;
911 +       /* Install the IMQ queue handler, then the netfilter hooks in imq_ops. */
912 +       nf_register_queue_imq_handler(&imq_nfqh);
913 +
914 +       ret = nf_register_hooks(imq_ops, ARRAY_SIZE(imq_ops));
915 +       if (ret < 0)
916 +               nf_unregister_queue_imq_handler(); /* undo the handler on failure */
917 +
918 +       return ret;
919 +}
920 +
921 +static int __init imq_init_one(int index)
922 +{
923 +       struct net_device *dev;
924 +       int ret;
925 +       /* Create and register one imq%d device; `index` is not used in this body. */
926 +       dev = alloc_netdev_mq(0, "imq%d", NET_NAME_UNKNOWN, imq_setup, numqueues);
927 +       if (!dev)
928 +               return -ENOMEM;
929 +
930 +       ret = dev_alloc_name(dev, dev->name);
931 +       if (ret < 0)
932 +               goto fail;
933 +
934 +       dev->rtnl_link_ops = &imq_link_ops;
935 +       ret = register_netdevice(dev);
936 +       if (ret < 0)
937 +               goto fail;
938 +
939 +       return 0;
940 +fail:
941 +       free_netdev(dev);       /* both failure paths release the allocation */
942 +       return ret;
943 +}
944 +
945 +static int __init imq_init_devs(void)
946 +{
947 +       int err, i;
948 +       /* Validate numdevs/numqueues, then create the devices; 0 or -errno. */
949 +       if (numdevs < 1 || numdevs > IMQ_MAX_DEVS) {
950 +               pr_err("IMQ: numdevs has to be between 1 and %u\n",
951 +                      IMQ_MAX_DEVS);
952 +               return -EINVAL;
953 +       }
954 +
955 +       if (numqueues < 1 || numqueues > IMQ_MAX_QUEUES) {
956 +               pr_err("IMQ: numqueues has to be between 1 and %u\n",
957 +                      IMQ_MAX_QUEUES);
958 +               return -EINVAL;
959 +       }
960 +
961 +       get_random_bytes(&imq_hashrnd, sizeof(imq_hashrnd));
962 +       /* Link ops registration and device creation run under one rtnl hold. */
963 +       rtnl_lock();
964 +       err = __rtnl_link_register(&imq_link_ops);
965 +       if (err)                /* ops never registered: nothing to unwind */
966 +               goto unlock;
967 +       for (i = 0; i < numdevs && !err; i++)
968 +               err = imq_init_one(i);
969 +       if (err) {
970 +               __rtnl_link_unregister(&imq_link_ops);
971 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
972 +       }
973 +unlock:
974 +       rtnl_unlock();
975 +       return err;
976 +}
977 +
978 +static int __init imq_init_module(void)
979 +{
980 +       int err;
981 +       /* Module init: create the devices first, then attach the hooks. */
982 +#if defined(CONFIG_IMQ_NUM_DEVS)
983 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS > 16);
984 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS < 2);
985 +       BUILD_BUG_ON(CONFIG_IMQ_NUM_DEVS - 1 > IMQ_F_IFMASK);
986 +#endif
987 +
988 +       err = imq_init_devs();
989 +       if (err) {
990 +               pr_err("IMQ: Error trying imq_init_devs(net)\n");
991 +               return err;
992 +       }
993 +       /* If the hooks fail, unwind the link ops registered by imq_init_devs(). */
994 +       err = imq_init_hooks();
995 +       if (err) {
996 +               pr_err("IMQ: Error trying imq_init_hooks()\n");
997 +               rtnl_link_unregister(&imq_link_ops);
998 +               memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
999 +               return err;
1000 +       }
1001 +
1002 +       pr_info("IMQ driver loaded successfully. (numdevs = %d, numqueues = %d, imq_dev_accurate_stats = %d)\n",
1003 +               numdevs, numqueues, imq_dev_accurate_stats);
1004 +
1005 +#if defined(CONFIG_IMQ_BEHAVIOR_BA) || defined(CONFIG_IMQ_BEHAVIOR_BB)
1006 +       pr_info("\tHooking IMQ before NAT on PREROUTING.\n");
1007 +#else
1008 +       pr_info("\tHooking IMQ after NAT on PREROUTING.\n");
1009 +#endif
1010 +#if defined(CONFIG_IMQ_BEHAVIOR_AB) || defined(CONFIG_IMQ_BEHAVIOR_BB)
1011 +       pr_info("\tHooking IMQ before NAT on POSTROUTING.\n");
1012 +#else
1013 +       pr_info("\tHooking IMQ after NAT on POSTROUTING.\n");
1014 +#endif
1015 +
1016 +       return 0;
1017 +}
1018 +
1019 +static void __exit imq_unhook(void)
1020 +{
1021 +       nf_unregister_hooks(imq_ops, ARRAY_SIZE(imq_ops));
1022 +       nf_unregister_queue_imq_handler();
1023 +}
1024 +
1025 +static void __exit imq_cleanup_devs(void)
1026 +{
1027 +       rtnl_link_unregister(&imq_link_ops);
1028 +       memset(imq_devs_cache, 0, sizeof(imq_devs_cache));
1029 +}
1030 +
1031 +static void __exit imq_exit_module(void)
1032 +{
1033 +       imq_unhook();
1034 +       imq_cleanup_devs();
1035 +       pr_info("IMQ driver unloaded successfully.\n");
1036 +}
1037 +
1038 +module_init(imq_init_module);
1039 +module_exit(imq_exit_module);
1040 +
1041 +module_param(numdevs, int, 0);
1042 +module_param(numqueues, int, 0);
1043 +module_param(imq_dev_accurate_stats, int, 0);
1044 +MODULE_PARM_DESC(numdevs, "number of IMQ devices (how many imq* devices will be created)");
1045 +MODULE_PARM_DESC(numqueues, "number of queues per IMQ device");
1046 +MODULE_PARM_DESC(imq_dev_accurate_stats, "Notify if need the accurate imq device stats");
1047 +
1048 +MODULE_AUTHOR("https://github.com/imq/linuximq");
1049 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See https://github.com/imq/linuximq/wiki for more information.");
1050 +MODULE_LICENSE("GPL");
1051 +MODULE_ALIAS_RTNL_LINK("imq");
1052 diff -urNp -x '*.orig' linux-4.9/include/linux/imq.h linux-4.9/include/linux/imq.h
1053 --- linux-4.9/include/linux/imq.h       1970-01-01 01:00:00.000000000 +0100
1054 +++ linux-4.9/include/linux/imq.h       2021-02-24 15:35:24.097940603 +0100
1055 @@ -0,0 +1,13 @@
1056 +#ifndef _IMQ_H
1057 +#define _IMQ_H
1058 +
1059 +/* IFMASK (16 device indexes, 0 to 15) and flag(s) fit in 5 bits */
1060 +#define IMQ_F_BITS     5
1061 +
1062 +#define IMQ_F_IFMASK   0x0f
1063 +#define IMQ_F_ENQUEUE  0x10
1064 +
1065 +#define IMQ_MAX_DEVS   (IMQ_F_IFMASK + 1)
1066 +
1067 +#endif /* _IMQ_H */
1068 +
1069 diff -urNp -x '*.orig' linux-4.9/include/linux/netdevice.h linux-4.9/include/linux/netdevice.h
1070 --- linux-4.9/include/linux/netdevice.h 2021-02-24 15:35:11.047534473 +0100
1071 +++ linux-4.9/include/linux/netdevice.h 2021-02-24 15:35:24.101274040 +0100
1072 @@ -3694,6 +3694,19 @@ static inline void netif_tx_unlock_bh(st
1073         }                                               \
1074  }
1075  
1076 +#define HARD_TX_LOCK_BH(dev, txq) {           \
1077 +    if ((dev->features & NETIF_F_LLTX) == 0) {  \
1078 +        __netif_tx_lock_bh(txq);      \
1079 +    }                       \
1080 +}
1081 +
1082 +#define HARD_TX_UNLOCK_BH(dev, txq) {          \
1083 +    if ((dev->features & NETIF_F_LLTX) == 0) {  \
1084 +        __netif_tx_unlock_bh(txq);         \
1085 +    }                       \
1086 +}
1087 +
1088 +
1089  static inline void netif_tx_disable(struct net_device *dev)
1090  {
1091         unsigned int i;
1092 diff -urNp -x '*.orig' linux-4.9/include/linux/netfilter/xt_IMQ.h linux-4.9/include/linux/netfilter/xt_IMQ.h
1093 --- linux-4.9/include/linux/netfilter/xt_IMQ.h  1970-01-01 01:00:00.000000000 +0100
1094 +++ linux-4.9/include/linux/netfilter/xt_IMQ.h  2021-02-24 15:35:24.101274040 +0100
1095 @@ -0,0 +1,9 @@
1096 +#ifndef _XT_IMQ_H
1097 +#define _XT_IMQ_H
1098 +
1099 +struct xt_imq_info {
1100 +       unsigned int todev;     /* target imq device */
1101 +};
1102 +
1103 +#endif /* _XT_IMQ_H */
1104 +
1105 diff -urNp -x '*.orig' linux-4.9/include/linux/netfilter_ipv4/ipt_IMQ.h linux-4.9/include/linux/netfilter_ipv4/ipt_IMQ.h
1106 --- linux-4.9/include/linux/netfilter_ipv4/ipt_IMQ.h    1970-01-01 01:00:00.000000000 +0100
1107 +++ linux-4.9/include/linux/netfilter_ipv4/ipt_IMQ.h    2021-02-24 15:35:24.101274040 +0100
1108 @@ -0,0 +1,10 @@
1109 +#ifndef _IPT_IMQ_H
1110 +#define _IPT_IMQ_H
1111 +
1112 +/* Backwards compatibility for old userspace */
1113 +#include <linux/netfilter/xt_IMQ.h>
1114 +
1115 +#define ipt_imq_info xt_imq_info
1116 +
1117 +#endif /* _IPT_IMQ_H */
1118 +
1119 diff -urNp -x '*.orig' linux-4.9/include/linux/netfilter_ipv6/ip6t_IMQ.h linux-4.9/include/linux/netfilter_ipv6/ip6t_IMQ.h
1120 --- linux-4.9/include/linux/netfilter_ipv6/ip6t_IMQ.h   1970-01-01 01:00:00.000000000 +0100
1121 +++ linux-4.9/include/linux/netfilter_ipv6/ip6t_IMQ.h   2021-02-24 15:35:24.101274040 +0100
1122 @@ -0,0 +1,10 @@
1123 +#ifndef _IP6T_IMQ_H
1124 +#define _IP6T_IMQ_H
1125 +
1126 +/* Backwards compatibility for old userspace */
1127 +#include <linux/netfilter/xt_IMQ.h>
1128 +
1129 +#define ip6t_imq_info xt_imq_info
1130 +
1131 +#endif /* _IP6T_IMQ_H */
1132 +
1133 diff -urNp -x '*.orig' linux-4.9/include/linux/skbuff.h linux-4.9/include/linux/skbuff.h
1134 --- linux-4.9/include/linux/skbuff.h    2021-02-24 15:35:11.060868221 +0100
1135 +++ linux-4.9/include/linux/skbuff.h    2021-02-24 15:35:24.101274040 +0100
1136 @@ -39,6 +39,10 @@
1137  #include <linux/in6.h>
1138  #include <linux/if_packet.h>
1139  #include <net/flow.h>
1140 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1141 +#include <linux/imq.h>
1142 +#endif
1143 +
1144  
1145  /* The interface for checksum offload between the stack and networking drivers
1146   * is as follows...
1147 @@ -660,6 +664,9 @@ struct sk_buff {
1148          * first. This is owned by whoever has the skb queued ATM.
1149          */
1150         char                    cb[48] __aligned(8);
1151 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1152 +       void                    *cb_next;
1153 +#endif
1154  
1155         unsigned long           _skb_refdst;
1156         void                    (*destructor)(struct sk_buff *skb);
1157 @@ -669,6 +676,9 @@ struct sk_buff {
1158  #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
1159         struct nf_conntrack     *nfct;
1160  #endif
1161 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1162 +       struct nf_queue_entry   *nf_queue_entry;
1163 +#endif
1164  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1165         struct nf_bridge_info   *nf_bridge;
1166  #endif
1167 @@ -748,6 +758,9 @@ struct sk_buff {
1168         __u8                    offload_fwd_mark:1;
1169  #endif
1170         /* 2, 4 or 5 bit hole */
1171 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1172 +       __u8                    imq_flags:IMQ_F_BITS;
1173 +#endif
1174  
1175  #ifdef CONFIG_NET_SCHED
1176         __u16                   tc_index;       /* traffic control index */
1177 @@ -908,6 +921,12 @@ void kfree_skb_list(struct sk_buff *segs
1178  void skb_tx_error(struct sk_buff *skb);
1179  void consume_skb(struct sk_buff *skb);
1180  void  __kfree_skb(struct sk_buff *skb);
1181 +
1182 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1183 +int skb_save_cb(struct sk_buff *skb);
1184 +int skb_restore_cb(struct sk_buff *skb);
1185 +#endif
1186 +
1187  extern struct kmem_cache *skbuff_head_cache;
1188  
1189  void kfree_skb_partial(struct sk_buff *skb, bool head_stolen);
1190 @@ -3640,6 +3659,10 @@ static inline void __nf_copy(struct sk_b
1191         if (copy)
1192                 dst->nfctinfo = src->nfctinfo;
1193  #endif
1194 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1195 +       dst->imq_flags = src->imq_flags;
1196 +       dst->nf_queue_entry = src->nf_queue_entry;
1197 +#endif
1198  #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
1199         dst->nf_bridge  = src->nf_bridge;
1200         nf_bridge_get(src->nf_bridge);
1201 diff -urNp -x '*.orig' linux-4.9/include/net/netfilter/nf_queue.h linux-4.9/include/net/netfilter/nf_queue.h
1202 --- linux-4.9/include/net/netfilter/nf_queue.h  2016-12-11 20:17:54.000000000 +0100
1203 +++ linux-4.9/include/net/netfilter/nf_queue.h  2021-02-24 15:35:24.101274040 +0100
1204 @@ -30,6 +30,12 @@ struct nf_queue_handler {
1205  void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh);
1206  void nf_unregister_queue_handler(struct net *net);
1207  void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict);
1208 +void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1209 +
1210 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1211 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh);
1212 +void nf_unregister_queue_imq_handler(void);
1213 +#endif
1214  
1215  void nf_queue_entry_get_refs(struct nf_queue_entry *entry);
1216  void nf_queue_entry_release_refs(struct nf_queue_entry *entry);
1217 diff -urNp -x '*.orig' linux-4.9/include/net/pkt_sched.h linux-4.9/include/net/pkt_sched.h
1218 --- linux-4.9/include/net/pkt_sched.h   2016-12-11 20:17:54.000000000 +0100
1219 +++ linux-4.9/include/net/pkt_sched.h   2021-02-24 15:35:24.101274040 +0100
1220 @@ -105,6 +105,8 @@ int sch_direct_xmit(struct sk_buff *skb,
1221  
1222  void __qdisc_run(struct Qdisc *q);
1223  
1224 +struct sk_buff *qdisc_dequeue_skb(struct Qdisc *q, bool *validate);
1225 +
1226  static inline void qdisc_run(struct Qdisc *q)
1227  {
1228         if (qdisc_run_begin(q))
1229 diff -urNp -x '*.orig' linux-4.9/include/net/sch_generic.h linux-4.9/include/net/sch_generic.h
1230 --- linux-4.9/include/net/sch_generic.h 2021-02-24 15:35:11.077535407 +0100
1231 +++ linux-4.9/include/net/sch_generic.h 2021-02-24 15:35:24.101274040 +0100
1232 @@ -523,6 +523,13 @@ static inline int qdisc_enqueue(struct s
1233         return sch->enqueue(skb, sch, to_free);
1234  }
1235  
1236 +static inline int qdisc_enqueue_root(struct sk_buff *skb, struct Qdisc *sch,
1237 +                                     struct sk_buff **to_free)
1238 +{
1239 +    qdisc_skb_cb(skb)->pkt_len = skb->len;
1240 +    return qdisc_enqueue(skb, sch, to_free) & NET_XMIT_MASK;
1241 +}
1242 +
1243  static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
1244  {
1245         return q->flags & TCQ_F_CPUSTATS;
1246 diff -urNp -x '*.orig' linux-4.9/include/uapi/linux/netfilter.h linux-4.9/include/uapi/linux/netfilter.h
1247 --- linux-4.9/include/uapi/linux/netfilter.h    2016-12-11 20:17:54.000000000 +0100
1248 +++ linux-4.9/include/uapi/linux/netfilter.h    2021-02-24 15:35:24.101274040 +0100
1249 @@ -14,7 +14,8 @@
1250  #define NF_QUEUE 3
1251  #define NF_REPEAT 4
1252  #define NF_STOP 5
1253 -#define NF_MAX_VERDICT NF_STOP
1254 +#define NF_IMQ_QUEUE 6
1255 +#define NF_MAX_VERDICT NF_IMQ_QUEUE
1256  
1257  /* we overload the higher bits for encoding auxiliary data such as the queue
1258   * number or errno values. Not nice, but better than additional function
1259 diff -urNp -x '*.orig' linux-4.9/net/core/dev.c linux-4.9/net/core/dev.c
1260 --- linux-4.9/net/core/dev.c    2021-02-24 15:35:11.220873200 +0100
1261 +++ linux-4.9/net/core/dev.c    2021-02-24 15:35:24.101274040 +0100
1262 @@ -142,6 +142,9 @@
1263  #include <linux/netfilter_ingress.h>
1264  #include <linux/sctp.h>
1265  #include <linux/crash_dump.h>
1266 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1267 +#include <linux/imq.h>
1268 +#endif
1269  
1270  #include "net-sysfs.h"
1271  
1272 @@ -2965,7 +2968,12 @@ static int xmit_one(struct sk_buff *skb,
1273         unsigned int len;
1274         int rc;
1275  
1276 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1277 +       if ((!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) &&
1278 +               !(skb->imq_flags & IMQ_F_ENQUEUE))
1279 +#else
1280         if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all))
1281 +#endif
1282                 dev_queue_xmit_nit(skb, dev);
1283  
1284         len = skb->len;
1285 @@ -3004,6 +3012,8 @@ out:
1286         return skb;
1287  }
1288  
1289 +EXPORT_SYMBOL_GPL(dev_hard_start_xmit);
1290 +
1291  static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb,
1292                                           netdev_features_t features)
1293  {
1294 diff -urNp -x '*.orig' linux-4.9/net/core/skbuff.c linux-4.9/net/core/skbuff.c
1295 --- linux-4.9/net/core/skbuff.c 2021-02-24 15:35:11.224206637 +0100
1296 +++ linux-4.9/net/core/skbuff.c 2021-02-24 15:35:24.104607477 +0100
1297 @@ -82,6 +82,87 @@ struct kmem_cache *skbuff_head_cache __r
1298  static struct kmem_cache *skbuff_fclone_cache __read_mostly;
1299  int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
1300  EXPORT_SYMBOL(sysctl_max_skb_frags);
1301 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1302 +static struct kmem_cache *skbuff_cb_store_cache __read_mostly;
1303 +#endif
1304 +
1305 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1306 +/* Control buffer save/restore for IMQ devices */
1307 +struct skb_cb_table {
1308 +       char                    cb[48] __aligned(8);
1309 +       void                    *cb_next;
1310 +       atomic_t                refcnt;
1311 +};
1312 +
1313 +static DEFINE_SPINLOCK(skb_cb_store_lock);
1314 +
1315 +int skb_save_cb(struct sk_buff *skb)
1316 +{
1317 +       struct skb_cb_table *next;
1318 +       /* Push a copy of skb->cb onto the cb_next chain; returns 0 or -ENOMEM. */
1319 +       next = kmem_cache_alloc(skbuff_cb_store_cache, GFP_ATOMIC);
1320 +       if (!next)
1321 +               return -ENOMEM;
1322 +
1323 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1324 +       /* Snapshot the control buffer and chain the previous backup behind it. */
1325 +       memcpy(next->cb, skb->cb, sizeof(skb->cb));
1326 +       next->cb_next = skb->cb_next;
1327 +
1328 +       atomic_set(&next->refcnt, 1);
1329 +       /* The fresh backup becomes the head of the skb's chain. */
1330 +       skb->cb_next = next;
1331 +       return 0;
1332 +}
1333 +EXPORT_SYMBOL(skb_save_cb);
1334 +
1335 +int skb_restore_cb(struct sk_buff *skb)
1336 +{
1337 +       struct skb_cb_table *next;
1338 +       /* Pop the newest backup from the cb_next chain into skb->cb; returns 0. */
1339 +       if (!skb->cb_next)
1340 +               return 0;
1341 +
1342 +       next = skb->cb_next;
1343 +
1344 +       BUILD_BUG_ON(sizeof(skb->cb) != sizeof(next->cb));
1345 +       /* Copy the saved bytes back and unlink the entry from the skb. */
1346 +       memcpy(skb->cb, next->cb, sizeof(skb->cb));
1347 +       skb->cb_next = next->cb_next;
1348 +
1349 +       spin_lock(&skb_cb_store_lock);
1350 +       /* Free the backup once its last reference is dropped. */
1351 +       if (atomic_dec_and_test(&next->refcnt))
1352 +               kmem_cache_free(skbuff_cb_store_cache, next);
1353 +
1354 +       spin_unlock(&skb_cb_store_lock);
1355 +
1356 +       return 0;
1357 +}
1358 +EXPORT_SYMBOL(skb_restore_cb);
1359 +
1360 +static void skb_copy_stored_cb(struct sk_buff *   , const struct sk_buff *     ) __attribute__ ((unused));
1361 +static void skb_copy_stored_cb(struct sk_buff *new, const struct sk_buff *__old)
1362 +{
1363 +       struct skb_cb_table *next;
1364 +       struct sk_buff *old;
1365 +
1366 +       if (!__old->cb_next) {
1367 +               new->cb_next = NULL;
1368 +               return;
1369 +       }
1370 +
1371 +       spin_lock(&skb_cb_store_lock);
1372 +
1373 +       old = (struct sk_buff *)__old;
1374 +
1375 +       next = old->cb_next;
1376 +       atomic_inc(&next->refcnt);
1377 +       new->cb_next = next;
1378 +
1379 +       spin_unlock(&skb_cb_store_lock);
1380 +}
1381 +#endif
1382  
1383  /**
1384   *     skb_panic - private function for out-of-line support
1385 @@ -667,6 +748,28 @@ static void skb_release_head_state(struc
1386                 WARN_ON(in_irq());
1387                 skb->destructor(skb);
1388         }
1389 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1390 +       /*
1391 +        * This should not happen. When it does, avoid memleak by restoring
1392 +        * the chain of cb-backups.
1393 +        */
1394 +       while (skb->cb_next != NULL) {
1395 +               if (net_ratelimit())
1396 +                       pr_warn("IMQ: kfree_skb: skb->cb_next: %p\n",
1397 +                               skb->cb_next);
1398 +               /* Each restore unlinks one backup, so the loop drains the chain. */
1399 +               skb_restore_cb(skb);
1400 +       }
1401 +       /*
1402 +        * This should not happen either, nf_queue_entry is nullified in
1403 +        * imq_dev_xmit(). If we have non-NULL nf_queue_entry then we are
1404 +        * leaking entry pointers, maybe memory. We don't know if this is
1405 +        * pointer to already freed memory, or should this be freed.
1406 +        * If this happens we need to add refcounting, etc for nf_queue_entry.
1407 +        */
1408 +       if (skb->nf_queue_entry && net_ratelimit())
1409 +               pr_warn("%s\n", "IMQ: kfree_skb: skb->nf_queue_entry != NULL");
1410 +#endif
1411  #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1412         nf_conntrack_put(skb->nfct);
1413  #endif
1414 @@ -856,6 +959,10 @@ static void __copy_skb_header(struct sk_
1415         new->sp                 = secpath_get(old->sp);
1416  #endif
1417         __nf_copy(new, old, false);
1418 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1419 +       new->cb_next = NULL;
1420 +       /*skb_copy_stored_cb(new, old);*/
1421 +#endif
1422  
1423         /* Note : this field could be in headers_start/headers_end section
1424          * It is not yet because we do not want to have a 16 bit hole
1425 @@ -3536,6 +3643,13 @@ void __init skb_init(void)
1426                                                 0,
1427                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1428                                                 NULL);
1429 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1430 +       skbuff_cb_store_cache = kmem_cache_create("skbuff_cb_store_cache",
1431 +                                                 sizeof(struct skb_cb_table),
1432 +                                                 0,
1433 +                                                 SLAB_HWCACHE_ALIGN|SLAB_PANIC,
1434 +                                                 NULL);
1435 +#endif
1436  }
1437  
1438  static int
1439 diff -urNp -x '*.orig' linux-4.9/net/ipv6/ip6_output.c linux-4.9/net/ipv6/ip6_output.c
1440 --- linux-4.9/net/ipv6/ip6_output.c     2021-02-24 15:35:11.257541008 +0100
1441 +++ linux-4.9/net/ipv6/ip6_output.c     2021-02-24 15:35:24.104607477 +0100
1442 @@ -66,9 +66,6 @@ static int ip6_finish_output2(struct net
1443         struct in6_addr *nexthop;
1444         int ret;
1445  
1446 -       skb->protocol = htons(ETH_P_IPV6);
1447 -       skb->dev = dev;
1448 -
1449         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
1450                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1451  
1452 @@ -150,6 +147,13 @@ int ip6_output(struct net *net, struct s
1453                 return 0;
1454         }
1455  
1456 +       /*
1457 +       * IMQ-patch: moved setting skb->dev and skb->protocol from
1458 +       * ip6_finish_output2 to fix crashing at netif_skb_features().
1459 +       */
1460 +       skb->protocol = htons(ETH_P_IPV6);
1461 +       skb->dev = dev;
1462 +
1463         return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
1464                             net, sk, skb, NULL, dev,
1465                             ip6_finish_output,
1466 diff -urNp -x '*.orig' linux-4.9/net/netfilter/Kconfig linux-4.9/net/netfilter/Kconfig
1467 --- linux-4.9/net/netfilter/Kconfig     2021-02-24 15:35:11.727555634 +0100
1468 +++ linux-4.9/net/netfilter/Kconfig     2021-02-24 15:35:24.104607477 +0100
1469 @@ -823,6 +823,18 @@ config NETFILTER_XT_TARGET_LOG
1470  
1471           To compile it as a module, choose M here.  If unsure, say N.
1472  
1473 +config NETFILTER_XT_TARGET_IMQ
1474 +        tristate '"IMQ" target support'
1475 +       depends on NETFILTER_XTABLES
1476 +       depends on IP_NF_MANGLE || IP6_NF_MANGLE
1477 +       select IMQ
1478 +       default m if NETFILTER_ADVANCED=n
1479 +        help
1480 +          This option adds a `IMQ' target which is used to specify if and
1481 +          to which imq device packets should get enqueued/dequeued.
1482 +
1483 +          To compile it as a module, choose M here.  If unsure, say N.
1484 +
1485  config NETFILTER_XT_TARGET_MARK
1486         tristate '"MARK" target support'
1487         depends on NETFILTER_ADVANCED
1488 diff -urNp -x '*.orig' linux-4.9/net/netfilter/Makefile linux-4.9/net/netfilter/Makefile
1489 --- linux-4.9/net/netfilter/Makefile    2021-02-24 15:35:11.727555634 +0100
1490 +++ linux-4.9/net/netfilter/Makefile    2021-02-24 15:35:24.104607477 +0100
1491 @@ -119,6 +119,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) +=
1492  obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
1493  obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
1494  obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o
1495 +obj-$(CONFIG_NETFILTER_XT_TARGET_IMQ) += xt_IMQ.o
1496  obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
1497  obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o
1498  obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o
1499 diff -urNp -x '*.orig' linux-4.9/net/netfilter/core.c linux-4.9/net/netfilter/core.c
1500 --- linux-4.9/net/netfilter/core.c      2021-02-24 15:35:11.287541941 +0100
1501 +++ linux-4.9/net/netfilter/core.c      2021-02-24 15:35:24.104607477 +0100
1502 @@ -360,8 +360,11 @@ next_hook:
1503                 ret = NF_DROP_GETERR(verdict);
1504                 if (ret == 0)
1505                         ret = -EPERM;
1506 -       } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
1507 +       } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE ||
1508 +                  (verdict & NF_VERDICT_MASK) == NF_IMQ_QUEUE) {
1509                 ret = nf_queue(skb, state, &entry, verdict);
1510 +               if (ret == -ECANCELED)
1511 +                       goto next_hook;
1512                 if (ret == 1 && entry)
1513                         goto next_hook;
1514         } else {
1515 diff -urNp -x '*.orig' linux-4.9/net/netfilter/nf_queue.c linux-4.9/net/netfilter/nf_queue.c
1516 --- linux-4.9/net/netfilter/nf_queue.c  2016-12-11 20:17:54.000000000 +0100
1517 +++ linux-4.9/net/netfilter/nf_queue.c  2021-02-24 15:35:24.104607477 +0100
1518 @@ -27,6 +27,23 @@
1519   * receives, no matter what.
1520   */
1521  
1522 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1523 +static const struct nf_queue_handler __rcu *queue_imq_handler __read_mostly;
1524 +
1525 +void nf_register_queue_imq_handler(const struct nf_queue_handler *qh)
1526 +{
1527 +       rcu_assign_pointer(queue_imq_handler, qh);
1528 +}
1529 +EXPORT_SYMBOL_GPL(nf_register_queue_imq_handler);
1530 +
1531 +void nf_unregister_queue_imq_handler(void)
1532 +{
1533 +       RCU_INIT_POINTER(queue_imq_handler, NULL);
1534 +       synchronize_rcu();
1535 +}
1536 +EXPORT_SYMBOL_GPL(nf_unregister_queue_imq_handler);
1537 +#endif
1538 +
1539  /* return EBUSY when somebody else is registered, return EEXIST if the
1540   * same handler is registered, return 0 in case of success. */
1541  void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh)
1542 @@ -108,16 +125,28 @@ void nf_queue_nf_hook_drop(struct net *n
1543  }
1544  
1545  static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state,
1546 -                     unsigned int queuenum)
1547 +                     unsigned int verdict)
1548  {
1549         int status = -ENOENT;
1550         struct nf_queue_entry *entry = NULL;
1551         const struct nf_afinfo *afinfo;
1552         const struct nf_queue_handler *qh;
1553         struct net *net = state->net;
1554 +       unsigned int queuetype = verdict & NF_VERDICT_MASK;
1555 +       unsigned int queuenum  = verdict >> NF_VERDICT_QBITS;
1556  
1557         /* QUEUE == DROP if no one is waiting, to be safe. */
1558 -       qh = rcu_dereference(net->nf.queue_handler);
1559 +       if (queuetype == NF_IMQ_QUEUE) {
1560 +#if defined(CONFIG_IMQ) || defined(CONFIG_IMQ_MODULE)
1561 +               qh = rcu_dereference(queue_imq_handler);
1562 +#else
1563 +               BUG();          /* IMQ verdict seen but IMQ is not built */
1564 +               goto err;
1565 +#endif
1566 +       } else {
1567 +               qh = rcu_dereference(net->nf.queue_handler);
1568 +       }
1569 +
1570         if (!qh) {
1571                 status = -ESRCH;
1572                 goto err;
1573 @@ -218,6 +247,7 @@ okfn:
1574                 local_bh_enable();
1575                 break;
1576         case NF_QUEUE:
1577 +       case NF_IMQ_QUEUE:
1578                 err = nf_queue(skb, &entry->state, &hook_entry, verdict);
1579                 if (err == 1) {
1580                         if (hook_entry)
1581 diff -urNp -x '*.orig' linux-4.9/net/netfilter/xt_IMQ.c linux-4.9/net/netfilter/xt_IMQ.c
1582 --- linux-4.9/net/netfilter/xt_IMQ.c    1970-01-01 01:00:00.000000000 +0100
1583 +++ linux-4.9/net/netfilter/xt_IMQ.c    2021-02-24 15:35:24.104607477 +0100
1584 @@ -0,0 +1,72 @@
1585 +/*
1586 + * This target marks packets to be enqueued to an imq device
1587 + */
1588 +#include <linux/module.h>
1589 +#include <linux/skbuff.h>
1590 +#include <linux/netfilter/x_tables.h>
1591 +#include <linux/netfilter/xt_IMQ.h>
1592 +#include <linux/imq.h>
1593 +
1594 +static unsigned int imq_target(struct sk_buff *pskb,
1595 +                               const struct xt_action_param *par)
1596 +{
1597 +       const struct xt_imq_info *mr = par->targinfo;
1598 +
1599 +       pskb->imq_flags = (mr->todev & IMQ_F_IFMASK) | IMQ_F_ENQUEUE;
1600 +
1601 +       return XT_CONTINUE;
1602 +}
1603 +
1604 +static int imq_checkentry(const struct xt_tgchk_param *par)
1605 +{
1606 +       struct xt_imq_info *mr = par->targinfo;
1607 +
1608 +       if (mr->todev > IMQ_MAX_DEVS - 1) {
1609 +               pr_warn("IMQ: invalid device specified, highest is %u\n",
1610 +                       IMQ_MAX_DEVS - 1);
1611 +               return -EINVAL;
1612 +       }
1613 +
1614 +       return 0;
1615 +}
1616 +
1617 +static struct xt_target xt_imq_reg[] __read_mostly = {
1618 +       {
1619 +               .name           = "IMQ",
1620 +               .family         = AF_INET,
1621 +               .checkentry     = imq_checkentry,
1622 +               .target         = imq_target,
1623 +               .targetsize     = sizeof(struct xt_imq_info),
1624 +               .table          = "mangle",
1625 +               .me             = THIS_MODULE
1626 +       },
1627 +       {
1628 +               .name           = "IMQ",
1629 +               .family         = AF_INET6,
1630 +               .checkentry     = imq_checkentry,
1631 +               .target         = imq_target,
1632 +               .targetsize     = sizeof(struct xt_imq_info),
1633 +               .table          = "mangle",
1634 +               .me             = THIS_MODULE
1635 +       },
1636 +};
1637 +
1638 +static int __init imq_init(void)
1639 +{
1640 +       return xt_register_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1641 +}
1642 +
1643 +static void __exit imq_fini(void)
1644 +{
1645 +       xt_unregister_targets(xt_imq_reg, ARRAY_SIZE(xt_imq_reg));
1646 +}
1647 +
1648 +module_init(imq_init);
1649 +module_exit(imq_fini);
1650 +
1651 +MODULE_AUTHOR("https://github.com/imq/linuximq");
1652 +MODULE_DESCRIPTION("Pseudo-driver for the intermediate queue device. See https://github.com/imq/linuximq/wiki for more information.");
1653 +MODULE_LICENSE("GPL");
1654 +MODULE_ALIAS("ipt_IMQ");
1655 +MODULE_ALIAS("ip6t_IMQ");
1656 +
1657 diff -urNp -x '*.orig' linux-4.9/net/sched/sch_generic.c linux-4.9/net/sched/sch_generic.c
1658 --- linux-4.9/net/sched/sch_generic.c   2021-02-24 15:35:11.317542875 +0100
1659 +++ linux-4.9/net/sched/sch_generic.c   2021-02-24 15:35:24.104607477 +0100
1660 @@ -154,6 +154,14 @@ bulk:
1661         return skb;
1662  }
1663  
1664 +struct sk_buff *qdisc_dequeue_skb(struct Qdisc *q, bool *validate)
1665 +{
1666 +       int packets;
1667 +
1668 +       return dequeue_skb(q, validate, &packets);
1669 +}
1670 +EXPORT_SYMBOL(qdisc_dequeue_skb);
1671 +
1672  /*
1673   * Transmit possibly several skbs, and handle the return status as
1674   * required. Owning running seqcount bit guarantees that
This page took 0.359132 seconds and 3 git commands to generate.