1 diff -urNp v2.6.27/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.27/linux/include/net/ip_vs.h 2008-10-11 12:46:15.000000000 +0300
3 +++ linux/include/net/ip_vs.h 2008-10-11 14:24:47.000000000 +0300
5 #include <linux/timer.h>
7 #include <net/checksum.h>
8 +#include <linux/skbuff.h>
9 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
10 +#include <net/netfilter/nf_conntrack.h>
11 +#include <net/netfilter/nf_conntrack_core.h>
12 +#include <net/netfilter/nf_conntrack_expect.h>
13 +#include <net/netfilter/nf_conntrack_helper.h>
16 #ifdef CONFIG_IP_VS_DEBUG
17 #include <linux/net.h>
18 @@ -474,6 +481,16 @@ extern void ip_vs_init_hash_table(struct
22 + * Netfilter connection tracking
23 + * (from ip_vs_nfct.c)
25 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
26 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
27 + struct ip_vs_conn *cp,
28 + __be16 port, __u16 proto, int from_rs);
29 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
32 * IPVS connection entry hash table
34 #ifndef CONFIG_IP_VS_TAB_BITS
35 @@ -643,9 +660,42 @@ extern int sysctl_ip_vs_expire_nodest_co
36 extern int sysctl_ip_vs_expire_quiescent_template;
37 extern int sysctl_ip_vs_sync_threshold[2];
38 extern int sysctl_ip_vs_nat_icmp_send;
39 +extern int sysctl_ip_vs_snat_reroute;
40 extern struct ip_vs_stats ip_vs_stats;
41 extern const struct ctl_path net_vs_ctl_path[];
43 +#ifdef CONFIG_IP_VS_NFCT
45 +extern int sysctl_ip_vs_conntrack;
47 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
49 + return sysctl_ip_vs_conntrack && skb->nfct;
52 +/* Returns boolean and skb is freed on failure */
53 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
55 + if (!ip_vs_use_conntrack(skb))
57 + return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
58 + ip_vs_nfct_confirm(skb, cp, hooknum);
63 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
68 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
75 extern struct ip_vs_service *
76 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
78 diff -urNp v2.6.27/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
79 --- v2.6.27/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
80 +++ linux/net/ipv4/ipvs/Kconfig 2008-10-11 14:19:27.000000000 +0300
81 @@ -221,4 +221,12 @@ config IP_VS_FTP
82 If you want to compile it in kernel, say Y. To compile it as a
83 module, choose M here. If unsure, say N.
86 + bool "Netfilter connection tracking"
87 + depends on NF_CONNTRACK
89 + The Netfilter connection tracking support allows the IPVS
90 + connection state to be exported to the Netfilter framework
91 + for filtering purposes.
94 diff -urNp v2.6.27/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
95 --- v2.6.27/linux/net/ipv4/ipvs/Makefile 2005-06-18 08:50:52.000000000 +0300
96 +++ linux/net/ipv4/ipvs/Makefile 2008-10-11 14:19:27.000000000 +0300
97 @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
98 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
99 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
101 +ip_vs-extra_objs-y :=
102 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
104 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
105 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
106 ip_vs_est.o ip_vs_proto.o \
107 - $(ip_vs_proto-objs-y)
108 + $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
112 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
113 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_conn.c 2008-10-11 12:46:16.000000000 +0300
114 +++ linux/net/ipv4/ipvs/ip_vs_conn.c 2008-10-11 14:19:27.000000000 +0300
115 @@ -591,6 +591,11 @@ static void ip_vs_conn_expire(unsigned l
117 ip_vs_control_del(cp);
119 +#ifdef CONFIG_IP_VS_NFCT
120 + if (sysctl_ip_vs_conntrack)
121 + ip_vs_nfct_conn_drop(cp);
124 if (unlikely(cp->app != NULL))
125 ip_vs_unbind_app(cp);
126 ip_vs_unbind_dest(cp);
127 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
128 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_core.c 2008-10-11 12:46:16.000000000 +0300
129 +++ linux/net/ipv4/ipvs/ip_vs_core.c 2008-10-11 14:19:27.000000000 +0300
130 @@ -659,6 +659,8 @@ static int ip_vs_out_icmp(struct sk_buff
132 skb->ipvs_property = 1;
134 + if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL))
138 __ip_vs_conn_put(cp);
139 @@ -759,19 +761,31 @@ ip_vs_out(unsigned int hooknum, struct s
140 if (!skb_make_writable(skb, ihl))
143 + if (!ip_vs_confirm_conntrack(skb, cp, hooknum))
146 /* mangle the packet */
147 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
149 ip_hdr(skb)->saddr = cp->vaddr;
150 ip_send_check(ip_hdr(skb));
153 + * nf_iterate does not expect a change in skb->dst->dev.
154 + * It looks like it is not fatal to enable this code for hooks
155 + * where our handlers are at the end of the chain list and
156 + * when all subsequent handlers use skb->dst->dev and not outdev.
157 + * It will definitely route the inout NAT traffic properly
158 + * when multiple paths are used.
161 /* For policy routing, packets originating from this
162 * machine itself may be routed differently to packets
163 * passing through. We want this packet to be routed as
164 * if it came from this machine itself. So re-compute
165 * the routing information.
167 - if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
168 + if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL) != 0)
171 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
172 @@ -786,8 +800,11 @@ ip_vs_out(unsigned int hooknum, struct s
176 - ip_vs_conn_put(cp);
180 + ip_vs_conn_put(cp);
185 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
186 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-10-11 12:46:16.000000000 +0300
187 +++ linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-10-11 14:19:27.000000000 +0300
188 @@ -79,6 +79,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
189 int sysctl_ip_vs_expire_quiescent_template = 0;
190 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
191 int sysctl_ip_vs_nat_icmp_send = 0;
192 +int sysctl_ip_vs_snat_reroute = 0;
193 +#ifdef CONFIG_IP_VS_NFCT
194 +int sysctl_ip_vs_conntrack = 0;
198 #ifdef CONFIG_IP_VS_DEBUG
199 @@ -1457,6 +1461,15 @@ static struct ctl_table vs_vars[] = {
201 .proc_handler = &proc_dointvec,
203 +#ifdef CONFIG_IP_VS_NFCT
205 + .procname = "conntrack",
206 + .data = &sysctl_ip_vs_conntrack,
207 + .maxlen = sizeof(int),
209 + .proc_handler = &proc_dointvec,
213 .procname = "drop_entry",
214 .data = &sysctl_ip_vs_drop_entry,
215 @@ -1478,6 +1491,13 @@ static struct ctl_table vs_vars[] = {
217 .proc_handler = &proc_do_defense_mode,
220 + .procname = "snat_reroute",
221 + .data = &sysctl_ip_vs_snat_reroute,
222 + .maxlen = sizeof(int),
224 + .proc_handler = &proc_dointvec,
228 .procname = "timeout_established",
229 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
230 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-10-11 12:46:16.000000000 +0300
231 +++ linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-10-11 14:19:27.000000000 +0300
232 @@ -193,6 +193,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
233 ip_vs_control_add(n_cp, cp);
236 +#ifdef CONFIG_IP_VS_NFCT
238 + ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
242 * Replace the old passive address with the new one
244 @@ -325,6 +330,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
245 ip_vs_control_add(n_cp, cp);
248 +#ifdef CONFIG_IP_VS_NFCT
250 + ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
254 * Move tunnel to listen state
256 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
257 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
258 +++ linux/net/ipv4/ipvs/ip_vs_nfct.c 2008-10-11 14:19:27.000000000 +0300
261 + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
263 + * Portions Copyright (C) 2001-2002
264 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
266 + * Portions Copyright (C) 2003-2008
270 + * This code is free software; you can redistribute it and/or modify
271 + * it under the terms of the GNU General Public License as published by
272 + * the Free Software Foundation; either version 2 of the License, or
273 + * (at your option) any later version.
275 + * This program is distributed in the hope that it will be useful,
276 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
277 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
278 + * GNU General Public License for more details.
280 + * You should have received a copy of the GNU General Public License
281 + * along with this program; if not, write to the Free Software
282 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
286 + * Ben North <ben@redfrontdoor.org>
287 + * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
292 + * - provide conntrack confirmation for new and related connections;
293 + * this way we can see their proper conntrack state in all hooks
294 + * - support for all forwarding methods, not only NAT
295 + * - FTP support (NAT), ability to support other NAT apps with expectations
296 + * - to correctly create expectations for related NAT connections the proper
297 + * NF conntrack support must already be installed, e.g. ip_vs_ftp requires
298 + * nf_conntrack_ftp for the same ports
302 +#include <linux/module.h>
303 +#include <linux/types.h>
304 +#include <linux/kernel.h>
305 +#include <linux/errno.h>
306 +#include <linux/compiler.h>
307 +#include <linux/vmalloc.h>
308 +#include <linux/skbuff.h>
310 +#include <linux/netfilter.h>
311 +#include <linux/netfilter_ipv4.h>
312 +#include <net/ip_vs.h>
315 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
318 +#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
319 +#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
320 + NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
323 +#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
324 +#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
325 + NIPQUAD((c)->vaddr), ntohs((c)->vport), \
326 + NIPQUAD((c)->daddr), ntohs((c)->dport), \
327 + (c)->protocol, (c)->state
329 +/* Returns boolean and skb is freed on failure */
330 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
331 + unsigned int hooknum)
335 + * - the nfct is !NULL and is not confirmed
336 + * - we are called before any mangle
339 + struct iphdr *iph = ip_hdr(skb);
340 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
341 + struct nf_conntrack_tuple new_reply;
343 + __be16 _ports[2], *pptr;
344 +#ifdef CONFIG_IP_VS_DEBUG
345 + struct nf_conntrack_tuple *orig_tup =
346 + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
347 + struct nf_conntrack_tuple *orig_rep =
348 + &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
350 +#ifdef CONFIG_NF_NAT_NEEDED
351 + int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
353 + int initialized = 0;
356 + IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
357 + ", cp=" FMT_CONN "\n",
358 + __FUNCTION__, ct, initialized,
359 + ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
361 +#ifdef CONFIG_NF_NAT_NEEDED
363 + * This is really bad, maybe we are trying to alter a DNAT conn?
364 + * This is not supported, avoid the confirmation.
366 + if (initialized && ct->status & IPS_NAT_MASK) {
367 +#ifdef CONFIG_IP_VS_DEBUG
368 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
369 + __FUNCTION__, ct, ct->status, initialized);
375 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
379 + * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
380 + * For related connections in inout direction it is done in
381 + * expectfn callback.
384 + pptr = skb_header_pointer(skb, ip_hdrlen(skb),
385 + sizeof(_ports), _ports);
389 + new_reply = (struct nf_conntrack_tuple) {
390 + .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
392 + new_reply.src.u3.ip = cp->daddr;
393 + new_reply.src.u.tcp.port = cp->dport;
394 + new_reply.src.l3num = PF_INET;
395 + new_reply.dst.u3.ip = iph->saddr;
396 + new_reply.dst.u.tcp.port = pptr[0];
398 + nf_conntrack_alter_reply(ct, &new_reply);
400 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
401 + ", new_reply=" FMT_TUPLE " => alter_reply\n",
402 + __FUNCTION__, ct, initialized,
403 + ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
406 + * No need to rehash NAT info because we don't change source
407 + * address in original direction
412 + ret = __nf_conntrack_confirm(skb);
414 + if (ret != NF_STOLEN) {
415 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
416 + __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
419 + if (ret != NF_ACCEPT)
424 + if (ret != NF_STOLEN)
430 + * Confirm (and optionally alter) the conntrack entry if needed
431 + * because the IPVS packets do not reach ipv4_confirm.
433 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
434 + unsigned int hooknum)
436 + struct iphdr *iph = ip_hdr(skb);
437 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
439 + /* By the time we're sending the packet out the other
440 + * side, there should be a confirmed Netfilter CT entry
441 + * for this connection. This may not be the case,
442 + * however, if it's a brand new connection, or if the NF
443 + * entry has timed out before ours has. Either way, if
444 + * the NF CT entry is unconfirmed, confirm it, and deal
445 + * with reply tuple mangling at the same time.
448 + /* We only deal with TCP or UDP packets */
449 + if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
452 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
454 + * Do not be surprised if non-NAT conntracks stay in SYN_SENT
455 + * state; maybe the replies from the real server go
456 + * directly to the client. In any case, keep them in REPLIED
457 + * state (ESTABLISHED).
459 + if (iph->protocol != IPPROTO_TCP ||
460 + IP_VS_TCP_S_ESTABLISHED == cp->state) {
461 + set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
466 + * We assume the reused connections do not change their rip:rport
467 + * and we do not need to alter their conntrack reply
469 + return __ip_vs_nfct_confirm(skb, cp, hooknum);
473 + * We are called from init_conntrack() as expectfn handler
476 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
477 + struct nf_conntrack_expect *exp)
479 + struct nf_conntrack_tuple *orig, new_reply;
480 + struct ip_vs_conn *cp;
482 + if (exp->tuple.src.l3num != PF_INET)
486 + * - We assume that no NF locks are held before this callback
487 + * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
488 + * expectations even if they use wildcard values; now we provide
489 + * the actual values from the newly created original conntrack direction
490 + * - the conntrack is confirmed when packet reaches IPVS hooks
494 + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
495 + cp = ip_vs_conn_out_get(orig->dst.protonum,
496 + orig->src.u3.ip, orig->src.u.tcp.port,
497 + orig->dst.u3.ip, orig->dst.u.tcp.port);
499 + /* Change reply CLIENT->RS to CLIENT->VS */
500 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
501 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
502 + ", found inout cp=" FMT_CONN "\n",
503 + __FUNCTION__, ct, ct->status,
504 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
506 + new_reply.dst.u3.ip = cp->vaddr;
507 + new_reply.dst.u.tcp.port = cp->vport;
508 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
509 + ", inout cp=" FMT_CONN "\n",
511 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
517 + cp = ip_vs_conn_in_get(orig->dst.protonum,
518 + orig->src.u3.ip, orig->src.u.tcp.port,
519 + orig->dst.u3.ip, orig->dst.u.tcp.port);
521 + /* Change reply VS->CLIENT to RS->CLIENT */
522 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
523 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
524 + ", found outin cp=" FMT_CONN "\n",
525 + __FUNCTION__, ct, ct->status,
526 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
528 + new_reply.src.u3.ip = cp->daddr;
529 + new_reply.src.u.tcp.port = cp->dport;
530 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
531 + ", outin cp=" FMT_CONN "\n",
533 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
537 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
538 + __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
543 + /* Never alter conntrack for non-NAT conns */
544 + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
545 + nf_conntrack_alter_reply(ct, &new_reply);
546 + ip_vs_conn_put(cp);
551 + * Create NF conntrack expectation with wildcard (optional) source port.
552 + * Then the default callback function will alter the reply and will confirm
553 + * the conntrack entry when the first packet comes.
555 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
556 + __be16 port, __u16 proto, int from_rs)
558 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
559 + struct nf_conntrack_expect *e;
561 + if (!sysctl_ip_vs_conntrack)
565 + IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
566 + __FUNCTION__, ct, ARG_CONN(cp));
570 + if (!(e = nf_ct_expect_alloc(ct)))
573 + e->expectfn = ip_vs_nfct_expect_callback;
576 + e->class = NF_CT_EXPECT_CLASS_DEFAULT;
577 + memset(&e->tuple, 0, sizeof(e->tuple));
578 + e->tuple.src.u.tcp.port = port;
579 + e->tuple.src.l3num = PF_INET;
580 + e->tuple.dst.protonum = proto;
581 + memset(&e->mask, 0, sizeof(e->mask));
582 + e->mask.src.u3.ip = 0xffffffff;
583 + e->mask.src.u.all = port? 0xffff : 0;
586 + e->tuple.src.u3.ip = cp->daddr;
587 + e->tuple.dst.u3.ip = cp->caddr;
588 + e->tuple.dst.u.tcp.port = cp->cport;
590 + e->tuple.src.u3.ip = cp->caddr;
591 + e->tuple.dst.u3.ip = cp->vaddr;
592 + e->tuple.dst.u.tcp.port = cp->vport;
595 + IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
596 + __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
597 + nf_ct_expect_related(e);
598 + nf_ct_expect_put(e);
602 + * Our connection was terminated, try to drop the conntrack immediately
604 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
606 + struct nf_conntrack_tuple_hash *h;
607 + struct nf_conn *ct;
608 + struct nf_conntrack_tuple tuple;
613 + tuple = (struct nf_conntrack_tuple) {
614 + .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
615 + tuple.src.u3.ip = cp->caddr;
616 + tuple.src.u.all = cp->cport;
617 + tuple.src.l3num = PF_INET;
618 + tuple.dst.u3.ip = cp->vaddr;
619 + tuple.dst.u.all = cp->vport;
621 + IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
622 + " for conn " FMT_CONN "\n",
623 + __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
625 + h = nf_conntrack_find_get(&tuple);
627 + ct = nf_ct_tuplehash_to_ctrack(h);
628 + if (del_timer(&ct->timeout)) {
629 + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
631 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
632 + if (ct->timeout.function)
633 + ct->timeout.function(ct->timeout.data);
635 + IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
637 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
641 + IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
642 + __FUNCTION__, ARG_TUPLE(&tuple));
646 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
647 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-10-11 12:46:16.000000000 +0300
648 +++ linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-10-11 14:19:27.000000000 +0300
649 @@ -139,7 +139,6 @@ int
650 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
651 struct ip_vs_protocol *pp)
653 - /* we do not touch skb and do not need pskb ptr */
657 @@ -197,6 +196,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
658 dst_release(skb->dst);
659 skb->dst = &rt->u.dst;
661 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
664 /* Another hack: avoid icmp_send in ip_fragment */
667 @@ -209,6 +211,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
668 dst_link_failure(skb);
675 @@ -261,6 +264,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
676 dst_release(skb->dst);
677 skb->dst = &rt->u.dst;
679 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
682 /* mangle the packet */
683 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
685 @@ -284,8 +290,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
687 dst_link_failure(skb);
696 @@ -384,14 +391,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
697 /* fix old IP header checksum */
698 ip_send_check(old_iph);
700 - skb_push(skb, sizeof(struct iphdr));
701 - skb_reset_network_header(skb);
702 - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
705 dst_release(skb->dst);
706 skb->dst = &rt->u.dst;
708 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
711 + skb_push(skb, sizeof(struct iphdr));
712 + skb_reset_network_header(skb);
713 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
716 * Push down and install the IPIP header.
718 @@ -419,6 +429,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
719 dst_link_failure(skb);
726 @@ -464,6 +475,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
727 dst_release(skb->dst);
728 skb->dst = &rt->u.dst;
730 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
733 /* Another hack: avoid icmp_send in ip_fragment */
736 @@ -476,6 +490,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
737 dst_link_failure(skb);
744 @@ -535,6 +550,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
745 dst_release(skb->dst);
746 skb->dst = &rt->u.dst;
748 + /* TODO: properly alter reply for NFCT */
750 ip_vs_nat_icmp(skb, pp, cp, 0);
752 /* Another hack: avoid icmp_send in ip_fragment */