1 diff -urNp v2.6.25/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.25/linux/include/net/ip_vs.h 2008-04-17 09:58:08.000000000 +0300
3 +++ linux/include/net/ip_vs.h 2008-04-19 19:59:24.000000000 +0300
6 #include <linux/sysctl.h> /* For ctl_path */
9 +#include <linux/skbuff.h>
10 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11 +#include <net/netfilter/nf_conntrack.h>
12 +#include <net/netfilter/nf_conntrack_core.h>
13 +#include <net/netfilter/nf_conntrack_expect.h>
14 +#include <net/netfilter/nf_conntrack_helper.h>
18 #define IP_VS_VERSION_CODE 0x010201
19 #define NVERSION(version) \
20 (version >> 16) & 0xFF, \
21 @@ -686,6 +696,16 @@ extern void ip_vs_init_hash_table(struct
25 + * Netfilter connection tracking
26 + * (from ip_vs_nfct.c)
28 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
29 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
30 + struct ip_vs_conn *cp,
31 + __be16 port, __u16 proto, int from_rs);
32 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
35 * IPVS connection entry hash table
37 #ifndef CONFIG_IP_VS_TAB_BITS
38 @@ -855,9 +875,42 @@ extern int sysctl_ip_vs_expire_nodest_co
39 extern int sysctl_ip_vs_expire_quiescent_template;
40 extern int sysctl_ip_vs_sync_threshold[2];
41 extern int sysctl_ip_vs_nat_icmp_send;
42 +extern int sysctl_ip_vs_snat_reroute;
43 extern struct ip_vs_stats ip_vs_stats;
44 extern struct ctl_path net_vs_ctl_path[];
46 +#ifdef CONFIG_IP_VS_NFCT
48 +extern int sysctl_ip_vs_conntrack;
50 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
52 + return sysctl_ip_vs_conntrack && skb->nfct;
55 +/* Returns boolean and skb is freed on failure */
56 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
58 + if (!ip_vs_use_conntrack(skb))
60 + return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
61 + ip_vs_nfct_confirm(skb, cp, hooknum);
66 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
71 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
78 extern struct ip_vs_service *
79 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
81 diff -urNp v2.6.25/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
82 --- v2.6.25/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
83 +++ linux/net/ipv4/ipvs/Kconfig 2008-04-19 19:55:40.000000000 +0300
84 @@ -221,4 +221,12 @@ config IP_VS_FTP
85 If you want to compile it in kernel, say Y. To compile it as a
86 module, choose M here. If unsure, say N.
89 + bool "Netfilter connection tracking"
90 + depends on NF_CONNTRACK
92 + The Netfilter connection tracking support allows the IPVS
93 + connection state to be exported to the Netfilter framework
94 + for filtering purposes.
97 diff -urNp v2.6.25/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
98 --- v2.6.25/linux/net/ipv4/ipvs/Makefile 2005-06-18 08:50:52.000000000 +0300
99 +++ linux/net/ipv4/ipvs/Makefile 2008-04-19 19:55:40.000000000 +0300
100 @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
101 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
102 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
104 +ip_vs-extra_objs-y :=
105 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
107 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
108 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
109 ip_vs_est.o ip_vs_proto.o \
110 - $(ip_vs_proto-objs-y)
111 + $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
115 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
116 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_conn.c 2008-04-17 09:58:09.000000000 +0300
117 +++ linux/net/ipv4/ipvs/ip_vs_conn.c 2008-04-19 19:55:40.000000000 +0300
118 @@ -593,6 +593,11 @@ static void ip_vs_conn_expire(unsigned l
120 ip_vs_control_del(cp);
122 +#ifdef CONFIG_IP_VS_NFCT
123 + if (sysctl_ip_vs_conntrack)
124 + ip_vs_nfct_conn_drop(cp);
127 if (unlikely(cp->app != NULL))
128 ip_vs_unbind_app(cp);
129 ip_vs_unbind_dest(cp);
130 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
131 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_core.c 2008-04-17 09:58:09.000000000 +0300
132 +++ linux/net/ipv4/ipvs/ip_vs_core.c 2008-04-19 19:55:40.000000000 +0300
133 @@ -661,6 +661,8 @@ static int ip_vs_out_icmp(struct sk_buff
135 skb->ipvs_property = 1;
137 + if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL))
141 __ip_vs_conn_put(cp);
142 @@ -761,19 +763,31 @@ ip_vs_out(unsigned int hooknum, struct s
143 if (!skb_make_writable(skb, ihl))
146 + if (!ip_vs_confirm_conntrack(skb, cp, hooknum))
149 /* mangle the packet */
150 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
152 ip_hdr(skb)->saddr = cp->vaddr;
153 ip_send_check(ip_hdr(skb));
156 + * nf_iterate does not expect skb->dst->dev to change.
157 + * It looks like it is not fatal to enable this code for hooks
158 + * where our handlers are at the end of the chain list and
159 + * when all subsequent handlers use skb->dst->dev and not outdev.
160 + * It will definitely route the inout NAT traffic properly
161 + * when multiple paths are used.
164 /* For policy routing, packets originating from this
165 * machine itself may be routed differently to packets
166 * passing through. We want this packet to be routed as
167 * if it came from this machine itself. So re-compute
168 * the routing information.
170 - if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
171 + if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL) != 0)
174 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
175 @@ -788,8 +802,11 @@ ip_vs_out(unsigned int hooknum, struct s
179 - ip_vs_conn_put(cp);
183 + ip_vs_conn_put(cp);
188 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
189 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-04-17 09:58:09.000000000 +0300
190 +++ linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-04-19 19:55:40.000000000 +0300
191 @@ -81,6 +81,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
192 int sysctl_ip_vs_expire_quiescent_template = 0;
193 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
194 int sysctl_ip_vs_nat_icmp_send = 0;
195 +int sysctl_ip_vs_snat_reroute = 0;
196 +#ifdef CONFIG_IP_VS_NFCT
197 +int sysctl_ip_vs_conntrack = 0;
201 #ifdef CONFIG_IP_VS_DEBUG
202 @@ -1446,6 +1450,15 @@ static struct ctl_table vs_vars[] = {
204 .proc_handler = &proc_dointvec,
206 +#ifdef CONFIG_IP_VS_NFCT
208 + .procname = "conntrack",
209 + .data = &sysctl_ip_vs_conntrack,
210 + .maxlen = sizeof(int),
212 + .proc_handler = &proc_dointvec,
216 .procname = "drop_entry",
217 .data = &sysctl_ip_vs_drop_entry,
218 @@ -1467,6 +1480,13 @@ static struct ctl_table vs_vars[] = {
220 .proc_handler = &proc_do_defense_mode,
223 + .procname = "snat_reroute",
224 + .data = &sysctl_ip_vs_snat_reroute,
225 + .maxlen = sizeof(int),
227 + .proc_handler = &proc_dointvec,
231 .procname = "timeout_established",
232 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
233 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-01-25 10:45:06.000000000 +0200
234 +++ linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-04-19 19:55:40.000000000 +0300
235 @@ -195,6 +195,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
236 ip_vs_control_add(n_cp, cp);
239 +#ifdef CONFIG_IP_VS_NFCT
241 + ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
245 * Replace the old passive address with the new one
247 @@ -327,6 +332,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
248 ip_vs_control_add(n_cp, cp);
251 +#ifdef CONFIG_IP_VS_NFCT
253 + ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
257 * Move tunnel to listen state
259 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
260 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
261 +++ linux/net/ipv4/ipvs/ip_vs_nfct.c 2008-04-19 20:06:46.000000000 +0300
264 + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
266 + * Portions Copyright (C) 2001-2002
267 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
269 + * Portions Copyright (C) 2003-2008
273 + * This code is free software; you can redistribute it and/or modify
274 + * it under the terms of the GNU General Public License as published by
275 + * the Free Software Foundation; either version 2 of the License, or
276 + * (at your option) any later version.
278 + * This program is distributed in the hope that it will be useful,
279 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
280 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
281 + * GNU General Public License for more details.
283 + * You should have received a copy of the GNU General Public License
284 + * along with this program; if not, write to the Free Software
285 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
289 + * Ben North <ben@redfrontdoor.org>
290 + * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
295 + * - provide conntrack confirmation for new and related connections, so
296 + * that we can see their proper conntrack state in all hooks
297 + * - support for all forwarding methods, not only NAT
298 + * - FTP support (NAT), ability to support other NAT apps with expectations
299 + * - to correctly create expectations for related NAT connections, the proper
300 + * NF conntrack support must already be installed, e.g. ip_vs_ftp requires
301 + * nf_conntrack_ftp for the same ports
305 +#include <linux/module.h>
306 +#include <linux/types.h>
307 +#include <linux/kernel.h>
308 +#include <linux/errno.h>
309 +#include <linux/compiler.h>
310 +#include <linux/vmalloc.h>
311 +#include <linux/skbuff.h>
313 +#include <linux/netfilter.h>
314 +#include <linux/netfilter_ipv4.h>
315 +#include <net/ip_vs.h>
318 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
321 +#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
322 +#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
323 + NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
326 +#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
327 +#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
328 + NIPQUAD((c)->vaddr), ntohs((c)->vport), \
329 + NIPQUAD((c)->daddr), ntohs((c)->dport), \
330 + (c)->protocol, (c)->state
332 +/* Returns boolean and skb is freed on failure */
333 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
334 + unsigned int hooknum)
338 + * - the nfct is !NULL and is not confirmed
339 + * - we are called before any mangle
342 + struct iphdr *iph = ip_hdr(skb);
343 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
344 + struct nf_conntrack_tuple new_reply;
346 + __be16 _ports[2], *pptr;
347 +#ifdef CONFIG_IP_VS_DEBUG
348 + struct nf_conntrack_tuple *orig_tup =
349 + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
350 + struct nf_conntrack_tuple *orig_rep =
351 + &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
353 +#ifdef CONFIG_NF_NAT_NEEDED
354 + int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
356 + int initialized = 0;
359 + IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
360 + ", cp=" FMT_CONN "\n",
361 + __FUNCTION__, ct, initialized,
362 + ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
364 +#ifdef CONFIG_NF_NAT_NEEDED
366 + * This is really bad; maybe we are trying to alter a DNAT conn?
367 + * This is not supported, so avoid the confirmation.
369 + if (initialized && ct->status & IPS_NAT_MASK) {
370 +#ifdef CONFIG_IP_VS_DEBUG
371 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
372 + __FUNCTION__, ct, ct->status, initialized);
378 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
382 + * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
383 + * For related connections in inout direction it is done in
384 + * expectfn callback.
387 + pptr = skb_header_pointer(skb, ip_hdrlen(skb),
388 + sizeof(_ports), _ports);
392 + new_reply = (struct nf_conntrack_tuple) {
393 + .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
395 + new_reply.src.u3.ip = cp->daddr;
396 + new_reply.src.u.tcp.port = cp->dport;
397 + new_reply.src.l3num = PF_INET;
398 + new_reply.dst.u3.ip = iph->saddr;
399 + new_reply.dst.u.tcp.port = pptr[0];
401 + nf_conntrack_alter_reply(ct, &new_reply);
403 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
404 + ", new_reply=" FMT_TUPLE " => alter_reply\n",
405 + __FUNCTION__, ct, initialized,
406 + ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
409 + * No need to rehash NAT info because we don't change source
410 + * address in original direction
415 + ret = __nf_conntrack_confirm(skb);
417 + if (ret != NF_STOLEN) {
418 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
419 + __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
422 + if (ret != NF_ACCEPT)
427 + if (ret != NF_STOLEN)
433 + * Confirm (and optionally alter) the conntrack entry if needed
434 + * because the IPVS packets do not reach ipv4_confirm.
436 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
437 + unsigned int hooknum)
439 + struct iphdr *iph = ip_hdr(skb);
440 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
442 + /* By the time we're sending the packet out the other
443 + * side, there should be a confirmed Netfilter CT entry
444 + * for this connection. This may not be the case,
445 + * however, if it's a brand new connection, or if the NF
446 + * entry has timed out before ours has. Either way, if
447 + * the NF CT entry is unconfirmed, confirm it, and deal
448 + * with reply tuple mangling at the same time.
451 + /* We only deal with TCP or UDP packets */
452 + if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
455 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
457 + * Do not be surprised if non-NAT conntracks stay in SYN_SENT
458 + * state; maybe the replies from the real server go
459 + * directly to the client. In any case, keep them in REPLIED
460 + * state (ESTABLISHED).
462 + if (iph->protocol != IPPROTO_TCP ||
463 + IP_VS_TCP_S_ESTABLISHED == cp->state) {
464 + set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
469 + * We assume the reused connections do not change their rip:rport
470 + * and we do not need to alter their conntrack reply
472 + return __ip_vs_nfct_confirm(skb, cp, hooknum);
476 + * We are called from init_conntrack() as expectfn handler
479 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
480 + struct nf_conntrack_expect *exp)
482 + struct nf_conntrack_tuple *orig, new_reply;
483 + struct ip_vs_conn *cp;
485 + if (exp->tuple.src.l3num != PF_INET)
489 + * - We assume that no NF locks are held before this callback
490 + * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
491 + * expectations even if they use wildcard values, now we provide
492 + * the actual values from the newly created original conntrack direction
493 + * - the conntrack is confirmed when packet reaches IPVS hooks
497 + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
498 + cp = ip_vs_conn_out_get(orig->dst.protonum,
499 + orig->src.u3.ip, orig->src.u.tcp.port,
500 + orig->dst.u3.ip, orig->dst.u.tcp.port);
502 + /* Change reply CLIENT->RS to CLIENT->VS */
503 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
504 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
505 + ", found inout cp=" FMT_CONN "\n",
506 + __FUNCTION__, ct, ct->status,
507 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
509 + new_reply.dst.u3.ip = cp->vaddr;
510 + new_reply.dst.u.tcp.port = cp->vport;
511 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
512 + ", inout cp=" FMT_CONN "\n",
514 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
520 + cp = ip_vs_conn_in_get(orig->dst.protonum,
521 + orig->src.u3.ip, orig->src.u.tcp.port,
522 + orig->dst.u3.ip, orig->dst.u.tcp.port);
524 + /* Change reply VS->CLIENT to RS->CLIENT */
525 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
526 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
527 + ", found outin cp=" FMT_CONN "\n",
528 + __FUNCTION__, ct, ct->status,
529 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
531 + new_reply.src.u3.ip = cp->daddr;
532 + new_reply.src.u.tcp.port = cp->dport;
533 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
534 + ", outin cp=" FMT_CONN "\n",
536 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
540 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
541 + __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
546 + /* Never alter conntrack for non-NAT conns */
547 + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
548 + nf_conntrack_alter_reply(ct, &new_reply);
549 + ip_vs_conn_put(cp);
554 + * Create NF conntrack expectation with wildcard (optional) source port.
555 + * Then the default callback function will alter the reply and will confirm
556 + * the conntrack entry when the first packet comes.
558 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
559 + __be16 port, __u16 proto, int from_rs)
561 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
562 + struct nf_conntrack_expect *e;
564 + if (!sysctl_ip_vs_conntrack)
568 + IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
569 + __FUNCTION__, ct, ARG_CONN(cp));
573 + if (!(e = nf_ct_expect_alloc(ct)))
576 + e->expectfn = ip_vs_nfct_expect_callback;
579 + memset(&e->tuple, 0, sizeof(e->tuple));
580 + e->tuple.src.u.tcp.port = port;
581 + e->tuple.src.l3num = PF_INET;
582 + e->tuple.dst.protonum = proto;
583 + memset(&e->mask, 0, sizeof(e->mask));
584 + e->mask.src.u3.ip = 0xffffffff;
585 + e->mask.src.u.all = port? 0xffff : 0;
588 + e->tuple.src.u3.ip = cp->daddr;
589 + e->tuple.dst.u3.ip = cp->caddr;
590 + e->tuple.dst.u.tcp.port = cp->cport;
592 + e->tuple.src.u3.ip = cp->caddr;
593 + e->tuple.dst.u3.ip = cp->vaddr;
594 + e->tuple.dst.u.tcp.port = cp->vport;
597 + IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
598 + __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
599 + nf_ct_expect_related(e);
600 + nf_ct_expect_put(e);
604 + * Our connection was terminated, try to drop the conntrack immediately
606 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
608 + struct nf_conntrack_tuple_hash *h;
609 + struct nf_conn *ct;
610 + struct nf_conntrack_tuple tuple;
615 + tuple = (struct nf_conntrack_tuple) {
616 + .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
617 + tuple.src.u3.ip = cp->caddr;
618 + tuple.src.u.all = cp->cport;
619 + tuple.src.l3num = PF_INET;
620 + tuple.dst.u3.ip = cp->vaddr;
621 + tuple.dst.u.all = cp->vport;
623 + IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
624 + " for conn " FMT_CONN "\n",
625 + __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
627 + h = nf_conntrack_find_get(&tuple);
629 + ct = nf_ct_tuplehash_to_ctrack(h);
630 + if (del_timer(&ct->timeout)) {
631 + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
633 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
634 + if (ct->timeout.function)
635 + ct->timeout.function(ct->timeout.data);
637 + IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
639 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
643 + IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
644 + __FUNCTION__, ARG_TUPLE(&tuple));
648 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
649 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-04-17 09:58:09.000000000 +0300
650 +++ linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-04-19 20:04:42.000000000 +0300
651 @@ -141,7 +141,6 @@ int
652 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
653 struct ip_vs_protocol *pp)
655 - /* we do not touch skb and do not need pskb ptr */
659 @@ -199,6 +198,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
660 dst_release(skb->dst);
661 skb->dst = &rt->u.dst;
663 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
666 /* Another hack: avoid icmp_send in ip_fragment */
669 @@ -211,6 +213,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
670 dst_link_failure(skb);
677 @@ -263,6 +266,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
678 dst_release(skb->dst);
679 skb->dst = &rt->u.dst;
681 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
684 /* mangle the packet */
685 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
687 @@ -286,8 +292,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
689 dst_link_failure(skb);
698 @@ -386,14 +393,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
699 /* fix old IP header checksum */
700 ip_send_check(old_iph);
702 - skb_push(skb, sizeof(struct iphdr));
703 - skb_reset_network_header(skb);
704 - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
707 dst_release(skb->dst);
708 skb->dst = &rt->u.dst;
710 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
713 + skb_push(skb, sizeof(struct iphdr));
714 + skb_reset_network_header(skb);
715 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
718 * Push down and install the IPIP header.
720 @@ -421,6 +431,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
721 dst_link_failure(skb);
728 @@ -466,6 +477,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
729 dst_release(skb->dst);
730 skb->dst = &rt->u.dst;
732 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
735 /* Another hack: avoid icmp_send in ip_fragment */
738 @@ -478,6 +492,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
739 dst_link_failure(skb);
746 @@ -537,6 +552,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
747 dst_release(skb->dst);
748 skb->dst = &rt->u.dst;
750 + /* TODO: properly alter reply for NFCT */
752 ip_vs_nat_icmp(skb, pp, cp, 0);
754 /* Another hack: avoid icmp_send in ip_fragment */