1 diff -urNp v2.6.28/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.28/linux/include/net/ip_vs.h 2008-12-25 10:12:24.000000000 +0200
3 +++ linux/include/net/ip_vs.h 2008-12-26 12:32:55.000000000 +0200
6 #include <linux/ipv6.h> /* for struct ipv6hdr */
7 #include <net/ipv6.h> /* for ipv6_addr_copy */
8 +#include <linux/skbuff.h>
10 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11 +#include <net/netfilter/nf_conntrack.h>
12 +#include <net/netfilter/nf_conntrack_core.h>
13 +#include <net/netfilter/nf_conntrack_expect.h>
14 +#include <net/netfilter/nf_conntrack_helper.h>
19 @@ -595,6 +603,16 @@ extern void ip_vs_init_hash_table(struct
20 #define IP_VS_APP_TYPE_FTP 1
23 + * Netfilter connection tracking
24 + * (from ip_vs_nfct.c)
26 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
27 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
28 + struct ip_vs_conn *cp,
29 + __be16 port, __u16 proto, int from_rs);
30 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
33 * ip_vs_conn handling functions
36 @@ -780,9 +798,42 @@ extern int sysctl_ip_vs_expire_nodest_co
37 extern int sysctl_ip_vs_expire_quiescent_template;
38 extern int sysctl_ip_vs_sync_threshold[2];
39 extern int sysctl_ip_vs_nat_icmp_send;
40 +extern int sysctl_ip_vs_snat_reroute;
41 extern struct ip_vs_stats ip_vs_stats;
42 extern const struct ctl_path net_vs_ctl_path[];
44 +#ifdef CONFIG_IP_VS_NFCT
46 +extern int sysctl_ip_vs_conntrack;
48 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
50 + return sysctl_ip_vs_conntrack && skb->nfct;
53 +/* Returns boolean and skb is freed on failure */
54 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
56 + if (!ip_vs_use_conntrack(skb))
58 + return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
59 + ip_vs_nfct_confirm(skb, cp, hooknum);
64 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
69 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
76 extern struct ip_vs_service *
77 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
78 const union nf_inet_addr *vaddr, __be16 vport);
79 diff -urNp v2.6.28/linux/net/netfilter/ipvs/Kconfig linux/net/netfilter/ipvs/Kconfig
80 --- v2.6.28/linux/net/netfilter/ipvs/Kconfig 2008-12-25 10:12:26.000000000 +0200
81 +++ linux/net/netfilter/ipvs/Kconfig 2008-12-26 12:35:37.000000000 +0200
82 @@ -238,4 +238,12 @@ config IP_VS_FTP
83 If you want to compile it in kernel, say Y. To compile it as a
84 module, choose M here. If unsure, say N.
87 + bool "Netfilter connection tracking"
88 + depends on NF_CONNTRACK
90 + The Netfilter connection tracking support allows the IPVS
91 + connection state to be exported to the Netfilter framework
92 + for filtering purposes.
95 diff -urNp v2.6.28/linux/net/netfilter/ipvs/Makefile linux/net/netfilter/ipvs/Makefile
96 --- v2.6.28/linux/net/netfilter/ipvs/Makefile 2008-12-25 10:12:26.000000000 +0200
97 +++ linux/net/netfilter/ipvs/Makefile 2008-12-26 12:36:38.000000000 +0200
98 @@ -8,10 +8,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TC
99 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
100 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
102 +ip_vs-extra_objs-y :=
103 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
105 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
106 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
107 ip_vs_est.o ip_vs_proto.o \
108 - $(ip_vs_proto-objs-y)
109 + $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
113 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_conn.c linux/net/netfilter/ipvs/ip_vs_conn.c
114 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_conn.c 2008-12-25 10:12:26.000000000 +0200
115 +++ linux/net/netfilter/ipvs/ip_vs_conn.c 2008-12-26 12:38:15.000000000 +0200
116 @@ -642,6 +642,11 @@ static void ip_vs_conn_expire(unsigned l
118 ip_vs_control_del(cp);
120 +#ifdef CONFIG_IP_VS_NFCT
121 + if (sysctl_ip_vs_conntrack)
122 + ip_vs_nfct_conn_drop(cp);
125 if (unlikely(cp->app != NULL))
126 ip_vs_unbind_app(cp);
127 ip_vs_unbind_dest(cp);
128 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_core.c linux/net/netfilter/ipvs/ip_vs_core.c
129 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_core.c 2008-12-25 10:12:26.000000000 +0200
130 +++ linux/net/netfilter/ipvs/ip_vs_core.c 2008-12-26 18:21:56.000000000 +0200
131 @@ -869,13 +869,16 @@ static inline int is_tcp_reset(const str
134 handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
135 - struct ip_vs_conn *cp, int ihl)
136 + struct ip_vs_conn *cp, int ihl, unsigned int hooknum)
138 IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
140 if (!skb_make_writable(skb, ihl))
143 + if (AF_INET == af && !ip_vs_confirm_conntrack(skb, cp, hooknum))
146 /* mangle the packet */
147 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
149 @@ -890,6 +893,15 @@ handle_response(int af, struct sk_buff *
150 ip_send_check(ip_hdr(skb));
154 + * nf_iterate does not expect change in the skb->dst->dev.
155 + * It looks like it is not fatal to enable this code for hooks
156 + * where our handlers are at the end of the chain list and
157 + * when all next handlers use skb->dst->dev and not outdev.
158 + * It will definitely route the inout NAT traffic properly
159 + * when multiple paths are used.
162 /* For policy routing, packets originating from this
163 * machine itself may be routed differently to packets
164 * passing through. We want this packet to be routed as
165 @@ -902,7 +914,8 @@ handle_response(int af, struct sk_buff *
169 - if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
170 + if (sysctl_ip_vs_snat_reroute &&
171 + ip_route_me_harder(skb, RTN_LOCAL) != 0)
174 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
175 @@ -917,8 +930,11 @@ handle_response(int af, struct sk_buff *
179 - ip_vs_conn_put(cp);
183 + ip_vs_conn_put(cp);
188 @@ -958,8 +974,13 @@ ip_vs_out(unsigned int hooknum, struct s
189 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
190 int related, verdict = ip_vs_out_icmp(skb, &related);
194 + if (sysctl_ip_vs_snat_reroute &&
195 + NF_ACCEPT == verdict &&
196 + ip_route_me_harder(skb, RTN_LOCAL))
200 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
203 @@ -1033,7 +1054,7 @@ ip_vs_out(unsigned int hooknum, struct s
207 - return handle_response(af, skb, pp, cp, iph.len);
208 + return handle_response(af, skb, pp, cp, iph.len, hooknum);
212 @@ -1298,7 +1319,7 @@ ip_vs_in(unsigned int hooknum, struct sk
213 /* For local client packets, it could be a response */
214 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
216 - return handle_response(af, skb, pp, cp, iph.len);
217 + return handle_response(af, skb, pp, cp, iph.len, hooknum);
219 if (!pp->conn_schedule(af, skb, pp, &v, &cp))
221 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_ctl.c linux/net/netfilter/ipvs/ip_vs_ctl.c
222 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_ctl.c 2008-12-25 10:12:26.000000000 +0200
223 +++ linux/net/netfilter/ipvs/ip_vs_ctl.c 2008-12-26 17:18:35.000000000 +0200
224 @@ -84,6 +84,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
225 int sysctl_ip_vs_expire_quiescent_template = 0;
226 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
227 int sysctl_ip_vs_nat_icmp_send = 0;
228 +int sysctl_ip_vs_snat_reroute = 0;
229 +#ifdef CONFIG_IP_VS_NFCT
230 +int sysctl_ip_vs_conntrack = 0;
234 #ifdef CONFIG_IP_VS_DEBUG
235 @@ -1575,6 +1579,15 @@ static struct ctl_table vs_vars[] = {
237 .proc_handler = &proc_dointvec,
239 +#ifdef CONFIG_IP_VS_NFCT
241 + .procname = "conntrack",
242 + .data = &sysctl_ip_vs_conntrack,
243 + .maxlen = sizeof(int),
245 + .proc_handler = &proc_dointvec,
249 .procname = "drop_entry",
250 .data = &sysctl_ip_vs_drop_entry,
251 @@ -1596,6 +1609,13 @@ static struct ctl_table vs_vars[] = {
253 .proc_handler = &proc_do_defense_mode,
256 + .procname = "snat_reroute",
257 + .data = &sysctl_ip_vs_snat_reroute,
258 + .maxlen = sizeof(int),
260 + .proc_handler = &proc_dointvec,
264 .procname = "timeout_established",
265 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_ftp.c linux/net/netfilter/ipvs/ip_vs_ftp.c
266 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_ftp.c 2008-12-25 10:12:26.000000000 +0200
267 +++ linux/net/netfilter/ipvs/ip_vs_ftp.c 2008-12-26 17:21:25.000000000 +0200
268 @@ -202,6 +202,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
269 ip_vs_control_add(n_cp, cp);
272 +#ifdef CONFIG_IP_VS_NFCT
274 + ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
278 * Replace the old passive address with the new one
280 @@ -342,6 +347,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
281 ip_vs_control_add(n_cp, cp);
284 +#ifdef CONFIG_IP_VS_NFCT
286 + ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
290 * Move tunnel to listen state
292 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_nfct.c linux/net/netfilter/ipvs/ip_vs_nfct.c
293 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
294 +++ linux/net/netfilter/ipvs/ip_vs_nfct.c 2008-12-26 18:35:40.000000000 +0200
297 + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
299 + * Portions Copyright (C) 2001-2002
300 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
302 + * Portions Copyright (C) 2003-2008
306 + * This code is free software; you can redistribute it and/or modify
307 + * it under the terms of the GNU General Public License as published by
308 + * the Free Software Foundation; either version 2 of the License, or
309 + * (at your option) any later version.
311 + * This program is distributed in the hope that it will be useful,
312 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
313 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
314 + * GNU General Public License for more details.
316 + * You should have received a copy of the GNU General Public License
317 + * along with this program; if not, write to the Free Software
318 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
322 + * Ben North <ben@redfrontdoor.org>
323 + * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
328 + * - provide conntrack confirmation for new and related connections, so
329 + * that we can see their proper conntrack state in all hooks
330 + * - support for all forwarding methods, not only NAT
331 + * - FTP support (NAT), ability to support other NAT apps with expectations
332 + * - to correctly create expectations for related NAT connections, the proper
333 + * NF conntrack support must already be installed, e.g. ip_vs_ftp requires
334 + * nf_conntrack_ftp for the same ports
338 +#include <linux/module.h>
339 +#include <linux/types.h>
340 +#include <linux/kernel.h>
341 +#include <linux/errno.h>
342 +#include <linux/compiler.h>
343 +#include <linux/vmalloc.h>
344 +#include <linux/skbuff.h>
346 +#include <linux/netfilter.h>
347 +#include <linux/netfilter_ipv4.h>
348 +#include <net/ip_vs.h>
351 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
354 +#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
355 +#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
356 + NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
359 +#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
360 +#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
361 + NIPQUAD((c)->vaddr), ntohs((c)->vport), \
362 + NIPQUAD((c)->daddr), ntohs((c)->dport), \
363 + (c)->protocol, (c)->state
365 +/* Returns boolean and skb is freed on failure */
366 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
367 + unsigned int hooknum)
371 + * - the nfct is !NULL and is not confirmed
372 + * - we are called before any mangle
375 + struct iphdr *iph = ip_hdr(skb);
376 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
377 + struct nf_conntrack_tuple new_reply;
379 + __be16 _ports[2], *pptr;
380 +#ifdef CONFIG_IP_VS_DEBUG
381 + struct nf_conntrack_tuple *orig_tup =
382 + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
383 + struct nf_conntrack_tuple *orig_rep =
384 + &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
386 +#ifdef CONFIG_NF_NAT_NEEDED
387 + int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
389 + int initialized = 0;
392 + IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
393 + ", cp=" FMT_CONN "\n",
394 + __FUNCTION__, ct, initialized,
395 + ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
397 +#ifdef CONFIG_NF_NAT_NEEDED
399 + * This is really bad, maybe we are trying to alter a DNAT conn?
400 + * This is not supported, avoid the confirmation.
402 + if (initialized && ct->status & IPS_NAT_MASK) {
403 +#ifdef CONFIG_IP_VS_DEBUG
404 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
405 + __FUNCTION__, ct, ct->status, initialized);
411 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
415 + * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
416 + * For related connections in inout direction it is done in
417 + * expectfn callback.
420 + pptr = skb_header_pointer(skb, ip_hdrlen(skb),
421 + sizeof(_ports), _ports);
425 + new_reply = (struct nf_conntrack_tuple) {
426 + .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
428 + new_reply.src.u3 = cp->daddr;
429 + new_reply.src.u.tcp.port = cp->dport;
430 + new_reply.src.l3num = PF_INET;
431 + new_reply.dst.u3.ip = iph->saddr;
432 + new_reply.dst.u.tcp.port = pptr[0];
434 + nf_conntrack_alter_reply(ct, &new_reply);
436 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
437 + ", new_reply=" FMT_TUPLE " => alter_reply\n",
438 + __FUNCTION__, ct, initialized,
439 + ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
442 + * No need to rehash NAT info because we don't change source
443 + * address in original direction
448 + ret = __nf_conntrack_confirm(skb);
450 + if (ret != NF_STOLEN) {
451 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
452 + __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
455 + if (ret != NF_ACCEPT)
460 + if (ret != NF_STOLEN)
466 + * Confirm (and optionally alter) the conntrack entry if needed
467 + * because the IPVS packets do not reach ipv4_confirm.
469 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
470 + unsigned int hooknum)
472 + struct iphdr *iph = ip_hdr(skb);
473 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
475 + /* By the time we're sending the packet out the other
476 + * side, there should be a confirmed Netfilter CT entry
477 + * for this connection. This may not be the case,
478 + * however, if it's a brand new connection, or if the NF
479 + * entry has timed out before ours has. Either way, if
480 + * the NF CT entry is unconfirmed, confirm it, and deal
481 + * with reply tuple mangling at the same time.
484 + /* We only deal with TCP or UDP packets */
485 + if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
488 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
490 + * Do not be surprised if non-NAT conntracks stay in SYN_SENT
491 + * state; maybe the replies from the real server go
492 + * directly to the client. In any case, keep them in REPLIED
493 + * state (ESTABLISHED).
495 + if (iph->protocol != IPPROTO_TCP ||
496 + IP_VS_TCP_S_ESTABLISHED == cp->state) {
497 + set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
502 + * We assume the reused connections do not change their rip:rport
503 + * and we do not need to alter their conntrack reply
505 + return __ip_vs_nfct_confirm(skb, cp, hooknum);
509 + * We are called from init_conntrack() as expectfn handler
512 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
513 + struct nf_conntrack_expect *exp)
515 + struct nf_conntrack_tuple *orig, new_reply;
516 + struct ip_vs_conn *cp;
518 + if (exp->tuple.src.l3num != PF_INET)
522 + * - We assume that no NF locks are held before this callback
523 + * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
524 + * expectations even if they use wildcard values, now we provide
525 + * the actual values from the newly created original conntrack direction
526 + * - the conntrack is confirmed when packet reaches IPVS hooks
530 + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
531 + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
532 + &orig->src.u3, orig->src.u.tcp.port,
533 + &orig->dst.u3, orig->dst.u.tcp.port);
535 + /* Change reply CLIENT->RS to CLIENT->VS */
536 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
537 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
538 + ", found inout cp=" FMT_CONN "\n",
539 + __FUNCTION__, ct, ct->status,
540 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
542 + new_reply.dst.u3 = cp->vaddr;
543 + new_reply.dst.u.tcp.port = cp->vport;
544 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
545 + ", inout cp=" FMT_CONN "\n",
547 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
553 + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
554 + &orig->src.u3, orig->src.u.tcp.port,
555 + &orig->dst.u3, orig->dst.u.tcp.port);
557 + /* Change reply VS->CLIENT to RS->CLIENT */
558 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
559 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
560 + ", found outin cp=" FMT_CONN "\n",
561 + __FUNCTION__, ct, ct->status,
562 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
564 + new_reply.src.u3 = cp->daddr;
565 + new_reply.src.u.tcp.port = cp->dport;
566 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
567 + ", outin cp=" FMT_CONN "\n",
569 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
573 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
574 + __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
579 + /* Never alter conntrack for non-NAT conns */
580 + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
581 + nf_conntrack_alter_reply(ct, &new_reply);
582 + ip_vs_conn_put(cp);
587 + * Create NF conntrack expectation with wildcard (optional) source port.
588 + * Then the default callback function will alter the reply and will confirm
589 + * the conntrack entry when the first packet comes.
591 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
592 + __be16 port, __u16 proto, int from_rs)
594 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
595 + struct nf_conntrack_expect *e;
597 + if (!sysctl_ip_vs_conntrack)
601 + IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
602 + __FUNCTION__, ct, ARG_CONN(cp));
606 + if (!(e = nf_ct_expect_alloc(ct)))
609 + e->expectfn = ip_vs_nfct_expect_callback;
612 + e->class = NF_CT_EXPECT_CLASS_DEFAULT;
613 + memset(&e->tuple, 0, sizeof(e->tuple));
614 + e->tuple.src.u.tcp.port = port;
615 + e->tuple.src.l3num = PF_INET;
616 + e->tuple.dst.protonum = proto;
617 + memset(&e->mask, 0, sizeof(e->mask));
618 + e->mask.src.u3.ip = 0xffffffff;
619 + e->mask.src.u.all = port? 0xffff : 0;
622 + e->tuple.src.u3 = cp->daddr;
623 + e->tuple.dst.u3 = cp->caddr;
624 + e->tuple.dst.u.tcp.port = cp->cport;
626 + e->tuple.src.u3 = cp->caddr;
627 + e->tuple.dst.u3 = cp->vaddr;
628 + e->tuple.dst.u.tcp.port = cp->vport;
631 + IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
632 + __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
633 + nf_ct_expect_related(e);
634 + nf_ct_expect_put(e);
638 + * Our connection was terminated, try to drop the conntrack immediately
640 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
642 + struct nf_conntrack_tuple_hash *h;
643 + struct nf_conn *ct;
644 + struct nf_conntrack_tuple tuple;
649 + tuple = (struct nf_conntrack_tuple) {
650 + .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
651 + tuple.src.u3 = cp->caddr;
652 + tuple.src.u.all = cp->cport;
653 + tuple.src.l3num = PF_INET;
654 + tuple.dst.u3 = cp->vaddr;
655 + tuple.dst.u.all = cp->vport;
657 + IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
658 + " for conn " FMT_CONN "\n",
659 + __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
661 + h = nf_conntrack_find_get(&init_net, &tuple);
663 + ct = nf_ct_tuplehash_to_ctrack(h);
664 + if (del_timer(&ct->timeout)) {
665 + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
667 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
668 + if (ct->timeout.function)
669 + ct->timeout.function(ct->timeout.data);
671 + IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
673 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
677 + IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
678 + __FUNCTION__, ARG_TUPLE(&tuple));
682 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_xmit.c linux/net/netfilter/ipvs/ip_vs_xmit.c
683 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_xmit.c 2008-12-25 10:12:26.000000000 +0200
684 +++ linux/net/netfilter/ipvs/ip_vs_xmit.c 2008-12-26 17:31:51.000000000 +0200
685 @@ -265,6 +265,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
686 dst_release(skb->dst);
687 skb->dst = &rt->u.dst;
689 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
692 /* Another hack: avoid icmp_send in ip_fragment */
695 @@ -277,6 +280,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
696 dst_link_failure(skb);
703 @@ -393,6 +397,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
704 dst_release(skb->dst);
705 skb->dst = &rt->u.dst;
707 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
710 /* mangle the packet */
711 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
713 @@ -416,8 +423,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
715 dst_link_failure(skb);
724 @@ -593,14 +601,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
725 /* fix old IP header checksum */
726 ip_send_check(old_iph);
728 - skb_push(skb, sizeof(struct iphdr));
729 - skb_reset_network_header(skb);
730 - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
734 skb_dst_set(skb, &rt->u.dst);
736 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
739 + skb_push(skb, sizeof(struct iphdr));
740 + skb_reset_network_header(skb);
741 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
744 * Push down and install the IPIP header.
746 @@ -628,6 +639,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
747 dst_link_failure(skb);
754 @@ -780,6 +792,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
755 dst_release(skb->dst);
756 skb->dst = &rt->u.dst;
758 + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
761 /* Another hack: avoid icmp_send in ip_fragment */
764 @@ -792,6 +807,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
765 dst_link_failure(skb);
772 @@ -905,6 +921,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
773 dst_release(skb->dst);
774 skb->dst = &rt->u.dst;
776 + /* TODO: properly alter reply for NFCT */
778 ip_vs_nat_icmp(skb, pp, cp, 0);
780 /* Another hack: avoid icmp_send in ip_fragment */