1 diff -urNp v2.6.22/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.22/linux/include/net/ip_vs.h 2007-02-11 01:06:29.000000000 +0200
3 +++ linux/include/net/ip_vs.h 2007-07-12 12:03:43.000000000 +0300
5 #include <asm/types.h> /* For __uXX types */
6 #include <linux/types.h> /* For __beXX types in userland */
9 +#include <linux/skbuff.h>
10 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11 +#include <net/netfilter/nf_conntrack.h>
12 +#include <net/netfilter/nf_conntrack_core.h>
13 +#include <net/netfilter/nf_conntrack_expect.h>
14 +#include <net/netfilter/nf_conntrack_helper.h>
18 #define IP_VS_VERSION_CODE 0x010201
19 #define NVERSION(version) \
20 (version >> 16) & 0xFF, \
21 @@ -358,6 +368,8 @@ enum {
22 NET_IPV4_VS_SYNC_THRESHOLD=24,
23 NET_IPV4_VS_NAT_ICMP_SEND=25,
24 NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE=26,
25 + NET_IPV4_VS_SNAT_REROUTE=27,
26 + NET_IPV4_VS_CONNTRACK=28,
30 @@ -715,6 +727,16 @@ extern void ip_vs_init_hash_table(struct
34 + * Netfilter connection tracking
35 + * (from ip_vs_nfct.c)
37 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
38 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
39 + struct ip_vs_conn *cp,
40 + __be16 port, __u16 proto, int from_rs);
41 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
44 * IPVS connection entry hash table
46 #ifndef CONFIG_IP_VS_TAB_BITS
47 @@ -885,8 +907,41 @@ extern int sysctl_ip_vs_expire_nodest_co
48 extern int sysctl_ip_vs_expire_quiescent_template;
49 extern int sysctl_ip_vs_sync_threshold[2];
50 extern int sysctl_ip_vs_nat_icmp_send;
51 +extern int sysctl_ip_vs_snat_reroute;
52 extern struct ip_vs_stats ip_vs_stats;
54 +#ifdef CONFIG_IP_VS_NFCT
56 +extern int sysctl_ip_vs_conntrack;
58 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
60 + return sysctl_ip_vs_conntrack && skb->nfct;
63 +/* Returns boolean and skb is freed on failure */
64 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
66 + if (!ip_vs_use_conntrack(skb))
68 + return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
69 + ip_vs_nfct_confirm(skb, cp, hooknum);
74 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
79 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
86 extern struct ip_vs_service *
87 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
89 diff -urNp v2.6.22/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
90 --- v2.6.22/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
91 +++ linux/net/ipv4/ipvs/Kconfig 2007-07-12 09:48:59.000000000 +0300
92 @@ -221,4 +221,12 @@ config IP_VS_FTP
93 If you want to compile it in kernel, say Y. To compile it as a
94 module, choose M here. If unsure, say N.
97 + bool "Netfilter connection tracking"
98 + depends on NF_CONNTRACK
100 + The Netfilter connection tracking support allows the IPVS
101 + connection state to be exported to the Netfilter framework
102 + for filtering purposes.
105 diff -urNp v2.6.22/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
106 --- v2.6.22/linux/net/ipv4/ipvs/Makefile 2005-06-18 08:50:52.000000000 +0300
107 +++ linux/net/ipv4/ipvs/Makefile 2007-07-12 09:47:58.000000000 +0300
108 @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
109 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
110 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
112 +ip_vs-extra_objs-y :=
113 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
115 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
116 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
117 ip_vs_est.o ip_vs_proto.o \
118 - $(ip_vs_proto-objs-y)
119 + $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
123 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
124 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_conn.c 2007-04-28 17:55:11.000000000 +0300
125 +++ linux/net/ipv4/ipvs/ip_vs_conn.c 2007-07-12 09:47:58.000000000 +0300
126 @@ -562,6 +562,11 @@ static void ip_vs_conn_expire(unsigned l
128 ip_vs_control_del(cp);
130 +#ifdef CONFIG_IP_VS_NFCT
131 + if (sysctl_ip_vs_conntrack)
132 + ip_vs_nfct_conn_drop(cp);
135 if (unlikely(cp->app != NULL))
136 ip_vs_unbind_app(cp);
137 ip_vs_unbind_dest(cp);
138 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
139 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_core.c 2007-07-10 09:18:43.000000000 +0300
140 +++ linux/net/ipv4/ipvs/ip_vs_core.c 2007-07-12 09:47:58.000000000 +0300
141 @@ -701,6 +701,8 @@ static int ip_vs_out_icmp(struct sk_buff
143 skb->ipvs_property = 1;
145 + if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(pskb, RTN_LOCAL))
149 __ip_vs_conn_put(cp);
150 @@ -805,6 +807,9 @@ ip_vs_out(unsigned int hooknum, struct s
151 if (!ip_vs_make_skb_writable(pskb, ihl))
154 + if (!ip_vs_confirm_conntrack(*pskb, cp, hooknum))
157 /* mangle the packet */
158 if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
160 @@ -812,13 +817,23 @@ ip_vs_out(unsigned int hooknum, struct s
161 ip_hdr(skb)->saddr = cp->vaddr;
162 ip_send_check(ip_hdr(skb));
165 + * nf_iterate does not expect skb->dst->dev to change.
166 + * It does not appear to be fatal to enable this code for hooks
167 + * where our handlers are at the end of the chain list and
168 + * all subsequent handlers use skb->dst->dev and not outdev.
169 + * It will definitely route the inout NAT traffic properly
170 + * when multiple paths are used.
173 /* For policy routing, packets originating from this
174 * machine itself may be routed differently to packets
175 * passing through. We want this packet to be routed as
176 * if it came from this machine itself. So re-compute
177 * the routing information.
179 - if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
181 + if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(pskb, RTN_LOCAL))
185 @@ -834,8 +849,11 @@ ip_vs_out(unsigned int hooknum, struct s
189 - ip_vs_conn_put(cp);
193 + ip_vs_conn_put(cp);
198 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
199 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-10 09:18:43.000000000 +0300
200 +++ linux/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-12 09:47:58.000000000 +0300
201 @@ -81,6 +81,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
202 int sysctl_ip_vs_expire_quiescent_template = 0;
203 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
204 int sysctl_ip_vs_nat_icmp_send = 0;
205 +int sysctl_ip_vs_snat_reroute = 0;
206 +#ifdef CONFIG_IP_VS_NFCT
207 +int sysctl_ip_vs_conntrack = 0;
211 #ifdef CONFIG_IP_VS_DEBUG
212 @@ -1424,6 +1428,16 @@ static struct ctl_table vs_vars[] = {
214 .proc_handler = &proc_dointvec,
216 +#ifdef CONFIG_IP_VS_NFCT
218 + .ctl_name = NET_IPV4_VS_CONNTRACK,
219 + .procname = "conntrack",
220 + .data = &sysctl_ip_vs_conntrack,
221 + .maxlen = sizeof(int),
223 + .proc_handler = &proc_dointvec,
227 .ctl_name = NET_IPV4_VS_DROP_ENTRY,
228 .procname = "drop_entry",
229 @@ -1448,6 +1462,14 @@ static struct ctl_table vs_vars[] = {
231 .proc_handler = &proc_do_defense_mode,
234 + .ctl_name = NET_IPV4_VS_SNAT_REROUTE,
235 + .procname = "snat_reroute",
236 + .data = &sysctl_ip_vs_snat_reroute,
237 + .maxlen = sizeof(int),
239 + .proc_handler = &proc_dointvec,
243 .ctl_name = NET_IPV4_VS_TO_ES,
244 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
245 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_ftp.c 2007-07-10 09:18:43.000000000 +0300
246 +++ linux/net/ipv4/ipvs/ip_vs_ftp.c 2007-07-12 09:47:58.000000000 +0300
247 @@ -194,6 +194,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
248 ip_vs_control_add(n_cp, cp);
251 +#ifdef CONFIG_IP_VS_NFCT
253 + ip_vs_nfct_expect_related(*pskb, n_cp, 0, IPPROTO_TCP, 0);
257 * Replace the old passive address with the new one
259 @@ -326,6 +331,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
260 ip_vs_control_add(n_cp, cp);
263 +#ifdef CONFIG_IP_VS_NFCT
265 + ip_vs_nfct_expect_related(*pskb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
269 * Move tunnel to listen state
271 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
272 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
273 +++ linux/net/ipv4/ipvs/ip_vs_nfct.c 2007-07-12 12:04:31.000000000 +0300
276 + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
278 + * Portions Copyright (C) 2001-2002
279 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
281 + * Portions Copyright (C) 2003-2007
285 + * This code is free software; you can redistribute it and/or modify
286 + * it under the terms of the GNU General Public License as published by
287 + * the Free Software Foundation; either version 2 of the License, or
288 + * (at your option) any later version.
290 + * This program is distributed in the hope that it will be useful,
291 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
292 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
293 + * GNU General Public License for more details.
295 + * You should have received a copy of the GNU General Public License
296 + * along with this program; if not, write to the Free Software
297 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
301 + * Ben North <ben@redfrontdoor.org>
302 + * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
307 + * - provide conntrack confirmation for new and related connections, so
308 + * that their proper conntrack state can be seen in all hooks
309 + * - support for all forwarding methods, not only NAT
310 + * - FTP support (NAT), ability to support other NAT apps with expectations
311 + * - to correctly create expectations for related NAT connections the proper
312 + * NF conntrack support must be already installed, eg. ip_vs_ftp requires
313 + * nf_conntrack_ftp for the same ports
317 +#include <linux/module.h>
318 +#include <linux/types.h>
319 +#include <linux/kernel.h>
320 +#include <linux/errno.h>
321 +#include <linux/compiler.h>
322 +#include <linux/vmalloc.h>
323 +#include <linux/skbuff.h>
325 +#include <linux/netfilter.h>
326 +#include <linux/netfilter_ipv4.h>
327 +#include <net/ip_vs.h>
330 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
333 +#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
334 +#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
335 + NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
338 +#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
339 +#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
340 + NIPQUAD((c)->vaddr), ntohs((c)->vport), \
341 + NIPQUAD((c)->daddr), ntohs((c)->dport), \
342 + (c)->protocol, (c)->state
344 +/* Returns boolean and skb is freed on failure */
345 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
346 + unsigned int hooknum)
350 + * - the nfct is !NULL and is not confirmed
351 + * - we are called before any mangle
354 + struct iphdr *iph = ip_hdr(skb);
355 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
356 + struct nf_conntrack_tuple new_reply;
358 + __be16 _ports[2], *pptr;
359 +#ifdef CONFIG_IP_VS_DEBUG
360 + struct nf_conntrack_tuple *orig_tup =
361 + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
362 + struct nf_conntrack_tuple *orig_rep =
363 + &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
365 +#ifdef CONFIG_NF_NAT_NEEDED
366 + int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
368 + int initialized = 0;
371 + IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
372 + ", cp=" FMT_CONN "\n",
373 + __FUNCTION__, ct, initialized,
374 + ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
376 +#ifdef CONFIG_NF_NAT_NEEDED
378 + * This is really bad: maybe we are trying to alter a DNAT conn?
379 + * This is not supported, so avoid the confirmation.
381 + if (initialized && ct->status & IPS_NAT_MASK) {
382 +#ifdef CONFIG_IP_VS_DEBUG
383 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
384 + __FUNCTION__, ct, ct->status, initialized);
390 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_IP_FORWARD == hooknum)
394 + * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
395 + * For related connections in inout direction it is done in
396 + * expectfn callback.
399 + pptr = skb_header_pointer(skb, ip_hdrlen(skb),
400 + sizeof(_ports), _ports);
404 + new_reply = (struct nf_conntrack_tuple) {
405 + .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
407 + new_reply.src.u3.ip = cp->daddr;
408 + new_reply.src.u.tcp.port = cp->dport;
409 + new_reply.src.l3num = PF_INET;
410 + new_reply.dst.u3.ip = iph->saddr;
411 + new_reply.dst.u.tcp.port = pptr[0];
413 + nf_conntrack_alter_reply(ct, &new_reply);
415 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
416 + ", new_reply=" FMT_TUPLE " => alter_reply\n",
417 + __FUNCTION__, ct, initialized,
418 + ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
421 + * No need to rehash NAT info because we don't change source
422 + * address in original direction
427 + ret = __nf_conntrack_confirm(&skb);
429 + if (ret != NF_STOLEN) {
430 + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
431 + __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
434 + if (ret != NF_ACCEPT)
439 + if (ret != NF_STOLEN)
445 + * Confirm (and optionally alter) the conntrack entry if needed
446 + * because the IPVS packets do not reach ipv4_confirm.
448 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
449 + unsigned int hooknum)
451 + struct iphdr *iph = ip_hdr(skb);
452 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
454 + /* By the time we're sending the packet out the other
455 + * side, there should be a confirmed Netfilter CT entry
456 + * for this connection. This may not be the case,
457 + * however, if it's a brand new connection, or if the NF
458 + * entry has timed out before ours has. Either way, if
459 + * the NF CT entry is unconfirmed, confirm it, and deal
460 + * with reply tuple mangling at the same time.
463 + /* We only deal with TCP or UDP packets */
464 + if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
467 + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
469 + * Do not be surprised if non-NAT conntracks stay in SYN_SENT
470 + * state; maybe the replies from the real server go
471 + * directly to the client. In any case, keep them in REPLIED
472 + * state (ESTABLISHED).
474 + if (iph->protocol != IPPROTO_TCP ||
475 + IP_VS_TCP_S_ESTABLISHED == cp->state) {
476 + set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
481 + * We assume the reused connections do not change their rip:rport
482 + * and we do not need to alter their conntrack reply
484 + return __ip_vs_nfct_confirm(skb, cp, hooknum);
488 + * We are called from init_conntrack() as expectfn handler
491 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
492 + struct nf_conntrack_expect *exp)
494 + struct nf_conntrack_tuple *orig, new_reply;
495 + struct ip_vs_conn *cp;
497 + if (exp->tuple.src.l3num != PF_INET)
501 + * - We assume that no NF locks are held before this callback
502 + * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
503 + * expectations even if they use wildcard values, because we now provide
504 + * the actual values from the newly created original conntrack direction
505 + * - the conntrack is confirmed when the packet reaches the IPVS hooks
509 + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
510 + cp = ip_vs_conn_out_get(orig->dst.protonum,
511 + orig->src.u3.ip, orig->src.u.tcp.port,
512 + orig->dst.u3.ip, orig->dst.u.tcp.port);
514 + /* Change reply CLIENT->RS to CLIENT->VS */
515 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
516 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
517 + ", found inout cp=" FMT_CONN "\n",
518 + __FUNCTION__, ct, ct->status,
519 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
521 + new_reply.dst.u3.ip = cp->vaddr;
522 + new_reply.dst.u.tcp.port = cp->vport;
523 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
524 + ", inout cp=" FMT_CONN "\n",
526 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
532 + cp = ip_vs_conn_in_get(orig->dst.protonum,
533 + orig->src.u3.ip, orig->src.u.tcp.port,
534 + orig->dst.u3.ip, orig->dst.u.tcp.port);
536 + /* Change reply VS->CLIENT to RS->CLIENT */
537 + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
538 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
539 + ", found outin cp=" FMT_CONN "\n",
540 + __FUNCTION__, ct, ct->status,
541 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
543 + new_reply.src.u3.ip = cp->daddr;
544 + new_reply.src.u.tcp.port = cp->dport;
545 + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
546 + ", outin cp=" FMT_CONN "\n",
548 + ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
552 + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
553 + __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
558 + /* Never alter conntrack for non-NAT conns */
559 + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
560 + nf_conntrack_alter_reply(ct, &new_reply);
561 + ip_vs_conn_put(cp);
566 + * Create NF conntrack expectation with wildcard (optional) source port.
567 + * Then the default callback function will alter the reply and will confirm
568 + * the conntrack entry when the first packet comes.
570 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
571 + __be16 port, __u16 proto, int from_rs)
573 + struct nf_conn *ct = (struct nf_conn *) skb->nfct;
574 + struct nf_conntrack_expect *e;
576 + if (!sysctl_ip_vs_conntrack)
580 + IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
581 + __FUNCTION__, ct, ARG_CONN(cp));
585 + if (!(e = nf_conntrack_expect_alloc(ct)))
588 + e->expectfn = ip_vs_nfct_expect_callback;
591 + memset(&e->tuple, 0, sizeof(e->tuple));
592 + e->tuple.src.u.tcp.port = port;
593 + e->tuple.src.l3num = PF_INET;
594 + e->tuple.dst.protonum = proto;
595 + memset(&e->mask, 0, sizeof(e->mask));
596 + e->mask.src.u3.ip = 0xffffffff;
597 + e->mask.src.u.all = port? 0xffff : 0;
598 + e->mask.src.l3num = 0xffff;
599 + e->mask.dst.u3.ip = 0xffffffff;
600 + e->mask.dst.u.all = 0xffff;
601 + e->mask.dst.protonum = 0xff;
604 + e->tuple.src.u3.ip = cp->daddr;
605 + e->tuple.dst.u3.ip = cp->caddr;
606 + e->tuple.dst.u.tcp.port = cp->cport;
608 + e->tuple.src.u3.ip = cp->caddr;
609 + e->tuple.dst.u3.ip = cp->vaddr;
610 + e->tuple.dst.u.tcp.port = cp->vport;
613 + IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
614 + __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
615 + nf_conntrack_expect_related(e);
616 + nf_conntrack_expect_put(e);
620 + * Our connection was terminated; try to drop the conntrack immediately
622 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
624 + struct nf_conntrack_tuple_hash *h;
625 + struct nf_conn *ct;
626 + struct nf_conntrack_tuple tuple;
631 + tuple = (struct nf_conntrack_tuple) {
632 + .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
633 + tuple.src.u3.ip = cp->caddr;
634 + tuple.src.u.all = cp->cport;
635 + tuple.src.l3num = PF_INET;
636 + tuple.dst.u3.ip = cp->vaddr;
637 + tuple.dst.u.all = cp->vport;
639 + IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
640 + " for conn " FMT_CONN "\n",
641 + __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
643 + h = nf_conntrack_find_get(&tuple, NULL);
645 + ct = nf_ct_tuplehash_to_ctrack(h);
646 + if (del_timer(&ct->timeout)) {
647 + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
649 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
650 + if (ct->timeout.function)
651 + ct->timeout.function(ct->timeout.data);
653 + IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
655 + __FUNCTION__, ct, ARG_TUPLE(&tuple));
659 + IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
660 + __FUNCTION__, ARG_TUPLE(&tuple));
664 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
665 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-10 09:18:43.000000000 +0300
666 +++ linux/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-12 09:54:45.000000000 +0300
667 @@ -199,6 +199,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
668 dst_release(skb->dst);
669 skb->dst = &rt->u.dst;
671 + if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
674 /* Another hack: avoid icmp_send in ip_fragment */
677 @@ -211,6 +214,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
678 dst_link_failure(skb);
685 @@ -263,6 +267,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
686 dst_release(skb->dst);
687 skb->dst = &rt->u.dst;
689 + if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
692 /* mangle the packet */
693 if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
695 @@ -286,8 +293,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
697 dst_link_failure(skb);
706 @@ -386,14 +394,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
707 /* fix old IP header checksum */
708 ip_send_check(old_iph);
710 - skb_push(skb, sizeof(struct iphdr));
711 - skb_reset_network_header(skb);
712 - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
715 dst_release(skb->dst);
716 skb->dst = &rt->u.dst;
718 + if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
721 + skb_push(skb, sizeof(struct iphdr));
722 + skb_reset_network_header(skb);
723 + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
726 * Push down and install the IPIP header.
728 @@ -423,6 +434,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
729 dst_link_failure(skb);
736 @@ -468,6 +480,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
737 dst_release(skb->dst);
738 skb->dst = &rt->u.dst;
740 + if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
743 /* Another hack: avoid icmp_send in ip_fragment */
746 @@ -480,6 +495,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
747 dst_link_failure(skb);
754 @@ -539,6 +555,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
755 dst_release(skb->dst);
756 skb->dst = &rt->u.dst;
758 + /* TODO: properly alter reply for NFCT */
760 ip_vs_nat_icmp(skb, pp, cp, 0);
762 /* Another hack: avoid icmp_send in ip_fragment */