git.pld-linux.org Git - packages/kernel.git/blame - kernel-ipvs-nfct.patch
- disabled B43_PCMCIA and SSB_PCMCIAHOST, kills pcmcia dependency in ssb.ko
[packages/kernel.git] / kernel-ipvs-nfct.patch
Commit Line Data
6d4e1af8 1diff -urNp v2.6.25/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2--- v2.6.25/linux/include/net/ip_vs.h 2008-04-17 09:58:08.000000000 +0300
3+++ linux/include/net/ip_vs.h 2008-04-19 19:59:24.000000000 +0300
4@@ -11,6 +11,16 @@
5
6 #include <linux/sysctl.h> /* For ctl_path */
db744e5b 7
8+#ifdef __KERNEL__
9+#include <linux/skbuff.h>
10+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11+#include <net/netfilter/nf_conntrack.h>
12+#include <net/netfilter/nf_conntrack_core.h>
13+#include <net/netfilter/nf_conntrack_expect.h>
14+#include <net/netfilter/nf_conntrack_helper.h>
15+#endif
16+#endif
17+
18 #define IP_VS_VERSION_CODE 0x010201
19 #define NVERSION(version) \
20 (version >> 16) & 0xFF, \
6d4e1af8 21@@ -686,6 +696,16 @@ extern void ip_vs_init_hash_table(struct
db744e5b 22 */
23
24 /*
25+ * Netfilter connection tracking
26+ * (from ip_vs_nfct.c)
27+ */
28+extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
29+extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
30+ struct ip_vs_conn *cp,
31+ __be16 port, __u16 proto, int from_rs);
32+extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
33+
34+/*
35 * IPVS connection entry hash table
36 */
37 #ifndef CONFIG_IP_VS_TAB_BITS
6d4e1af8 38@@ -855,9 +875,42 @@ extern int sysctl_ip_vs_expire_nodest_co
db744e5b 39 extern int sysctl_ip_vs_expire_quiescent_template;
40 extern int sysctl_ip_vs_sync_threshold[2];
41 extern int sysctl_ip_vs_nat_icmp_send;
42+extern int sysctl_ip_vs_snat_reroute;
43 extern struct ip_vs_stats ip_vs_stats;
6d4e1af8 44 extern struct ctl_path net_vs_ctl_path[];
db744e5b 45
46+#ifdef CONFIG_IP_VS_NFCT
47+
48+extern int sysctl_ip_vs_conntrack;
49+
50+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
51+{
52+ return sysctl_ip_vs_conntrack && skb->nfct;
53+}
54+
55+/* Returns boolean and skb is freed on failure */
56+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
57+{
58+ if (!ip_vs_use_conntrack(skb))
59+ return 1;
60+ return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
61+ ip_vs_nfct_confirm(skb, cp, hooknum);
62+}
63+
64+#else
65+
66+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
67+{
68+ return 0;
69+}
70+
71+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
72+{
73+ return 1;
74+}
75+
76+#endif
77+
78 extern struct ip_vs_service *
79 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
80
6d4e1af8 81diff -urNp v2.6.25/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
82--- v2.6.25/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
83+++ linux/net/ipv4/ipvs/Kconfig 2008-04-19 19:55:40.000000000 +0300
db744e5b 84@@ -221,4 +221,12 @@ config IP_VS_FTP
85 If you want to compile it in kernel, say Y. To compile it as a
86 module, choose M here. If unsure, say N.
87
88+config IP_VS_NFCT
89+ bool "Netfilter connection tracking"
90+ depends on NF_CONNTRACK
91+ ---help---
92+ The Netfilter connection tracking support allows the IPVS
93+ connection state to be exported to the Netfilter framework
94+ for filtering purposes.
95+
96 endif # IP_VS
6d4e1af8 97diff -urNp v2.6.25/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
98--- v2.6.25/linux/net/ipv4/ipvs/Makefile 2005-06-18 08:50:52.000000000 +0300
99+++ linux/net/ipv4/ipvs/Makefile 2008-04-19 19:55:40.000000000 +0300
db744e5b 100@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
101 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
102 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
103
104+ip_vs-extra_objs-y :=
105+ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
106+
107 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
108 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
109 ip_vs_est.o ip_vs_proto.o \
110- $(ip_vs_proto-objs-y)
111+ $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
112
113
114 # IPVS core
6d4e1af8 115diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
116--- v2.6.25/linux/net/ipv4/ipvs/ip_vs_conn.c 2008-04-17 09:58:09.000000000 +0300
117+++ linux/net/ipv4/ipvs/ip_vs_conn.c 2008-04-19 19:55:40.000000000 +0300
118@@ -593,6 +593,11 @@ static void ip_vs_conn_expire(unsigned l
db744e5b 119 if (cp->control)
120 ip_vs_control_del(cp);
121
122+#ifdef CONFIG_IP_VS_NFCT
123+ if (sysctl_ip_vs_conntrack)
124+ ip_vs_nfct_conn_drop(cp);
125+#endif
126+
127 if (unlikely(cp->app != NULL))
128 ip_vs_unbind_app(cp);
129 ip_vs_unbind_dest(cp);
6d4e1af8 130diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
131--- v2.6.25/linux/net/ipv4/ipvs/ip_vs_core.c 2008-04-17 09:58:09.000000000 +0300
132+++ linux/net/ipv4/ipvs/ip_vs_core.c 2008-04-19 19:55:40.000000000 +0300
db744e5b 133@@ -661,6 +661,8 @@ static int ip_vs_out_icmp(struct sk_buff
134
135 skb->ipvs_property = 1;
136 verdict = NF_ACCEPT;
137+ if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL))
138+ verdict = NF_DROP;
139
140 out:
141 __ip_vs_conn_put(cp);
142@@ -761,19 +763,31 @@ ip_vs_out(unsigned int hooknum, struct s
143 if (!skb_make_writable(skb, ihl))
144 goto drop;
145
146+ if (!ip_vs_confirm_conntrack(skb, cp, hooknum))
147+ goto out;
148+
149 /* mangle the packet */
150 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
151 goto drop;
152 ip_hdr(skb)->saddr = cp->vaddr;
153 ip_send_check(ip_hdr(skb));
154
155+ /*
156+ * nf_iterate does not expect change in the skb->dst->dev.
157+ * It looks like it is not fatal to enable this code for hooks
158+ * where our handlers are at the end of the chain list and
159+ * when all next handlers use skb->dst->dev and not outdev.
160+ * It will definitely route the inout NAT traffic properly
161+ * when multiple paths are used.
162+ */
163+
164 /* For policy routing, packets originating from this
165 * machine itself may be routed differently to packets
166 * passing through. We want this packet to be routed as
167 * if it came from this machine itself. So re-compute
168 * the routing information.
169 */
170- if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
171+ if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL) != 0)
172 goto drop;
173
174 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
175@@ -788,8 +802,11 @@ ip_vs_out(unsigned int hooknum, struct s
176 return NF_ACCEPT;
177
178 drop:
179- ip_vs_conn_put(cp);
180 kfree_skb(skb);
181+
182+ out:
183+ ip_vs_conn_put(cp);
184+ LeaveFunction(11);
185 return NF_STOLEN;
186 }
187
6d4e1af8 188diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
189--- v2.6.25/linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-04-17 09:58:09.000000000 +0300
190+++ linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-04-19 19:55:40.000000000 +0300
db744e5b 191@@ -81,6 +81,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
192 int sysctl_ip_vs_expire_quiescent_template = 0;
193 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
194 int sysctl_ip_vs_nat_icmp_send = 0;
195+int sysctl_ip_vs_snat_reroute = 0;
196+#ifdef CONFIG_IP_VS_NFCT
197+int sysctl_ip_vs_conntrack = 0;
198+#endif
199
200
201 #ifdef CONFIG_IP_VS_DEBUG
202@@ -1446,6 +1450,15 @@ static struct ctl_table vs_vars[] = {
203 .mode = 0644,
204 .proc_handler = &proc_dointvec,
205 },
206+#ifdef CONFIG_IP_VS_NFCT
207+ {
208+ .procname = "conntrack",
209+ .data = &sysctl_ip_vs_conntrack,
210+ .maxlen = sizeof(int),
211+ .mode = 0644,
212+ .proc_handler = &proc_dointvec,
213+ },
214+#endif
215 {
216 .procname = "drop_entry",
217 .data = &sysctl_ip_vs_drop_entry,
218@@ -1467,6 +1480,13 @@ static struct ctl_table vs_vars[] = {
219 .mode = 0644,
220 .proc_handler = &proc_do_defense_mode,
221 },
222+ {
223+ .procname = "snat_reroute",
224+ .data = &sysctl_ip_vs_snat_reroute,
225+ .maxlen = sizeof(int),
226+ .mode = 0644,
227+ .proc_handler = &proc_dointvec,
228+ },
229 #if 0
230 {
231 .procname = "timeout_established",
6d4e1af8 232diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
233--- v2.6.25/linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-01-25 10:45:06.000000000 +0200
234+++ linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-04-19 19:55:40.000000000 +0300
db744e5b 235@@ -195,6 +195,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
236 ip_vs_control_add(n_cp, cp);
237 }
238
239+#ifdef CONFIG_IP_VS_NFCT
240+ if (skb->nfct)
241+ ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
242+#endif
243+
244 /*
245 * Replace the old passive address with the new one
246 */
247@@ -327,6 +332,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
248 ip_vs_control_add(n_cp, cp);
249 }
250
251+#ifdef CONFIG_IP_VS_NFCT
252+ if (skb->nfct)
253+ ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
254+#endif
255+
256 /*
257 * Move tunnel to listen state
258 */
6d4e1af8 259diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
260--- v2.6.25/linux/net/ipv4/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
261+++ linux/net/ipv4/ipvs/ip_vs_nfct.c 2008-04-19 20:06:46.000000000 +0300
db744e5b 262@@ -0,0 +1,385 @@
263+/*
264+ * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
265+ *
266+ * Portions Copyright (C) 2001-2002
267+ * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
268+ *
269+ * Portions Copyright (C) 2003-2008
270+ * Julian Anastasov
271+ *
272+ *
273+ * This code is free software; you can redistribute it and/or modify
274+ * it under the terms of the GNU General Public License as published by
275+ * the Free Software Foundation; either version 2 of the License, or
276+ * (at your option) any later version.
277+ *
278+ * This program is distributed in the hope that it will be useful,
279+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
280+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
281+ * GNU General Public License for more details.
282+ *
283+ * You should have received a copy of the GNU General Public License
284+ * along with this program; if not, write to the Free Software
285+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
286+ *
287+ *
288+ * Authors:
289+ * Ben North <ben@redfrontdoor.org>
290+ * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
291+ *
292+ *
293+ * Current status:
294+ *
295+ * - provide conntrack confirmation for new and related connections; this
296+ * way we can see their proper conntrack state in all hooks
297+ * - support for all forwarding methods, not only NAT
298+ * - FTP support (NAT), ability to support other NAT apps with expectations
299+ * - to correctly create expectations for related NAT connections the proper
300+ * NF conntrack support must be already installed, eg. ip_vs_ftp requires
301+ * nf_conntrack_ftp for the same ports
302+ *
303+ */
304+
305+#include <linux/module.h>
306+#include <linux/types.h>
307+#include <linux/kernel.h>
308+#include <linux/errno.h>
309+#include <linux/compiler.h>
310+#include <linux/vmalloc.h>
311+#include <linux/skbuff.h>
312+#include <net/ip.h>
313+#include <linux/netfilter.h>
314+#include <linux/netfilter_ipv4.h>
315+#include <net/ip_vs.h>
316+
317+
318+EXPORT_SYMBOL(ip_vs_nfct_expect_related);
319+
320+
321+#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
322+#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
323+ NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
324+ (t)->dst.protonum
325+
326+#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
327+#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
328+ NIPQUAD((c)->vaddr), ntohs((c)->vport), \
329+ NIPQUAD((c)->daddr), ntohs((c)->dport), \
330+ (c)->protocol, (c)->state
331+
332+/* Returns boolean and skb is freed on failure */
333+static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
334+ unsigned int hooknum)
335+{
336+ /*
337+ * The assumptions:
338+ * - the nfct is !NULL and is not confirmed
339+ * - we are called before any mangle
340+ */
341+
342+ struct iphdr *iph = ip_hdr(skb);
343+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
344+ struct nf_conntrack_tuple new_reply;
345+ int ret = NF_DROP;
346+ __be16 _ports[2], *pptr;
347+#ifdef CONFIG_IP_VS_DEBUG
348+ struct nf_conntrack_tuple *orig_tup =
349+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
350+ struct nf_conntrack_tuple *orig_rep =
351+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
352+#endif
353+#ifdef CONFIG_NF_NAT_NEEDED
354+ int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
355+#else
356+ int initialized = 0;
357+#endif
358+
359+ IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
360+ ", cp=" FMT_CONN "\n",
361+ __FUNCTION__, ct, initialized,
362+ ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
363+
364+#ifdef CONFIG_NF_NAT_NEEDED
365+ /*
366+ * This is really bad, maybe we are trying to alter a DNAT conn?
367+ * This is not supported, avoid the confirmation.
368+ */
369+ if (initialized && ct->status & IPS_NAT_MASK) {
370+#ifdef CONFIG_IP_VS_DEBUG
371+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
372+ __FUNCTION__, ct, ct->status, initialized);
373+#endif
374+ return 1;
375+ }
376+#endif
377+
6d4e1af8 378+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
db744e5b 379+ goto confirm;
380+
381+ /*
382+ * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
383+ * For related connections in inout direction it is done in
384+ * expectfn callback.
385+ */
386+
387+ pptr = skb_header_pointer(skb, ip_hdrlen(skb),
388+ sizeof(_ports), _ports);
389+ if (!pptr)
390+ goto out;
391+
392+ new_reply = (struct nf_conntrack_tuple) {
393+ .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
394+
395+ new_reply.src.u3.ip = cp->daddr;
396+ new_reply.src.u.tcp.port = cp->dport;
397+ new_reply.src.l3num = PF_INET;
398+ new_reply.dst.u3.ip = iph->saddr;
399+ new_reply.dst.u.tcp.port = pptr[0];
400+
401+ nf_conntrack_alter_reply(ct, &new_reply);
402+
403+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
404+ ", new_reply=" FMT_TUPLE " => alter_reply\n",
405+ __FUNCTION__, ct, initialized,
406+ ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
407+
408+ /*
409+ * No need to rehash NAT info because we don't change source
410+ * address in original direction
411+ */
412+
413+confirm:
414+
415+ ret = __nf_conntrack_confirm(skb);
416+
417+ if (ret != NF_STOLEN) {
418+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
419+ __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
420+ }
421+
422+ if (ret != NF_ACCEPT)
423+ goto out;
424+ return 1;
425+
426+out:
427+ if (ret != NF_STOLEN)
428+ kfree_skb(skb);
429+ return 0;
430+}
431+
432+/*
433+ * Confirm (and optionally alter) the conntrack entry if needed
434+ * because the IPVS packets do not reach ipv4_confirm.
435+ */
436+int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
437+ unsigned int hooknum)
438+{
439+ struct iphdr *iph = ip_hdr(skb);
440+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
441+
442+ /* By the time we're sending the packet out the other
443+ * side, there should be a confirmed Netfilter CT entry
444+ * for this connection. This may not be the case,
445+ * however, if it's a brand new connection, or if the NF
446+ * entry has timed out before ours has. Either way, if
447+ * the NF CT entry is unconfirmed, confirm it, and deal
448+ * with reply tuple mangling at the same time.
449+ */
450+
451+ /* We only deal with TCP or UDP packets */
452+ if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
453+ return 1;
454+
455+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
456+ /*
457+ * Do not be surprised if non-NAT conntracks stay in SYN_SENT
458+ * state; maybe the replies from the real server go
459+ * directly to the client. In any case, keep them in REPLIED
460+ * state (ESTABLISHED).
461+ */
462+ if (iph->protocol != IPPROTO_TCP ||
463+ IP_VS_TCP_S_ESTABLISHED == cp->state) {
464+ set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
465+ }
466+ }
467+
468+ /*
469+ * We assume the reused connections do not change their rip:rport
470+ * and we do not need to alter their conntrack reply
471+ */
472+ return __ip_vs_nfct_confirm(skb, cp, hooknum);
473+}
474+
475+/*
476+ * We are called from init_conntrack() as expectfn handler
477+ */
478+
479+static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
480+ struct nf_conntrack_expect *exp)
481+{
482+ struct nf_conntrack_tuple *orig, new_reply;
483+ struct ip_vs_conn *cp;
484+
485+ if (exp->tuple.src.l3num != PF_INET)
486+ return;
487+
488+ /*
489+ * - We assume that no NF locks are held before this callback
490+ * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
491+ * expectations even if they use wildcard values, now we provide
492+ * the actual values from the newly created original conntrack direction
493+ * - the conntrack is confirmed when packet reaches IPVS hooks
494+ */
495+
496+ /* RS->CLIENT */
497+ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
498+ cp = ip_vs_conn_out_get(orig->dst.protonum,
499+ orig->src.u3.ip, orig->src.u.tcp.port,
500+ orig->dst.u3.ip, orig->dst.u.tcp.port);
501+ if (cp) {
502+ /* Change reply CLIENT->RS to CLIENT->VS */
503+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
504+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
505+ ", found inout cp=" FMT_CONN "\n",
506+ __FUNCTION__, ct, ct->status,
507+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
508+ ARG_CONN(cp));
509+ new_reply.dst.u3.ip = cp->vaddr;
510+ new_reply.dst.u.tcp.port = cp->vport;
511+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
512+ ", inout cp=" FMT_CONN "\n",
513+ __FUNCTION__, ct,
514+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
515+ ARG_CONN(cp));
516+ goto alter;
517+ }
518+
519+ /* CLIENT->VS */
520+ cp = ip_vs_conn_in_get(orig->dst.protonum,
521+ orig->src.u3.ip, orig->src.u.tcp.port,
522+ orig->dst.u3.ip, orig->dst.u.tcp.port);
523+ if (cp) {
524+ /* Change reply VS->CLIENT to RS->CLIENT */
525+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
526+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
527+ ", found outin cp=" FMT_CONN "\n",
528+ __FUNCTION__, ct, ct->status,
529+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
530+ ARG_CONN(cp));
531+ new_reply.src.u3.ip = cp->daddr;
532+ new_reply.src.u.tcp.port = cp->dport;
533+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
534+ ", outin cp=" FMT_CONN "\n",
535+ __FUNCTION__, ct,
536+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
537+ ARG_CONN(cp));
538+ goto alter;
539+ }
540+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
541+ __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
542+ return;
543+
544+alter:
545+
546+ /* Never alter conntrack for non-NAT conns */
547+ if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
548+ nf_conntrack_alter_reply(ct, &new_reply);
549+ ip_vs_conn_put(cp);
550+ return;
551+}
552+
553+/*
554+ * Create NF conntrack expectation with wildcard (optional) source port.
555+ * Then the default callback function will alter the reply and will confirm
556+ * the conntrack entry when the first packet comes.
557+ */
558+void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
559+ __be16 port, __u16 proto, int from_rs)
560+{
561+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
562+ struct nf_conntrack_expect *e;
563+
564+ if (!sysctl_ip_vs_conntrack)
565+ return;
566+
567+ if (!ct) {
568+ IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
569+ __FUNCTION__, ct, ARG_CONN(cp));
570+ return;
571+ }
572+
573+ if (!(e = nf_ct_expect_alloc(ct)))
574+ return;
575+
576+ e->expectfn = ip_vs_nfct_expect_callback;
577+ e->helper = NULL;
578+ e->flags = 0;
579+ memset(&e->tuple, 0, sizeof(e->tuple));
580+ e->tuple.src.u.tcp.port = port;
581+ e->tuple.src.l3num = PF_INET;
582+ e->tuple.dst.protonum = proto;
583+ memset(&e->mask, 0, sizeof(e->mask));
584+ e->mask.src.u3.ip = 0xffffffff;
585+ e->mask.src.u.all = port? 0xffff : 0;
586+
587+ if (from_rs) {
588+ e->tuple.src.u3.ip = cp->daddr;
589+ e->tuple.dst.u3.ip = cp->caddr;
590+ e->tuple.dst.u.tcp.port = cp->cport;
591+ } else {
592+ e->tuple.src.u3.ip = cp->caddr;
593+ e->tuple.dst.u3.ip = cp->vaddr;
594+ e->tuple.dst.u.tcp.port = cp->vport;
595+ }
596+
597+ IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
598+ __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
599+ nf_ct_expect_related(e);
600+ nf_ct_expect_put(e);
601+}
602+
603+/*
604+ * Our connection was terminated, try to drop the conntrack immediately
605+ */
606+void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
607+{
608+ struct nf_conntrack_tuple_hash *h;
609+ struct nf_conn *ct;
610+ struct nf_conntrack_tuple tuple;
611+
612+ if (!cp->cport)
613+ return;
614+
615+ tuple = (struct nf_conntrack_tuple) {
616+ .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
617+ tuple.src.u3.ip = cp->caddr;
618+ tuple.src.u.all = cp->cport;
619+ tuple.src.l3num = PF_INET;
620+ tuple.dst.u3.ip = cp->vaddr;
621+ tuple.dst.u.all = cp->vport;
622+
623+ IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
624+ " for conn " FMT_CONN "\n",
625+ __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
626+
627+ h = nf_conntrack_find_get(&tuple);
628+ if (h) {
629+ ct = nf_ct_tuplehash_to_ctrack(h);
630+ if (del_timer(&ct->timeout)) {
631+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
632+ FMT_TUPLE "\n",
633+ __FUNCTION__, ct, ARG_TUPLE(&tuple));
634+ if (ct->timeout.function)
635+ ct->timeout.function(ct->timeout.data);
636+ } else {
637+ IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
638+ FMT_TUPLE "\n",
639+ __FUNCTION__, ct, ARG_TUPLE(&tuple));
640+ }
641+ nf_ct_put(ct);
642+ } else {
643+ IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
644+ __FUNCTION__, ARG_TUPLE(&tuple));
645+ }
646+}
647+
6d4e1af8 648diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
649--- v2.6.25/linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-04-17 09:58:09.000000000 +0300
650+++ linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-04-19 20:04:42.000000000 +0300
db744e5b 651@@ -141,7 +141,6 @@ int
652 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
653 struct ip_vs_protocol *pp)
654 {
655- /* we do not touch skb and do not need pskb ptr */
656 return NF_ACCEPT;
657 }
658
659@@ -199,6 +198,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
660 dst_release(skb->dst);
661 skb->dst = &rt->u.dst;
662
6d4e1af8 663+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 664+ goto tx_error_out;
665+
666 /* Another hack: avoid icmp_send in ip_fragment */
667 skb->local_df = 1;
668
669@@ -211,6 +213,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
670 dst_link_failure(skb);
671 tx_error:
672 kfree_skb(skb);
673+ tx_error_out:
674 LeaveFunction(10);
675 return NF_STOLEN;
676 }
677@@ -263,6 +266,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
678 dst_release(skb->dst);
679 skb->dst = &rt->u.dst;
680
6d4e1af8 681+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 682+ goto tx_error_out;
683+
684 /* mangle the packet */
685 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
686 goto tx_error;
687@@ -286,8 +292,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
688 tx_error_icmp:
689 dst_link_failure(skb);
690 tx_error:
691- LeaveFunction(10);
692 kfree_skb(skb);
693+ tx_error_out:
694+ LeaveFunction(10);
695 return NF_STOLEN;
696 tx_error_put:
697 ip_rt_put(rt);
698@@ -386,14 +393,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
699 /* fix old IP header checksum */
700 ip_send_check(old_iph);
701
702- skb_push(skb, sizeof(struct iphdr));
703- skb_reset_network_header(skb);
704- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
705-
706 /* drop old route */
707 dst_release(skb->dst);
708 skb->dst = &rt->u.dst;
709
6d4e1af8 710+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 711+ goto tx_error_out;
712+
713+ skb_push(skb, sizeof(struct iphdr));
714+ skb_reset_network_header(skb);
715+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
716+
717 /*
718 * Push down and install the IPIP header.
719 */
6d4e1af8 720@@ -421,6 +431,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
db744e5b 721 dst_link_failure(skb);
722 tx_error:
723 kfree_skb(skb);
724+ tx_error_out:
725 LeaveFunction(10);
726 return NF_STOLEN;
727 }
6d4e1af8 728@@ -466,6 +477,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
db744e5b 729 dst_release(skb->dst);
730 skb->dst = &rt->u.dst;
731
6d4e1af8 732+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 733+ goto tx_error_out;
734+
735 /* Another hack: avoid icmp_send in ip_fragment */
736 skb->local_df = 1;
737
6d4e1af8 738@@ -478,6 +492,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
db744e5b 739 dst_link_failure(skb);
740 tx_error:
741 kfree_skb(skb);
742+ tx_error_out:
743 LeaveFunction(10);
744 return NF_STOLEN;
745 }
6d4e1af8 746@@ -537,6 +552,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
db744e5b 747 dst_release(skb->dst);
748 skb->dst = &rt->u.dst;
749
750+ /* TODO: properly alter reply for NFCT */
751+
752 ip_vs_nat_icmp(skb, pp, cp, 0);
753
754 /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.247575 seconds and 4 git commands to generate.