]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-ipvs-nfct.patch
- fix netlink (inet_diag) inside vservers
[packages/kernel.git] / kernel-ipvs-nfct.patch
CommitLineData
9a91a2bf 1diff -urNp v2.6.22/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2--- v2.6.22/linux/include/net/ip_vs.h 2007-02-11 01:06:29.000000000 +0200
3+++ linux/include/net/ip_vs.h 2007-07-12 12:03:43.000000000 +0300
4@@ -9,6 +9,16 @@
5 #include <asm/types.h> /* For __uXX types */
6 #include <linux/types.h> /* For __beXX types in userland */
7
8+#ifdef __KERNEL__
9+#include <linux/skbuff.h>
10+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11+#include <net/netfilter/nf_conntrack.h>
12+#include <net/netfilter/nf_conntrack_core.h>
13+#include <net/netfilter/nf_conntrack_expect.h>
14+#include <net/netfilter/nf_conntrack_helper.h>
15+#endif
16+#endif
17+
18 #define IP_VS_VERSION_CODE 0x010201
19 #define NVERSION(version) \
20 (version >> 16) & 0xFF, \
21@@ -358,6 +368,8 @@ enum {
22 NET_IPV4_VS_SYNC_THRESHOLD=24,
23 NET_IPV4_VS_NAT_ICMP_SEND=25,
24 NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE=26,
25+ NET_IPV4_VS_SNAT_REROUTE=27,
26+ NET_IPV4_VS_CONNTRACK=28,
27 NET_IPV4_VS_LAST
28 };
29
30@@ -715,6 +727,16 @@ extern void ip_vs_init_hash_table(struct
31 */
32
33 /*
34+ * Netfilter connection tracking
35+ * (from ip_vs_nfct.c)
36+ */
37+extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
38+extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
39+ struct ip_vs_conn *cp,
40+ __be16 port, __u16 proto, int from_rs);
41+extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
42+
43+/*
44 * IPVS connection entry hash table
45 */
46 #ifndef CONFIG_IP_VS_TAB_BITS
47@@ -885,8 +907,41 @@ extern int sysctl_ip_vs_expire_nodest_co
48 extern int sysctl_ip_vs_expire_quiescent_template;
49 extern int sysctl_ip_vs_sync_threshold[2];
50 extern int sysctl_ip_vs_nat_icmp_send;
51+extern int sysctl_ip_vs_snat_reroute;
52 extern struct ip_vs_stats ip_vs_stats;
53
54+#ifdef CONFIG_IP_VS_NFCT
55+
56+extern int sysctl_ip_vs_conntrack;
57+/* Returns nonzero only when sysctl_ip_vs_conntrack is set and skb has a conntrack entry attached (skb->nfct). */
58+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
59+{
60+ return sysctl_ip_vs_conntrack && skb->nfct;
61+}
62+
63+/* Make sure the conntrack attached to skb is confirmed: returns 1 when conntrack is not in use or the entry is already confirmed, otherwise the result of ip_vs_nfct_confirm(). Returns boolean and skb is freed on failure */
64+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
65+{
66+ if (!ip_vs_use_conntrack(skb))
67+ return 1;
68+ return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
69+ ip_vs_nfct_confirm(skb, cp, hooknum);
70+}
71+
72+#else
73+/* Variant for builds without CONFIG_IP_VS_NFCT: conntrack is never used, always returns 0. */
74+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
75+{
76+ return 0;
77+}
78+/* Variant for builds without CONFIG_IP_VS_NFCT: nothing to confirm, always reports success (1) and does not touch skb. */
79+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
80+{
81+ return 1;
82+}
83+
84+#endif
85+
86 extern struct ip_vs_service *
87 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
88
89diff -urNp v2.6.22/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
90--- v2.6.22/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
91+++ linux/net/ipv4/ipvs/Kconfig 2007-07-12 09:48:59.000000000 +0300
92@@ -221,4 +221,12 @@ config IP_VS_FTP
93 If you want to compile it in kernel, say Y. To compile it as a
94 module, choose M here. If unsure, say N.
95
96+config IP_VS_NFCT
97+ bool "Netfilter connection tracking"
98+ depends on NF_CONNTRACK
99+ ---help---
100+ The Netfilter connection tracking support allows the IPVS
101+ connection state to be exported to the Netfilter framework
102+ for filtering purposes.
103+
104 endif # IP_VS
105diff -urNp v2.6.22/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
106--- v2.6.22/linux/net/ipv4/ipvs/Makefile 2005-06-18 08:50:52.000000000 +0300
107+++ linux/net/ipv4/ipvs/Makefile 2007-07-12 09:47:58.000000000 +0300
108@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
109 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
110 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
111
112+ip_vs-extra_objs-y :=
113+ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
114+
115 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
116 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
117 ip_vs_est.o ip_vs_proto.o \
118- $(ip_vs_proto-objs-y)
119+ $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
120
121
122 # IPVS core
123diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
124--- v2.6.22/linux/net/ipv4/ipvs/ip_vs_conn.c 2007-04-28 17:55:11.000000000 +0300
125+++ linux/net/ipv4/ipvs/ip_vs_conn.c 2007-07-12 09:47:58.000000000 +0300
126@@ -562,6 +562,11 @@ static void ip_vs_conn_expire(unsigned l
127 if (cp->control)
128 ip_vs_control_del(cp);
129
130+#ifdef CONFIG_IP_VS_NFCT
131+ if (sysctl_ip_vs_conntrack)
132+ ip_vs_nfct_conn_drop(cp);
133+#endif
134+
135 if (unlikely(cp->app != NULL))
136 ip_vs_unbind_app(cp);
137 ip_vs_unbind_dest(cp);
138diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
139--- v2.6.22/linux/net/ipv4/ipvs/ip_vs_core.c 2007-07-10 09:18:43.000000000 +0300
140+++ linux/net/ipv4/ipvs/ip_vs_core.c 2007-07-12 09:47:58.000000000 +0300
141@@ -701,6 +701,8 @@ static int ip_vs_out_icmp(struct sk_buff
142
143 skb->ipvs_property = 1;
144 verdict = NF_ACCEPT;
145+ if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(pskb, RTN_LOCAL))
146+ verdict = NF_DROP;
147
148 out:
149 __ip_vs_conn_put(cp);
150@@ -805,6 +807,9 @@ ip_vs_out(unsigned int hooknum, struct s
151 if (!ip_vs_make_skb_writable(pskb, ihl))
152 goto drop;
153
154+ if (!ip_vs_confirm_conntrack(*pskb, cp, hooknum))
155+ goto out;
156+
157 /* mangle the packet */
158 if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
159 goto drop;
160@@ -812,13 +817,23 @@ ip_vs_out(unsigned int hooknum, struct s
161 ip_hdr(skb)->saddr = cp->vaddr;
162 ip_send_check(ip_hdr(skb));
163
164+ /*
165+ * nf_iterate does not expect change in the skb->dst->dev.
166+ * It looks like it is not fatal to enable this code for hooks
167+ * where our handlers are at the end of the chain list and
168+ * when all next handlers use skb->dst->dev and not outdev.
169+ * It will definitely route the inout NAT traffic properly
170+ * when multiple paths are used.
171+ */
172+
173 /* For policy routing, packets originating from this
174 * machine itself may be routed differently to packets
175 * passing through. We want this packet to be routed as
176 * if it came from this machine itself. So re-compute
177 * the routing information.
178 */
179- if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
180+
181+ if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(pskb, RTN_LOCAL))
182 goto drop;
183 skb = *pskb;
184
185@@ -834,8 +849,11 @@ ip_vs_out(unsigned int hooknum, struct s
186 return NF_ACCEPT;
187
188 drop:
189- ip_vs_conn_put(cp);
190 kfree_skb(*pskb);
191+
192+ out:
193+ ip_vs_conn_put(cp);
194+ LeaveFunction(11);
195 return NF_STOLEN;
196 }
197
198diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
199--- v2.6.22/linux/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-10 09:18:43.000000000 +0300
200+++ linux/net/ipv4/ipvs/ip_vs_ctl.c 2007-07-12 09:47:58.000000000 +0300
201@@ -81,6 +81,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
202 int sysctl_ip_vs_expire_quiescent_template = 0;
203 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
204 int sysctl_ip_vs_nat_icmp_send = 0;
205+int sysctl_ip_vs_snat_reroute = 0;
206+#ifdef CONFIG_IP_VS_NFCT
207+int sysctl_ip_vs_conntrack = 0;
208+#endif
209
210
211 #ifdef CONFIG_IP_VS_DEBUG
212@@ -1424,6 +1428,16 @@ static struct ctl_table vs_vars[] = {
213 .mode = 0644,
214 .proc_handler = &proc_dointvec,
215 },
216+#ifdef CONFIG_IP_VS_NFCT
217+ {
218+ .ctl_name = NET_IPV4_VS_CONNTRACK,
219+ .procname = "conntrack",
220+ .data = &sysctl_ip_vs_conntrack,
221+ .maxlen = sizeof(int),
222+ .mode = 0644,
223+ .proc_handler = &proc_dointvec,
224+ },
225+#endif
226 {
227 .ctl_name = NET_IPV4_VS_DROP_ENTRY,
228 .procname = "drop_entry",
229@@ -1448,6 +1462,14 @@ static struct ctl_table vs_vars[] = {
230 .mode = 0644,
231 .proc_handler = &proc_do_defense_mode,
232 },
233+ {
234+ .ctl_name = NET_IPV4_VS_SNAT_REROUTE,
235+ .procname = "snat_reroute",
236+ .data = &sysctl_ip_vs_snat_reroute,
237+ .maxlen = sizeof(int),
238+ .mode = 0644,
239+ .proc_handler = &proc_dointvec,
240+ },
241 #if 0
242 {
243 .ctl_name = NET_IPV4_VS_TO_ES,
244diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
245--- v2.6.22/linux/net/ipv4/ipvs/ip_vs_ftp.c 2007-07-10 09:18:43.000000000 +0300
246+++ linux/net/ipv4/ipvs/ip_vs_ftp.c 2007-07-12 09:47:58.000000000 +0300
247@@ -194,6 +194,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
248 ip_vs_control_add(n_cp, cp);
249 }
250
251+#ifdef CONFIG_IP_VS_NFCT
252+ if ((*pskb)->nfct)
253+ ip_vs_nfct_expect_related(*pskb, n_cp, 0, IPPROTO_TCP, 0);
254+#endif
255+
256 /*
257 * Replace the old passive address with the new one
258 */
259@@ -326,6 +331,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
260 ip_vs_control_add(n_cp, cp);
261 }
262
263+#ifdef CONFIG_IP_VS_NFCT
264+ if ((*pskb)->nfct)
265+ ip_vs_nfct_expect_related(*pskb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
266+#endif
267+
268 /*
269 * Move tunnel to listen state
270 */
271diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
272--- v2.6.22/linux/net/ipv4/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
273+++ linux/net/ipv4/ipvs/ip_vs_nfct.c 2007-07-12 12:04:31.000000000 +0300
274@@ -0,0 +1,389 @@
275+/*
276+ * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
277+ *
278+ * Portions Copyright (C) 2001-2002
279+ * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
280+ *
281+ * Portions Copyright (C) 2003-2007
282+ * Julian Anastasov
283+ *
284+ *
285+ * This code is free software; you can redistribute it and/or modify
286+ * it under the terms of the GNU General Public License as published by
287+ * the Free Software Foundation; either version 2 of the License, or
288+ * (at your option) any later version.
289+ *
290+ * This program is distributed in the hope that it will be useful,
291+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
292+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
293+ * GNU General Public License for more details.
294+ *
295+ * You should have received a copy of the GNU General Public License
296+ * along with this program; if not, write to the Free Software
297+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
298+ *
299+ *
300+ * Authors:
301+ * Ben North <ben@redfrontdoor.org>
302+ * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
303+ *
304+ *
305+ * Current status:
306+ *
307+ * - provide conntrack confirmation for new and related connections, so
308+ * that we can see their proper conntrack state in all hooks
309+ * - support for all forwarding methods, not only NAT
310+ * - FTP support (NAT), ability to support other NAT apps with expectations
311+ * - to correctly create expectations for related NAT connections the proper
312+ * NF conntrack support must be already installed, eg. ip_vs_ftp requires
313+ * nf_conntrack_ftp for the same ports
314+ *
315+ */
316+
317+#include <linux/module.h>
318+#include <linux/types.h>
319+#include <linux/kernel.h>
320+#include <linux/errno.h>
321+#include <linux/compiler.h>
322+#include <linux/vmalloc.h>
323+#include <linux/skbuff.h>
324+#include <net/ip.h>
325+#include <linux/netfilter.h>
326+#include <linux/netfilter_ipv4.h>
327+#include <net/ip_vs.h>
328+
329+
330+EXPORT_SYMBOL(ip_vs_nfct_expect_related);
331+
332+
333+#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
334+#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
335+ NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
336+ (t)->dst.protonum
337+
338+#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
339+#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
340+ NIPQUAD((c)->vaddr), ntohs((c)->vport), \
341+ NIPQUAD((c)->daddr), ntohs((c)->dport), \
342+ (c)->protocol, (c)->state
343+
344+/* Confirm the conntrack entry attached to skb; for MASQ conns outside NF_IP_FORWARD the reply tuple is first altered to rip:rport -> client. Returns boolean and skb is freed on failure (unless the confirm call returned NF_STOLEN) */
345+static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
346+ unsigned int hooknum)
347+{
348+ /*
349+ * The assumptions:
350+ * - the nfct is !NULL and is not confirmed
351+ * - we are called before any mangle
352+ */
353+
354+ struct iphdr *iph = ip_hdr(skb);
355+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
356+ struct nf_conntrack_tuple new_reply;
357+ int ret = NF_DROP;
358+ __be16 _ports[2], *pptr;
359+#ifdef CONFIG_IP_VS_DEBUG
360+ struct nf_conntrack_tuple *orig_tup =
361+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
362+ struct nf_conntrack_tuple *orig_rep =
363+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
364+#endif
365+#ifdef CONFIG_NF_NAT_NEEDED
366+ int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
367+#else
368+ int initialized = 0;
369+#endif
370+
371+ IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
372+ ", cp=" FMT_CONN "\n",
373+ __FUNCTION__, ct, initialized,
374+ ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
375+
376+#ifdef CONFIG_NF_NAT_NEEDED
377+ /*
378+ * This is really bad, maybe we are trying to alter a DNAT conn?
379+ * This is not supported, so skip the confirmation and report success.
380+ */
381+ if (initialized && ct->status & IPS_NAT_MASK) {
382+#ifdef CONFIG_IP_VS_DEBUG
383+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
384+ __FUNCTION__, ct, ct->status, initialized);
385+#endif
386+ return 1;
387+ }
388+#endif
389+
390+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_IP_FORWARD == hooknum)
391+ goto confirm;
392+
393+ /*
394+ * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
395+ * For related connections in inout direction it is done in
396+ * expectfn callback.
397+ */
398+
399+ pptr = skb_header_pointer(skb, ip_hdrlen(skb),
400+ sizeof(_ports), _ports);
401+ if (!pptr)
402+ goto out;
403+
404+ new_reply = (struct nf_conntrack_tuple) {
405+ .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
406+
407+ new_reply.src.u3.ip = cp->daddr;
408+ new_reply.src.u.tcp.port = cp->dport;
409+ new_reply.src.l3num = PF_INET;
410+ new_reply.dst.u3.ip = iph->saddr;
411+ new_reply.dst.u.tcp.port = pptr[0];
412+
413+ nf_conntrack_alter_reply(ct, &new_reply);
414+
415+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
416+ ", new_reply=" FMT_TUPLE " => alter_reply\n",
417+ __FUNCTION__, ct, initialized,
418+ ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
419+
420+ /*
421+ * No need to rehash NAT info because we don't change source
422+ * address in original direction
423+ */
424+
425+confirm:
426+
427+ ret = __nf_conntrack_confirm(&skb);
428+
429+ if (ret != NF_STOLEN) {
430+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
431+ __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
432+ }
433+
434+ if (ret != NF_ACCEPT)
435+ goto out;
436+ return 1;
437+ /* Failure path: free the skb unless the confirm call returned NF_STOLEN, then report failure (0) */
438+out:
439+ if (ret != NF_STOLEN)
440+ kfree_skb(skb);
441+ return 0;
442+}
443+
444+/*
445+ * Confirm (and optionally alter) the conntrack entry if needed
446+ * because the IPVS packets do not reach ipv4_confirm. Packets that are neither TCP nor UDP return 1 without any change; otherwise the boolean result of __ip_vs_nfct_confirm() is returned.
447+ */
448+int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
449+ unsigned int hooknum)
450+{
451+ struct iphdr *iph = ip_hdr(skb);
452+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
453+
454+ /* By the time we're sending the packet out the other
455+ * side, there should be a confirmed Netfilter CT entry
456+ * for this connection. This may not be the case,
457+ * however, if it's a brand new connection, or if the NF
458+ * entry has timed out before ours has. Either way, if
459+ * the NF CT entry is unconfirmed, confirm it, and deal
460+ * with reply tuple mangling at the same time.
461+ */
462+
463+ /* We only deal with TCP or UDP packets */
464+ if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
465+ return 1;
466+
467+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
468+ /*
469+ * Do not be surprised if non-NAT conntracks stay in SYN_SENT
470+ * state: the replies from the real server may go directly
471+ * to the client. So mark them as having seen a reply
472+ * (non-TCP always, TCP once the IPVS conn is ESTABLISHED).
473+ */
474+ if (iph->protocol != IPPROTO_TCP ||
475+ IP_VS_TCP_S_ESTABLISHED == cp->state) {
476+ set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
477+ }
478+ }
479+
480+ /*
481+ * We assume the reused connections do not change their rip:rport
482+ * and we do not need to alter their conntrack reply
483+ */
484+ return __ip_vs_nfct_confirm(skb, cp, hooknum);
485+}
486+
487+/*
488+ * We are called from init_conntrack() as expectfn handler. Looks up the IPVS conn matching the original tuple of ct (inout lookup first, then outin) and, for MASQ conns only, alters the conntrack reply tuple; non-PF_INET expectations and unknown conns are ignored.
489+ */
490+
491+static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
492+ struct nf_conntrack_expect *exp)
493+{
494+ struct nf_conntrack_tuple *orig, new_reply;
495+ struct ip_vs_conn *cp;
496+
497+ if (exp->tuple.src.l3num != PF_INET)
498+ return;
499+
500+ /*
501+ * - We assume that no NF locks are held before this callback
502+ * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
503+ * expectations even if they use wildcard values, now we provide
504+ * the actual values from the newly created original conntrack direction
505+ * - the conntrack is confirmed when packet reaches IPVS hooks
506+ */
507+
508+ /* RS->CLIENT */
509+ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
510+ cp = ip_vs_conn_out_get(orig->dst.protonum,
511+ orig->src.u3.ip, orig->src.u.tcp.port,
512+ orig->dst.u3.ip, orig->dst.u.tcp.port);
513+ if (cp) {
514+ /* Change reply CLIENT->RS to CLIENT->VS */
515+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
516+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
517+ ", found inout cp=" FMT_CONN "\n",
518+ __FUNCTION__, ct, ct->status,
519+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
520+ ARG_CONN(cp));
521+ new_reply.dst.u3.ip = cp->vaddr;
522+ new_reply.dst.u.tcp.port = cp->vport;
523+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
524+ ", inout cp=" FMT_CONN "\n",
525+ __FUNCTION__, ct,
526+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
527+ ARG_CONN(cp));
528+ goto alter;
529+ }
530+
531+ /* CLIENT->VS */
532+ cp = ip_vs_conn_in_get(orig->dst.protonum,
533+ orig->src.u3.ip, orig->src.u.tcp.port,
534+ orig->dst.u3.ip, orig->dst.u.tcp.port);
535+ if (cp) {
536+ /* Change reply VS->CLIENT to RS->CLIENT */
537+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
538+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
539+ ", found outin cp=" FMT_CONN "\n",
540+ __FUNCTION__, ct, ct->status,
541+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
542+ ARG_CONN(cp));
543+ new_reply.src.u3.ip = cp->daddr;
544+ new_reply.src.u.tcp.port = cp->dport;
545+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
546+ ", outin cp=" FMT_CONN "\n",
547+ __FUNCTION__, ct,
548+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
549+ ARG_CONN(cp));
550+ goto alter;
551+ }
552+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
553+ __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
554+ return;
555+
556+alter:
557+
558+ /* Never alter conntrack for non-NAT conns; the conn reference taken by the lookup is dropped either way */
559+ if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
560+ nf_conntrack_alter_reply(ct, &new_reply);
561+ ip_vs_conn_put(cp);
562+ return;
563+}
564+
565+/*
566+ * Create NF conntrack expectation with wildcard (optional) source port.
567+ * Then the default callback function will alter the reply and will confirm
568+ * the conntrack entry when the first packet comes. A port of 0 means any source port (mask 0); from_rs selects the expected tuple: nonzero is daddr -> caddr:cport, zero is caddr -> vaddr:vport. Does nothing when sysctl_ip_vs_conntrack is off, skb has no conntrack, or the expectation cannot be allocated.
569+ */
570+void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
571+ __be16 port, __u16 proto, int from_rs)
572+{
573+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
574+ struct nf_conntrack_expect *e;
575+
576+ if (!sysctl_ip_vs_conntrack)
577+ return;
578+
579+ if (!ct) {
580+ IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
581+ __FUNCTION__, ct, ARG_CONN(cp));
582+ return;
583+ }
584+
585+ if (!(e = nf_conntrack_expect_alloc(ct)))
586+ return;
587+
588+ e->expectfn = ip_vs_nfct_expect_callback;
589+ e->helper = NULL;
590+ e->flags = 0;
591+ memset(&e->tuple, 0, sizeof(e->tuple));
592+ e->tuple.src.u.tcp.port = port;
593+ e->tuple.src.l3num = PF_INET;
594+ e->tuple.dst.protonum = proto;
595+ memset(&e->mask, 0, sizeof(e->mask));
596+ e->mask.src.u3.ip = 0xffffffff;
597+ e->mask.src.u.all = port? 0xffff : 0;
598+ e->mask.src.l3num = 0xffff;
599+ e->mask.dst.u3.ip = 0xffffffff;
600+ e->mask.dst.u.all = 0xffff;
601+ e->mask.dst.protonum = 0xff;
602+
603+ if (from_rs) {
604+ e->tuple.src.u3.ip = cp->daddr;
605+ e->tuple.dst.u3.ip = cp->caddr;
606+ e->tuple.dst.u.tcp.port = cp->cport;
607+ } else {
608+ e->tuple.src.u3.ip = cp->caddr;
609+ e->tuple.dst.u3.ip = cp->vaddr;
610+ e->tuple.dst.u.tcp.port = cp->vport;
611+ }
612+
613+ IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
614+ __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
615+ nf_conntrack_expect_related(e);
616+ nf_conntrack_expect_put(e);
617+}
618+
619+/*
620+ * Our connection was terminated, try to drop the conntrack immediately: look up the conntrack whose original tuple is caddr:cport -> vaddr:vport and, if its pending timeout timer could be deleted, call the timer function directly.
621+ */
622+void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
623+{
624+ struct nf_conntrack_tuple_hash *h;
625+ struct nf_conn *ct;
626+ struct nf_conntrack_tuple tuple;
627+ /* No lookup is attempted for conns whose cport is 0 */
628+ if (!cp->cport)
629+ return;
630+
631+ tuple = (struct nf_conntrack_tuple) {
632+ .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
633+ tuple.src.u3.ip = cp->caddr;
634+ tuple.src.u.all = cp->cport;
635+ tuple.src.l3num = PF_INET;
636+ tuple.dst.u3.ip = cp->vaddr;
637+ tuple.dst.u.all = cp->vport;
638+
639+ IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
640+ " for conn " FMT_CONN "\n",
641+ __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
642+
643+ h = nf_conntrack_find_get(&tuple, NULL);
644+ if (h) {
645+ ct = nf_ct_tuplehash_to_ctrack(h);
646+ if (del_timer(&ct->timeout)) {
647+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
648+ FMT_TUPLE "\n",
649+ __FUNCTION__, ct, ARG_TUPLE(&tuple));
650+ if (ct->timeout.function)
651+ ct->timeout.function(ct->timeout.data);
652+ } else {
653+ IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
654+ FMT_TUPLE "\n",
655+ __FUNCTION__, ct, ARG_TUPLE(&tuple));
656+ }
657+ nf_ct_put(ct);
658+ } else {
659+ IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
660+ __FUNCTION__, ARG_TUPLE(&tuple));
661+ }
662+}
663+
664diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
665--- v2.6.22/linux/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-10 09:18:43.000000000 +0300
666+++ linux/net/ipv4/ipvs/ip_vs_xmit.c 2007-07-12 09:54:45.000000000 +0300
667@@ -199,6 +199,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
668 dst_release(skb->dst);
669 skb->dst = &rt->u.dst;
670
671+ if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
672+ goto tx_error_out;
673+
674 /* Another hack: avoid icmp_send in ip_fragment */
675 skb->local_df = 1;
676
677@@ -211,6 +214,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
678 dst_link_failure(skb);
679 tx_error:
680 kfree_skb(skb);
681+ tx_error_out:
682 LeaveFunction(10);
683 return NF_STOLEN;
684 }
685@@ -263,6 +267,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
686 dst_release(skb->dst);
687 skb->dst = &rt->u.dst;
688
689+ if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
690+ goto tx_error_out;
691+
692 /* mangle the packet */
693 if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
694 goto tx_error;
695@@ -286,8 +293,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
696 tx_error_icmp:
697 dst_link_failure(skb);
698 tx_error:
699- LeaveFunction(10);
700 kfree_skb(skb);
701+ tx_error_out:
702+ LeaveFunction(10);
703 return NF_STOLEN;
704 tx_error_put:
705 ip_rt_put(rt);
706@@ -386,14 +394,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
707 /* fix old IP header checksum */
708 ip_send_check(old_iph);
709
710- skb_push(skb, sizeof(struct iphdr));
711- skb_reset_network_header(skb);
712- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
713-
714 /* drop old route */
715 dst_release(skb->dst);
716 skb->dst = &rt->u.dst;
717
718+ if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
719+ goto tx_error_out;
720+
721+ skb_push(skb, sizeof(struct iphdr));
722+ skb_reset_network_header(skb);
723+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
724+
725 /*
726 * Push down and install the IPIP header.
727 */
728@@ -423,6 +434,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
729 dst_link_failure(skb);
730 tx_error:
731 kfree_skb(skb);
732+ tx_error_out:
733 LeaveFunction(10);
734 return NF_STOLEN;
735 }
736@@ -468,6 +480,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
737 dst_release(skb->dst);
738 skb->dst = &rt->u.dst;
739
740+ if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
741+ goto tx_error_out;
742+
743 /* Another hack: avoid icmp_send in ip_fragment */
744 skb->local_df = 1;
745
746@@ -480,6 +495,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
747 dst_link_failure(skb);
748 tx_error:
749 kfree_skb(skb);
750+ tx_error_out:
751 LeaveFunction(10);
752 return NF_STOLEN;
753 }
754@@ -539,6 +555,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
755 dst_release(skb->dst);
756 skb->dst = &rt->u.dst;
757
758+ /* TODO: properly alter reply for NFCT */
759+
760 ip_vs_nat_icmp(skb, pp, cp, 0);
761
762 /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.430188 seconds and 4 git commands to generate.