git.pld-linux.org Git - packages/kernel.git/blame - kernel-ipvs-nfct.patch
- move vserver configs to kernel-vserver.config; update config
[packages/kernel.git] / kernel-ipvs-nfct.patch
CommitLineData
78978408
AM
1diff -urNp v2.6.34/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2--- v2.6.34/linux/include/net/ip_vs.h 2010-05-17 10:49:00.000000000 +0300
3+++ linux/include/net/ip_vs.h 2010-05-19 11:27:25.000000000 +0300
4@@ -25,6 +25,15 @@
2380c486
JR
5 #include <linux/ip.h>
6 #include <linux/ipv6.h> /* for struct ipv6hdr */
7 #include <net/ipv6.h> /* for ipv6_addr_copy */
8+#include <linux/skbuff.h>
9+
10+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11+#include <net/netfilter/nf_conntrack.h>
12+#include <net/netfilter/nf_conntrack_core.h>
13+#include <net/netfilter/nf_conntrack_expect.h>
14+#include <net/netfilter/nf_conntrack_helper.h>
78978408 15+#include <net/netfilter/nf_conntrack_zones.h>
2380c486
JR
16+#endif
17
78978408
AM
18
19 /* Connections' size value needed by ip_vs_ctl.c */
20@@ -613,6 +622,16 @@ extern void ip_vs_init_hash_table(struct
2380c486
JR
21 #define IP_VS_APP_TYPE_FTP 1
22
23 /*
24+ * Netfilter connection tracking
25+ * (from ip_vs_nfct.c)
26+ */
27+extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
28+extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
29+ struct ip_vs_conn *cp,
30+ __be16 port, __u16 proto, int from_rs);
31+extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
32+
33+/*
34 * ip_vs_conn handling functions
35 * (from ip_vs_conn.c)
36 */
78978408 37@@ -788,9 +807,42 @@ extern int sysctl_ip_vs_expire_nodest_co
2380c486
JR
38 extern int sysctl_ip_vs_expire_quiescent_template;
39 extern int sysctl_ip_vs_sync_threshold[2];
40 extern int sysctl_ip_vs_nat_icmp_send;
41+extern int sysctl_ip_vs_snat_reroute;
42 extern struct ip_vs_stats ip_vs_stats;
43 extern const struct ctl_path net_vs_ctl_path[];
44
45+#ifdef CONFIG_IP_VS_NFCT
46+
47+extern int sysctl_ip_vs_conntrack;
48+
49+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
50+{
51+ return sysctl_ip_vs_conntrack && skb->nfct;
52+}
53+
54+/* Returns boolean and skb is freed on failure */
55+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
56+{
57+ if (!ip_vs_use_conntrack(skb))
58+ return 1;
59+ return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
60+ ip_vs_nfct_confirm(skb, cp, hooknum);
61+}
62+
63+#else
64+
65+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
66+{
67+ return 0;
68+}
69+
70+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
71+{
72+ return 1;
73+}
74+
75+#endif
76+
77 extern struct ip_vs_service *
78 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
79 const union nf_inet_addr *vaddr, __be16 vport);
78978408
AM
80diff -urNp v2.6.34/linux/net/netfilter/ipvs/Kconfig linux/net/netfilter/ipvs/Kconfig
81--- v2.6.34/linux/net/netfilter/ipvs/Kconfig 2010-05-17 10:49:01.000000000 +0300
82+++ linux/net/netfilter/ipvs/Kconfig 2010-05-19 10:51:31.000000000 +0300
83@@ -250,4 +250,12 @@ config IP_VS_FTP
2380c486
JR
84 If you want to compile it in kernel, say Y. To compile it as a
85 module, choose M here. If unsure, say N.
86
87+config IP_VS_NFCT
88+ bool "Netfilter connection tracking"
89+ depends on NF_CONNTRACK
90+ ---help---
91+ The Netfilter connection tracking support allows the IPVS
92+ connection state to be exported to the Netfilter framework
93+ for filtering purposes.
94+
95 endif # IP_VS
78978408
AM
96diff -urNp v2.6.34/linux/net/netfilter/ipvs/Makefile linux/net/netfilter/ipvs/Makefile
97--- v2.6.34/linux/net/netfilter/ipvs/Makefile 2010-05-17 10:49:01.000000000 +0300
98+++ linux/net/netfilter/ipvs/Makefile 2010-05-19 10:51:31.000000000 +0300
99@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
2380c486 100 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
78978408 101 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o
2380c486
JR
102
103+ip_vs-extra_objs-y :=
104+ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
105+
106 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
107 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
108 ip_vs_est.o ip_vs_proto.o \
109- $(ip_vs_proto-objs-y)
110+ $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
111
112
113 # IPVS core
78978408
AM
114diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_conn.c linux/net/netfilter/ipvs/ip_vs_conn.c
115--- v2.6.34/linux/net/netfilter/ipvs/ip_vs_conn.c 2010-05-17 10:49:01.000000000 +0300
116+++ linux/net/netfilter/ipvs/ip_vs_conn.c 2010-05-19 10:51:31.000000000 +0300
117@@ -664,6 +664,11 @@ static void ip_vs_conn_expire(unsigned l
2380c486
JR
118 if (cp->control)
119 ip_vs_control_del(cp);
120
121+#ifdef CONFIG_IP_VS_NFCT
122+ if (sysctl_ip_vs_conntrack)
123+ ip_vs_nfct_conn_drop(cp);
124+#endif
125+
126 if (unlikely(cp->app != NULL))
127 ip_vs_unbind_app(cp);
128 ip_vs_unbind_dest(cp);
78978408
AM
129diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_core.c linux/net/netfilter/ipvs/ip_vs_core.c
130--- v2.6.34/linux/net/netfilter/ipvs/ip_vs_core.c 2010-05-17 10:49:01.000000000 +0300
131+++ linux/net/netfilter/ipvs/ip_vs_core.c 2010-05-19 10:51:31.000000000 +0300
132@@ -893,13 +893,16 @@ static inline int is_tcp_reset(const str
2380c486
JR
133 */
134 static unsigned int
135 handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
136- struct ip_vs_conn *cp, int ihl)
137+ struct ip_vs_conn *cp, int ihl, unsigned int hooknum)
138 {
139 IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
140
141 if (!skb_make_writable(skb, ihl))
142 goto drop;
143
144+ if (AF_INET == af && !ip_vs_confirm_conntrack(skb, cp, hooknum))
145+ goto out;
146+
147 /* mangle the packet */
148 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
149 goto drop;
78978408 150@@ -914,6 +917,15 @@ handle_response(int af, struct sk_buff *
2380c486
JR
151 ip_send_check(ip_hdr(skb));
152 }
153
154+ /*
155+ * nf_iterate does not expect a change in skb->dst->dev.
156+ * It looks like it is not fatal to enable this code for hooks
157+ * where our handlers are at the end of the chain list and
158+ * when all next handlers use skb->dst->dev and not outdev.
159+ * It will definitely route the inout NAT traffic properly
160+ * when multiple paths are used.
161+ */
162+
163 /* For policy routing, packets originating from this
164 * machine itself may be routed differently to packets
165 * passing through. We want this packet to be routed as
78978408 166@@ -926,7 +938,8 @@ handle_response(int af, struct sk_buff *
2380c486
JR
167 goto drop;
168 } else
169 #endif
170- if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
171+ if (sysctl_ip_vs_snat_reroute &&
172+ ip_route_me_harder(skb, RTN_LOCAL) != 0)
173 goto drop;
174
175 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
78978408 176@@ -941,8 +954,11 @@ handle_response(int af, struct sk_buff *
2380c486
JR
177 return NF_ACCEPT;
178
179 drop:
180- ip_vs_conn_put(cp);
181 kfree_skb(skb);
182+
183+out:
184+ ip_vs_conn_put(cp);
185+ LeaveFunction(11);
186 return NF_STOLEN;
187 }
188
78978408 189@@ -982,8 +998,13 @@ ip_vs_out(unsigned int hooknum, struct s
2380c486
JR
190 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
191 int related, verdict = ip_vs_out_icmp(skb, &related);
192
193- if (related)
194+ if (related) {
195+ if (sysctl_ip_vs_snat_reroute &&
196+ NF_ACCEPT == verdict &&
197+ ip_route_me_harder(skb, RTN_LOCAL))
198+ verdict = NF_DROP;
199 return verdict;
200+ }
201 ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
202 }
203
78978408 204@@ -1063,7 +1084,7 @@ ip_vs_out(unsigned int hooknum, struct s
2380c486
JR
205 return NF_ACCEPT;
206 }
207
208- return handle_response(af, skb, pp, cp, iph.len);
209+ return handle_response(af, skb, pp, cp, iph.len, hooknum);
210 }
211
212
78978408 213@@ -1340,7 +1361,7 @@ ip_vs_in(unsigned int hooknum, struct sk
2380c486
JR
214 /* For local client packets, it could be a response */
215 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
216 if (cp)
217- return handle_response(af, skb, pp, cp, iph.len);
218+ return handle_response(af, skb, pp, cp, iph.len, hooknum);
219
220 if (!pp->conn_schedule(af, skb, pp, &v, &cp))
221 return v;
78978408
AM
222diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_ctl.c linux/net/netfilter/ipvs/ip_vs_ctl.c
223--- v2.6.34/linux/net/netfilter/ipvs/ip_vs_ctl.c 2010-05-17 10:49:01.000000000 +0300
224+++ linux/net/netfilter/ipvs/ip_vs_ctl.c 2010-05-19 10:51:31.000000000 +0300
225@@ -88,6 +88,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
2380c486
JR
226 int sysctl_ip_vs_expire_quiescent_template = 0;
227 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
228 int sysctl_ip_vs_nat_icmp_send = 0;
229+int sysctl_ip_vs_snat_reroute = 0;
230+#ifdef CONFIG_IP_VS_NFCT
231+int sysctl_ip_vs_conntrack = 0;
232+#endif
233
234
235 #ifdef CONFIG_IP_VS_DEBUG
78978408 236@@ -1579,6 +1583,15 @@ static struct ctl_table vs_vars[] = {
2380c486 237 .mode = 0644,
78978408 238 .proc_handler = proc_do_defense_mode,
2380c486
JR
239 },
240+#ifdef CONFIG_IP_VS_NFCT
241+ {
242+ .procname = "conntrack",
243+ .data = &sysctl_ip_vs_conntrack,
244+ .maxlen = sizeof(int),
245+ .mode = 0644,
246+ .proc_handler = &proc_dointvec,
247+ },
248+#endif
249 {
78978408
AM
250 .procname = "secure_tcp",
251 .data = &sysctl_ip_vs_secure_tcp,
252@@ -1586,6 +1599,13 @@ static struct ctl_table vs_vars[] = {
2380c486 253 .mode = 0644,
78978408 254 .proc_handler = proc_do_defense_mode,
2380c486
JR
255 },
256+ {
257+ .procname = "snat_reroute",
258+ .data = &sysctl_ip_vs_snat_reroute,
259+ .maxlen = sizeof(int),
260+ .mode = 0644,
261+ .proc_handler = &proc_dointvec,
262+ },
263 #if 0
264 {
265 .procname = "timeout_established",
78978408
AM
266diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_ftp.c linux/net/netfilter/ipvs/ip_vs_ftp.c
267--- v2.6.34/linux/net/netfilter/ipvs/ip_vs_ftp.c 2010-05-17 10:49:01.000000000 +0300
268+++ linux/net/netfilter/ipvs/ip_vs_ftp.c 2010-05-19 10:51:31.000000000 +0300
269@@ -204,6 +204,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
2380c486
JR
270 ip_vs_control_add(n_cp, cp);
271 }
272
273+#ifdef CONFIG_IP_VS_NFCT
274+ if (skb->nfct)
275+ ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
276+#endif
277+
278 /*
279 * Replace the old passive address with the new one
280 */
78978408 281@@ -343,6 +348,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
2380c486
JR
282 ip_vs_control_add(n_cp, cp);
283 }
284
285+#ifdef CONFIG_IP_VS_NFCT
286+ if (skb->nfct)
287+ ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
288+#endif
289+
290 /*
291 * Move tunnel to listen state
292 */
78978408
AM
293diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_nfct.c linux/net/netfilter/ipvs/ip_vs_nfct.c
294--- v2.6.34/linux/net/netfilter/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
295+++ linux/net/netfilter/ipvs/ip_vs_nfct.c 2010-05-19 11:18:37.000000000 +0300
296@@ -0,0 +1,376 @@
2380c486
JR
297+/*
298+ * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
299+ *
300+ * Portions Copyright (C) 2001-2002
301+ * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
302+ *
303+ * Portions Copyright (C) 2003-2008
304+ * Julian Anastasov
305+ *
306+ *
307+ * This code is free software; you can redistribute it and/or modify
308+ * it under the terms of the GNU General Public License as published by
309+ * the Free Software Foundation; either version 2 of the License, or
310+ * (at your option) any later version.
311+ *
312+ * This program is distributed in the hope that it will be useful,
313+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
314+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
315+ * GNU General Public License for more details.
316+ *
317+ * You should have received a copy of the GNU General Public License
318+ * along with this program; if not, write to the Free Software
319+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
320+ *
321+ *
322+ * Authors:
323+ * Ben North <ben@redfrontdoor.org>
324+ * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
325+ *
326+ *
327+ * Current status:
328+ *
329+ * - provide conntrack confirmation for new and related connections, so
330+ * that we can see their proper conntrack state in all hooks
331+ * - support for all forwarding methods, not only NAT
332+ * - FTP support (NAT), ability to support other NAT apps with expectations
333+ * - to correctly create expectations for related NAT connections the proper
334+ * NF conntrack support must be already installed, eg. ip_vs_ftp requires
335+ * nf_conntrack_ftp for the same ports
336+ *
337+ */
338+
78978408
AM
339+#define KMSG_COMPONENT "IPVS"
340+#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
341+
2380c486
JR
342+#include <linux/module.h>
343+#include <linux/types.h>
344+#include <linux/kernel.h>
345+#include <linux/errno.h>
346+#include <linux/compiler.h>
347+#include <linux/vmalloc.h>
348+#include <linux/skbuff.h>
349+#include <net/ip.h>
350+#include <linux/netfilter.h>
351+#include <linux/netfilter_ipv4.h>
352+#include <net/ip_vs.h>
353+
354+
355+EXPORT_SYMBOL(ip_vs_nfct_expect_related);
356+
357+
358+#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
359+#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
360+ NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
361+ (t)->dst.protonum
362+
363+#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
364+#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
365+ NIPQUAD((c)->vaddr), ntohs((c)->vport), \
366+ NIPQUAD((c)->daddr), ntohs((c)->dport), \
367+ (c)->protocol, (c)->state
368+
369+/* Returns boolean and skb is freed on failure */
370+static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
371+ unsigned int hooknum)
372+{
373+ /*
374+ * The assumptions:
375+ * - the nfct is !NULL and is not confirmed
376+ * - we are called before any mangle
377+ */
378+
379+ struct iphdr *iph = ip_hdr(skb);
380+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
381+ struct nf_conntrack_tuple new_reply;
382+ int ret = NF_DROP;
383+ __be16 _ports[2], *pptr;
384+#ifdef CONFIG_IP_VS_DEBUG
385+ struct nf_conntrack_tuple *orig_tup =
386+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
387+ struct nf_conntrack_tuple *orig_rep =
388+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
389+#endif
390+#ifdef CONFIG_NF_NAT_NEEDED
391+ int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
392+#else
393+ int initialized = 0;
394+#endif
395+
396+ IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
397+ ", cp=" FMT_CONN "\n",
78978408 398+ __func__, ct, initialized,
2380c486
JR
399+ ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
400+
401+#ifdef CONFIG_NF_NAT_NEEDED
402+ /*
403+ * This is really bad, maybe we are trying to alter DNAT conn?
404+ * This is not supported, avoid the confirmation.
405+ */
406+ if (initialized && ct->status & IPS_NAT_MASK) {
407+#ifdef CONFIG_IP_VS_DEBUG
408+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
78978408 409+ __func__, ct, ct->status, initialized);
2380c486
JR
410+#endif
411+ return 1;
412+ }
413+#endif
414+
415+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
416+ goto confirm;
417+
418+ /*
419+ * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
420+ * For related connections in inout direction it is done in
421+ * expectfn callback.
422+ */
423+
424+ pptr = skb_header_pointer(skb, ip_hdrlen(skb),
425+ sizeof(_ports), _ports);
426+ if (!pptr)
427+ goto out;
428+
429+ new_reply = (struct nf_conntrack_tuple) {
430+ .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
431+
432+ new_reply.src.u3 = cp->daddr;
433+ new_reply.src.u.tcp.port = cp->dport;
434+ new_reply.src.l3num = PF_INET;
435+ new_reply.dst.u3.ip = iph->saddr;
436+ new_reply.dst.u.tcp.port = pptr[0];
437+
438+ nf_conntrack_alter_reply(ct, &new_reply);
439+
440+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
441+ ", new_reply=" FMT_TUPLE " => alter_reply\n",
78978408 442+ __func__, ct, initialized,
2380c486
JR
443+ ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
444+
445+ /*
446+ * No need to rehash NAT info because we don't change source
447+ * address in original direction
448+ */
449+
450+confirm:
451+
452+ ret = __nf_conntrack_confirm(skb);
453+
454+ if (ret != NF_STOLEN) {
455+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
78978408 456+ __func__, ct, initialized, ARG_TUPLE(orig_tup), ret);
2380c486
JR
457+ }
458+
459+ if (ret != NF_ACCEPT)
460+ goto out;
461+ return 1;
462+
463+out:
464+ if (ret != NF_STOLEN)
465+ kfree_skb(skb);
466+ return 0;
467+}
468+
469+/*
470+ * Confirm (and optionally alter) the conntrack entry if needed
471+ * because the IPVS packets do not reach ipv4_confirm.
472+ */
473+int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
474+ unsigned int hooknum)
475+{
476+ struct iphdr *iph = ip_hdr(skb);
477+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
478+
479+ /* By the time we're sending the packet out the other
480+ * side, there should be a confirmed Netfilter CT entry
481+ * for this connection. This may not be the case,
482+ * however, if it's a brand new connection, or if the NF
483+ * entry has timed out before ours has. Either way, if
484+ * the NF CT entry is unconfirmed, confirm it, and deal
485+ * with reply tuple mangling at the same time.
486+ */
487+
488+ /* We only deal with TCP or UDP packets */
489+ if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
490+ return 1;
491+
492+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
493+ /*
494+ * Do not be surprised if non-NAT conntracks stay in SYN_SENT
495+ * state, maybe the replies from the real server go
496+ * directly to the client. In any case, keep them in REPLIED
497+ * state (ESTABLISHED).
498+ */
499+ if (iph->protocol != IPPROTO_TCP ||
500+ IP_VS_TCP_S_ESTABLISHED == cp->state) {
501+ set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
502+ }
503+ }
504+
505+ /*
506+ * We assume the reused connections do not change their rip:rport
507+ * and we do not need to alter their conntrack reply
508+ */
509+ return __ip_vs_nfct_confirm(skb, cp, hooknum);
510+}
511+
512+/*
513+ * We are called from init_conntrack() as expectfn handler
514+ */
515+
516+static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
517+ struct nf_conntrack_expect *exp)
518+{
519+ struct nf_conntrack_tuple *orig, new_reply;
520+ struct ip_vs_conn *cp;
521+
522+ if (exp->tuple.src.l3num != PF_INET)
523+ return;
524+
525+ /*
526+ * - We assume that no NF locks are held before this callback
527+ * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
528+ * expectations even if they use wildcard values, now we provide
529+ * the actual values from the newly created original conntrack direction
530+ * - the conntrack is confirmed when packet reaches IPVS hooks
531+ */
532+
533+ /* RS->CLIENT */
534+ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
535+ cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
536+ &orig->src.u3, orig->src.u.tcp.port,
537+ &orig->dst.u3, orig->dst.u.tcp.port);
538+ if (cp) {
539+ /* Change reply CLIENT->RS to CLIENT->VS */
540+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
541+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
542+ ", found inout cp=" FMT_CONN "\n",
78978408 543+ __func__, ct, ct->status,
2380c486
JR
544+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
545+ ARG_CONN(cp));
546+ new_reply.dst.u3 = cp->vaddr;
547+ new_reply.dst.u.tcp.port = cp->vport;
548+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
549+ ", inout cp=" FMT_CONN "\n",
78978408 550+ __func__, ct,
2380c486
JR
551+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
552+ ARG_CONN(cp));
553+ goto alter;
554+ }
555+
556+ /* CLIENT->VS */
557+ cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
558+ &orig->src.u3, orig->src.u.tcp.port,
559+ &orig->dst.u3, orig->dst.u.tcp.port);
560+ if (cp) {
561+ /* Change reply VS->CLIENT to RS->CLIENT */
562+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
563+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
564+ ", found outin cp=" FMT_CONN "\n",
78978408 565+ __func__, ct, ct->status,
2380c486
JR
566+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
567+ ARG_CONN(cp));
568+ new_reply.src.u3 = cp->daddr;
569+ new_reply.src.u.tcp.port = cp->dport;
570+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
571+ ", outin cp=" FMT_CONN "\n",
78978408 572+ __func__, ct,
2380c486
JR
573+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
574+ ARG_CONN(cp));
575+ goto alter;
576+ }
577+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
78978408 578+ __func__, ct, ct->status, ARG_TUPLE(orig));
2380c486
JR
579+ return;
580+
581+alter:
582+
583+ /* Never alter conntrack for non-NAT conns */
584+ if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
585+ nf_conntrack_alter_reply(ct, &new_reply);
586+ ip_vs_conn_put(cp);
587+ return;
588+}
589+
590+/*
591+ * Create NF conntrack expectation with wildcard (optional) source port.
592+ * Then the default callback function will alter the reply and will confirm
593+ * the conntrack entry when the first packet comes.
594+ */
595+void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
596+ __be16 port, __u16 proto, int from_rs)
597+{
598+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
599+ struct nf_conntrack_expect *e;
600+
601+ if (!sysctl_ip_vs_conntrack)
602+ return;
603+
604+ if (!ct) {
605+ IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
78978408 606+ __func__, ct, ARG_CONN(cp));
2380c486
JR
607+ return;
608+ }
609+
610+ if (!(e = nf_ct_expect_alloc(ct)))
611+ return;
612+
78978408
AM
613+ nf_ct_expect_init(e, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct),
614+ from_rs ? &cp->daddr : &cp->caddr,
615+ from_rs ? &cp->caddr : &cp->vaddr,
616+ proto, port ? &port : NULL,
617+ from_rs ? &cp->cport : &cp->vport);
618+
619+ e->expectfn = ip_vs_nfct_expect_callback;
2380c486
JR
620+
621+ IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
78978408 622+ __func__, ct, ARG_TUPLE(&e->tuple));
2380c486
JR
623+ nf_ct_expect_related(e);
624+ nf_ct_expect_put(e);
625+}
626+
627+/*
628+ * Our connection was terminated, try to drop the conntrack immediately
629+ */
630+void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
631+{
632+ struct nf_conntrack_tuple_hash *h;
633+ struct nf_conn *ct;
634+ struct nf_conntrack_tuple tuple;
635+
636+ if (!cp->cport)
637+ return;
638+
639+ tuple = (struct nf_conntrack_tuple) {
640+ .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
641+ tuple.src.u3 = cp->caddr;
642+ tuple.src.u.all = cp->cport;
643+ tuple.src.l3num = PF_INET;
644+ tuple.dst.u3 = cp->vaddr;
645+ tuple.dst.u.all = cp->vport;
646+
647+ IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
648+ " for conn " FMT_CONN "\n",
78978408 649+ __func__, ARG_TUPLE(&tuple), ARG_CONN(cp));
2380c486 650+
78978408 651+ h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple);
2380c486
JR
652+ if (h) {
653+ ct = nf_ct_tuplehash_to_ctrack(h);
78978408 654+ /* Show what happens instead of calling nf_ct_kill() */
2380c486
JR
655+ if (del_timer(&ct->timeout)) {
656+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
657+ FMT_TUPLE "\n",
78978408 658+ __func__, ct, ARG_TUPLE(&tuple));
2380c486
JR
659+ if (ct->timeout.function)
660+ ct->timeout.function(ct->timeout.data);
661+ } else {
662+ IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
663+ FMT_TUPLE "\n",
78978408 664+ __func__, ct, ARG_TUPLE(&tuple));
2380c486
JR
665+ }
666+ nf_ct_put(ct);
667+ } else {
668+ IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
78978408 669+ __func__, ARG_TUPLE(&tuple));
2380c486
JR
670+ }
671+}
672+
78978408
AM
673diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_xmit.c linux/net/netfilter/ipvs/ip_vs_xmit.c
674--- v2.6.34/linux/net/netfilter/ipvs/ip_vs_xmit.c 2010-05-17 10:49:02.000000000 +0300
675+++ linux/net/netfilter/ipvs/ip_vs_xmit.c 2010-05-19 10:51:31.000000000 +0300
676@@ -267,6 +267,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
677 skb_dst_drop(skb);
678 skb_dst_set(skb, &rt->u.dst);
2380c486
JR
679
680+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
681+ goto tx_error_out;
682+
683 /* Another hack: avoid icmp_send in ip_fragment */
684 skb->local_df = 1;
685
78978408 686@@ -279,6 +282,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
2380c486
JR
687 dst_link_failure(skb);
688 tx_error:
689 kfree_skb(skb);
690+ tx_error_out:
691 LeaveFunction(10);
692 return NF_STOLEN;
693 }
78978408
AM
694@@ -395,6 +399,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
695 skb_dst_drop(skb);
696 skb_dst_set(skb, &rt->u.dst);
2380c486
JR
697
698+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
699+ goto tx_error_out;
700+
701 /* mangle the packet */
702 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
703 goto tx_error;
78978408 704@@ -418,8 +425,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
2380c486
JR
705 tx_error_icmp:
706 dst_link_failure(skb);
707 tx_error:
708- LeaveFunction(10);
709 kfree_skb(skb);
710+ tx_error_out:
711+ LeaveFunction(10);
712 return NF_STOLEN;
713 tx_error_put:
714 ip_rt_put(rt);
78978408 715@@ -595,14 +603,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
2380c486
JR
716 /* fix old IP header checksum */
717 ip_send_check(old_iph);
718
719- skb_push(skb, sizeof(struct iphdr));
720- skb_reset_network_header(skb);
721- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
722-
723 /* drop old route */
13e5c3b1 724 skb_dst_drop(skb);
28eb7fa5 725 skb_dst_set(skb, &rt->dst);
2380c486
JR
726
727+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
728+ goto tx_error_out;
729+
730+ skb_push(skb, sizeof(struct iphdr));
731+ skb_reset_network_header(skb);
732+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
733+
734 /*
735 * Push down and install the IPIP header.
736 */
78978408 737@@ -630,6 +641,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
2380c486
JR
738 dst_link_failure(skb);
739 tx_error:
740 kfree_skb(skb);
741+ tx_error_out:
742 LeaveFunction(10);
743 return NF_STOLEN;
744 }
78978408
AM
745@@ -782,6 +794,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
746 skb_dst_drop(skb);
747 skb_dst_set(skb, &rt->u.dst);
2380c486
JR
748
749+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
750+ goto tx_error_out;
751+
752 /* Another hack: avoid icmp_send in ip_fragment */
753 skb->local_df = 1;
754
78978408 755@@ -794,6 +809,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
2380c486
JR
756 dst_link_failure(skb);
757 tx_error:
758 kfree_skb(skb);
759+ tx_error_out:
760 LeaveFunction(10);
761 return NF_STOLEN;
762 }
78978408
AM
763@@ -907,6 +923,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
764 skb_dst_drop(skb);
765 skb_dst_set(skb, &rt->u.dst);
2380c486
JR
766
767+ /* TODO: properly alter reply for NFCT */
768+
769 ip_vs_nat_icmp(skb, pp, cp, 0);
770
771 /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.131401 seconds and 4 git commands to generate.