]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-ipvs-nfct.patch
- fix build with glibc 2.10
[packages/kernel.git] / kernel-ipvs-nfct.patch
CommitLineData
8e6b03ae 1diff -urNp v2.6.27/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2--- v2.6.27/linux/include/net/ip_vs.h 2008-10-11 12:46:15.000000000 +0300
3+++ linux/include/net/ip_vs.h 2008-10-11 14:24:47.000000000 +0300
4@@ -21,6 +21,13 @@
5 #include <linux/timer.h>
6d4e1af8 6
ec625505 7 #include <net/checksum.h>
db744e5b 8+#include <linux/skbuff.h>
9+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
10+#include <net/netfilter/nf_conntrack.h>
11+#include <net/netfilter/nf_conntrack_core.h>
12+#include <net/netfilter/nf_conntrack_expect.h>
13+#include <net/netfilter/nf_conntrack_helper.h>
14+#endif
8e6b03ae 15
ec625505
AM
16 #ifdef CONFIG_IP_VS_DEBUG
17 #include <linux/net.h>
8e6b03ae 18@@ -474,6 +481,16 @@ extern void ip_vs_init_hash_table(struct
db744e5b 19 */
20
21 /*
22+ * Netfilter connection tracking
23+ * (from ip_vs_nfct.c)
24+ */
25+extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
26+extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
27+ struct ip_vs_conn *cp,
28+ __be16 port, __u16 proto, int from_rs);
29+extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
30+
31+/*
32 * IPVS connection entry hash table
33 */
34 #ifndef CONFIG_IP_VS_TAB_BITS
8e6b03ae 35@@ -643,9 +660,42 @@ extern int sysctl_ip_vs_expire_nodest_co
db744e5b 36 extern int sysctl_ip_vs_expire_quiescent_template;
37 extern int sysctl_ip_vs_sync_threshold[2];
38 extern int sysctl_ip_vs_nat_icmp_send;
39+extern int sysctl_ip_vs_snat_reroute;
40 extern struct ip_vs_stats ip_vs_stats;
ec625505 41 extern const struct ctl_path net_vs_ctl_path[];
db744e5b 42
43+#ifdef CONFIG_IP_VS_NFCT
44+
45+extern int sysctl_ip_vs_conntrack;
46+
47+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
48+{
8e6b03ae 49+ return sysctl_ip_vs_conntrack && skb->nfct;
db744e5b 50+}
51+
52+/* Returns boolean and skb is freed on failure */
53+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
54+{
8e6b03ae 55+ if (!ip_vs_use_conntrack(skb))
56+ return 1;
57+ return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
58+ ip_vs_nfct_confirm(skb, cp, hooknum);
db744e5b 59+}
60+
61+#else
62+
63+static inline int ip_vs_use_conntrack(struct sk_buff *skb)
64+{
8e6b03ae 65+ return 0;
db744e5b 66+}
67+
68+static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
69+{
8e6b03ae 70+ return 1;
db744e5b 71+}
72+
73+#endif
74+
75 extern struct ip_vs_service *
76 ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
77
8e6b03ae 78diff -urNp v2.6.27/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
79--- v2.6.27/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
80+++ linux/net/ipv4/ipvs/Kconfig 2008-10-11 14:19:27.000000000 +0300
db744e5b 81@@ -221,4 +221,12 @@ config IP_VS_FTP
82 If you want to compile it in kernel, say Y. To compile it as a
83 module, choose M here. If unsure, say N.
84
85+config IP_VS_NFCT
86+ bool "Netfilter connection tracking"
87+ depends on NF_CONNTRACK
88+ ---help---
89+ The Netfilter connection tracking support allows the IPVS
90+ connection state to be exported to the Netfilter framework
91+ for filtering purposes.
92+
93 endif # IP_VS
8e6b03ae 94diff -urNp v2.6.27/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
95--- v2.6.27/linux/net/ipv4/ipvs/Makefile 2005-06-18 08:50:52.000000000 +0300
96+++ linux/net/ipv4/ipvs/Makefile 2008-10-11 14:19:27.000000000 +0300
db744e5b 97@@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
98 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
99 ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
100
101+ip_vs-extra_objs-y :=
102+ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
103+
104 ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \
105 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \
106 ip_vs_est.o ip_vs_proto.o \
107- $(ip_vs_proto-objs-y)
108+ $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
109
110
111 # IPVS core
8e6b03ae 112diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
113--- v2.6.27/linux/net/ipv4/ipvs/ip_vs_conn.c 2008-10-11 12:46:16.000000000 +0300
114+++ linux/net/ipv4/ipvs/ip_vs_conn.c 2008-10-11 14:19:27.000000000 +0300
115@@ -591,6 +591,11 @@ static void ip_vs_conn_expire(unsigned l
db744e5b 116 if (cp->control)
117 ip_vs_control_del(cp);
118
119+#ifdef CONFIG_IP_VS_NFCT
120+ if (sysctl_ip_vs_conntrack)
121+ ip_vs_nfct_conn_drop(cp);
122+#endif
123+
124 if (unlikely(cp->app != NULL))
125 ip_vs_unbind_app(cp);
126 ip_vs_unbind_dest(cp);
8e6b03ae 127diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
128--- v2.6.27/linux/net/ipv4/ipvs/ip_vs_core.c 2008-10-11 12:46:16.000000000 +0300
129+++ linux/net/ipv4/ipvs/ip_vs_core.c 2008-10-11 14:19:27.000000000 +0300
130@@ -659,6 +659,8 @@ static int ip_vs_out_icmp(struct sk_buff
db744e5b 131
132 skb->ipvs_property = 1;
133 verdict = NF_ACCEPT;
134+ if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL))
135+ verdict = NF_DROP;
136
137 out:
138 __ip_vs_conn_put(cp);
8e6b03ae 139@@ -759,19 +761,31 @@ ip_vs_out(unsigned int hooknum, struct s
db744e5b 140 if (!skb_make_writable(skb, ihl))
141 goto drop;
142
143+ if (!ip_vs_confirm_conntrack(skb, cp, hooknum))
144+ goto out;
145+
146 /* mangle the packet */
147 if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
148 goto drop;
149 ip_hdr(skb)->saddr = cp->vaddr;
150 ip_send_check(ip_hdr(skb));
151
152+ /*
153+ * nf_iterate does not expect a change in skb->dst->dev.
154+ * It looks like it is not fatal to enable this code for hooks
155+ * where our handlers are at the end of the chain list and
156+ * when all following handlers use skb->dst->dev and not outdev.
157+ * It will definitely route the inout NAT traffic properly
158+ * when multiple paths are used.
159+ */
160+
161 /* For policy routing, packets originating from this
162 * machine itself may be routed differently to packets
163 * passing through. We want this packet to be routed as
164 * if it came from this machine itself. So re-compute
165 * the routing information.
166 */
167- if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
168+ if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL) != 0)
169 goto drop;
170
171 IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
8e6b03ae 172@@ -786,8 +800,11 @@ ip_vs_out(unsigned int hooknum, struct s
db744e5b 173 return NF_ACCEPT;
174
175 drop:
176- ip_vs_conn_put(cp);
177 kfree_skb(skb);
178+
179+ out:
180+ ip_vs_conn_put(cp);
181+ LeaveFunction(11);
182 return NF_STOLEN;
183 }
184
8e6b03ae 185diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
186--- v2.6.27/linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-10-11 12:46:16.000000000 +0300
187+++ linux/net/ipv4/ipvs/ip_vs_ctl.c 2008-10-11 14:19:27.000000000 +0300
188@@ -79,6 +79,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
db744e5b 189 int sysctl_ip_vs_expire_quiescent_template = 0;
190 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
191 int sysctl_ip_vs_nat_icmp_send = 0;
192+int sysctl_ip_vs_snat_reroute = 0;
193+#ifdef CONFIG_IP_VS_NFCT
194+int sysctl_ip_vs_conntrack = 0;
195+#endif
196
197
198 #ifdef CONFIG_IP_VS_DEBUG
8e6b03ae 199@@ -1457,6 +1461,15 @@ static struct ctl_table vs_vars[] = {
db744e5b 200 .mode = 0644,
201 .proc_handler = &proc_dointvec,
202 },
203+#ifdef CONFIG_IP_VS_NFCT
204+ {
205+ .procname = "conntrack",
206+ .data = &sysctl_ip_vs_conntrack,
207+ .maxlen = sizeof(int),
208+ .mode = 0644,
209+ .proc_handler = &proc_dointvec,
210+ },
211+#endif
212 {
213 .procname = "drop_entry",
214 .data = &sysctl_ip_vs_drop_entry,
8e6b03ae 215@@ -1478,6 +1491,13 @@ static struct ctl_table vs_vars[] = {
db744e5b 216 .mode = 0644,
217 .proc_handler = &proc_do_defense_mode,
218 },
219+ {
220+ .procname = "snat_reroute",
221+ .data = &sysctl_ip_vs_snat_reroute,
222+ .maxlen = sizeof(int),
223+ .mode = 0644,
224+ .proc_handler = &proc_dointvec,
225+ },
226 #if 0
227 {
228 .procname = "timeout_established",
8e6b03ae 229diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
230--- v2.6.27/linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-10-11 12:46:16.000000000 +0300
231+++ linux/net/ipv4/ipvs/ip_vs_ftp.c 2008-10-11 14:19:27.000000000 +0300
232@@ -193,6 +193,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
db744e5b 233 ip_vs_control_add(n_cp, cp);
234 }
235
236+#ifdef CONFIG_IP_VS_NFCT
237+ if (skb->nfct)
238+ ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
239+#endif
240+
241 /*
242 * Replace the old passive address with the new one
243 */
8e6b03ae 244@@ -325,6 +330,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
db744e5b 245 ip_vs_control_add(n_cp, cp);
246 }
247
248+#ifdef CONFIG_IP_VS_NFCT
249+ if (skb->nfct)
250+ ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
251+#endif
252+
253 /*
254 * Move tunnel to listen state
255 */
8e6b03ae 256diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
257--- v2.6.27/linux/net/ipv4/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200
258+++ linux/net/ipv4/ipvs/ip_vs_nfct.c 2008-10-11 14:19:27.000000000 +0300
259@@ -0,0 +1,386 @@
db744e5b 260+/*
261+ * ip_vs_nfct.c: Netfilter connection tracking support for IPVS
262+ *
263+ * Portions Copyright (C) 2001-2002
264+ * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
265+ *
266+ * Portions Copyright (C) 2003-2008
267+ * Julian Anastasov
268+ *
269+ *
270+ * This code is free software; you can redistribute it and/or modify
271+ * it under the terms of the GNU General Public License as published by
272+ * the Free Software Foundation; either version 2 of the License, or
273+ * (at your option) any later version.
274+ *
275+ * This program is distributed in the hope that it will be useful,
276+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
277+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
278+ * GNU General Public License for more details.
279+ *
280+ * You should have received a copy of the GNU General Public License
281+ * along with this program; if not, write to the Free Software
282+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
283+ *
284+ *
285+ * Authors:
286+ * Ben North <ben@redfrontdoor.org>
287+ * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels
288+ *
289+ *
290+ * Current status:
291+ *
292+ * - provide conntrack confirmation for new and related connections, so
293+ * that we can see their proper conntrack state in all hooks
294+ * - support for all forwarding methods, not only NAT
295+ * - FTP support (NAT), ability to support other NAT apps with expectations
296+ * - to correctly create expectations for related NAT connections, the proper
297+ * NF conntrack support must already be installed, e.g. ip_vs_ftp requires
298+ * nf_conntrack_ftp for the same ports
299+ *
300+ */
301+
302+#include <linux/module.h>
303+#include <linux/types.h>
304+#include <linux/kernel.h>
305+#include <linux/errno.h>
306+#include <linux/compiler.h>
307+#include <linux/vmalloc.h>
308+#include <linux/skbuff.h>
309+#include <net/ip.h>
310+#include <linux/netfilter.h>
311+#include <linux/netfilter_ipv4.h>
312+#include <net/ip_vs.h>
313+
314+
315+EXPORT_SYMBOL(ip_vs_nfct_expect_related);
316+
317+
318+#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
319+#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
320+ NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
321+ (t)->dst.protonum
322+
323+#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
324+#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \
325+ NIPQUAD((c)->vaddr), ntohs((c)->vport), \
326+ NIPQUAD((c)->daddr), ntohs((c)->dport), \
327+ (c)->protocol, (c)->state
328+
329+/* Returns boolean and skb is freed on failure */
330+static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
331+ unsigned int hooknum)
332+{
333+ /*
334+ * The assumptions:
335+ * - the nfct is !NULL and is not confirmed
336+ * - we are called before any mangle
337+ */
338+
339+ struct iphdr *iph = ip_hdr(skb);
340+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
341+ struct nf_conntrack_tuple new_reply;
342+ int ret = NF_DROP;
343+ __be16 _ports[2], *pptr;
344+#ifdef CONFIG_IP_VS_DEBUG
345+ struct nf_conntrack_tuple *orig_tup =
346+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
347+ struct nf_conntrack_tuple *orig_rep =
348+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
349+#endif
350+#ifdef CONFIG_NF_NAT_NEEDED
351+ int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
352+#else
353+ int initialized = 0;
354+#endif
355+
356+ IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
357+ ", cp=" FMT_CONN "\n",
358+ __FUNCTION__, ct, initialized,
359+ ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
360+
361+#ifdef CONFIG_NF_NAT_NEEDED
362+ /*
363+ * This is really bad: maybe we are trying to alter a DNAT conn?
364+ * This is not supported, so avoid the confirmation.
365+ */
366+ if (initialized && ct->status & IPS_NAT_MASK) {
367+#ifdef CONFIG_IP_VS_DEBUG
368+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
369+ __FUNCTION__, ct, ct->status, initialized);
370+#endif
371+ return 1;
372+ }
373+#endif
374+
6d4e1af8 375+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
db744e5b 376+ goto confirm;
377+
378+ /*
379+ * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
380+ * For related connections in inout direction it is done in
381+ * expectfn callback.
382+ */
383+
384+ pptr = skb_header_pointer(skb, ip_hdrlen(skb),
385+ sizeof(_ports), _ports);
386+ if (!pptr)
387+ goto out;
388+
389+ new_reply = (struct nf_conntrack_tuple) {
390+ .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
391+
392+ new_reply.src.u3.ip = cp->daddr;
393+ new_reply.src.u.tcp.port = cp->dport;
394+ new_reply.src.l3num = PF_INET;
395+ new_reply.dst.u3.ip = iph->saddr;
396+ new_reply.dst.u.tcp.port = pptr[0];
397+
398+ nf_conntrack_alter_reply(ct, &new_reply);
399+
400+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
401+ ", new_reply=" FMT_TUPLE " => alter_reply\n",
402+ __FUNCTION__, ct, initialized,
403+ ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
404+
405+ /*
406+ * No need to rehash NAT info because we don't change source
407+ * address in original direction
408+ */
409+
410+confirm:
411+
412+ ret = __nf_conntrack_confirm(skb);
413+
414+ if (ret != NF_STOLEN) {
415+ IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
416+ __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
417+ }
418+
419+ if (ret != NF_ACCEPT)
420+ goto out;
421+ return 1;
422+
423+out:
424+ if (ret != NF_STOLEN)
425+ kfree_skb(skb);
426+ return 0;
427+}
428+
429+/*
430+ * Confirm (and optionally alter) the conntrack entry if needed
431+ * because the IPVS packets do not reach ipv4_confirm.
432+ */
433+int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
434+ unsigned int hooknum)
435+{
436+ struct iphdr *iph = ip_hdr(skb);
437+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
438+
439+ /* By the time we're sending the packet out the other
440+ * side, there should be a confirmed Netfilter CT entry
441+ * for this connection. This may not be the case,
442+ * however, if it's a brand new connection, or if the NF
443+ * entry has timed out before ours has. Either way, if
444+ * the NF CT entry is unconfirmed, confirm it, and deal
445+ * with reply tuple mangling at the same time.
446+ */
447+
448+ /* We only deal with TCP or UDP packets */
449+ if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
450+ return 1;
451+
452+ if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
453+ /*
454+ * Do not be surprised if non-NAT conntracks stay in SYN_SENT
455+ * state; maybe the replies from the real server go
456+ * directly to the client. In any case, keep them in REPLIED
457+ * state (ESTABLISHED).
458+ */
459+ if (iph->protocol != IPPROTO_TCP ||
460+ IP_VS_TCP_S_ESTABLISHED == cp->state) {
461+ set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
462+ }
463+ }
464+
465+ /*
466+ * We assume the reused connections do not change their rip:rport
467+ * and we do not need to alter their conntrack reply
468+ */
469+ return __ip_vs_nfct_confirm(skb, cp, hooknum);
470+}
471+
472+/*
473+ * We are called from init_conntrack() as expectfn handler
474+ */
475+
476+static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
477+ struct nf_conntrack_expect *exp)
478+{
479+ struct nf_conntrack_tuple *orig, new_reply;
480+ struct ip_vs_conn *cp;
481+
482+ if (exp->tuple.src.l3num != PF_INET)
483+ return;
484+
485+ /*
486+ * - We assume that no NF locks are held before this callback
487+ * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
488+ * expectations even if they use wildcard values, now we provide
489+ * the actual values from the newly created original conntrack direction
490+ * - the conntrack is confirmed when packet reaches IPVS hooks
491+ */
492+
493+ /* RS->CLIENT */
494+ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
495+ cp = ip_vs_conn_out_get(orig->dst.protonum,
496+ orig->src.u3.ip, orig->src.u.tcp.port,
497+ orig->dst.u3.ip, orig->dst.u.tcp.port);
498+ if (cp) {
499+ /* Change reply CLIENT->RS to CLIENT->VS */
500+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
501+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
502+ ", found inout cp=" FMT_CONN "\n",
503+ __FUNCTION__, ct, ct->status,
504+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
505+ ARG_CONN(cp));
506+ new_reply.dst.u3.ip = cp->vaddr;
507+ new_reply.dst.u.tcp.port = cp->vport;
508+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
509+ ", inout cp=" FMT_CONN "\n",
510+ __FUNCTION__, ct,
511+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
512+ ARG_CONN(cp));
513+ goto alter;
514+ }
515+
516+ /* CLIENT->VS */
517+ cp = ip_vs_conn_in_get(orig->dst.protonum,
518+ orig->src.u3.ip, orig->src.u.tcp.port,
519+ orig->dst.u3.ip, orig->dst.u.tcp.port);
520+ if (cp) {
521+ /* Change reply VS->CLIENT to RS->CLIENT */
522+ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
523+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
524+ ", found outin cp=" FMT_CONN "\n",
525+ __FUNCTION__, ct, ct->status,
526+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
527+ ARG_CONN(cp));
528+ new_reply.src.u3.ip = cp->daddr;
529+ new_reply.src.u.tcp.port = cp->dport;
530+ IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
531+ ", outin cp=" FMT_CONN "\n",
532+ __FUNCTION__, ct,
533+ ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
534+ ARG_CONN(cp));
535+ goto alter;
536+ }
537+ IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
538+ __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
539+ return;
540+
541+alter:
542+
543+ /* Never alter conntrack for non-NAT conns */
544+ if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
545+ nf_conntrack_alter_reply(ct, &new_reply);
546+ ip_vs_conn_put(cp);
547+ return;
548+}
549+
550+/*
551+ * Create NF conntrack expectation with wildcard (optional) source port.
552+ * Then our expectfn callback will alter the reply, and the conntrack
553+ * entry is confirmed when the first packet reaches the IPVS hooks.
554+ */
555+void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
556+ __be16 port, __u16 proto, int from_rs)
557+{
558+ struct nf_conn *ct = (struct nf_conn *) skb->nfct;
559+ struct nf_conntrack_expect *e;
560+
561+ if (!sysctl_ip_vs_conntrack)
562+ return;
563+
564+ if (!ct) {
565+ IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
566+ __FUNCTION__, ct, ARG_CONN(cp));
567+ return;
568+ }
569+
570+ if (!(e = nf_ct_expect_alloc(ct)))
571+ return;
572+
573+ e->expectfn = ip_vs_nfct_expect_callback;
574+ e->helper = NULL;
575+ e->flags = 0;
8e6b03ae 576+ e->class = NF_CT_EXPECT_CLASS_DEFAULT;
db744e5b 577+ memset(&e->tuple, 0, sizeof(e->tuple));
578+ e->tuple.src.u.tcp.port = port;
579+ e->tuple.src.l3num = PF_INET;
580+ e->tuple.dst.protonum = proto;
581+ memset(&e->mask, 0, sizeof(e->mask));
582+ e->mask.src.u3.ip = 0xffffffff;
583+ e->mask.src.u.all = port? 0xffff : 0;
584+
585+ if (from_rs) {
586+ e->tuple.src.u3.ip = cp->daddr;
587+ e->tuple.dst.u3.ip = cp->caddr;
588+ e->tuple.dst.u.tcp.port = cp->cport;
589+ } else {
590+ e->tuple.src.u3.ip = cp->caddr;
591+ e->tuple.dst.u3.ip = cp->vaddr;
592+ e->tuple.dst.u.tcp.port = cp->vport;
593+ }
594+
595+ IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
596+ __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
597+ nf_ct_expect_related(e);
598+ nf_ct_expect_put(e);
599+}
600+
601+/*
602+ * Our connection was terminated, try to drop the conntrack immediately
603+ */
604+void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
605+{
606+ struct nf_conntrack_tuple_hash *h;
607+ struct nf_conn *ct;
608+ struct nf_conntrack_tuple tuple;
609+
610+ if (!cp->cport)
611+ return;
612+
613+ tuple = (struct nf_conntrack_tuple) {
614+ .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
615+ tuple.src.u3.ip = cp->caddr;
616+ tuple.src.u.all = cp->cport;
617+ tuple.src.l3num = PF_INET;
618+ tuple.dst.u3.ip = cp->vaddr;
619+ tuple.dst.u.all = cp->vport;
620+
621+ IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
622+ " for conn " FMT_CONN "\n",
623+ __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
624+
625+ h = nf_conntrack_find_get(&tuple);
626+ if (h) {
627+ ct = nf_ct_tuplehash_to_ctrack(h);
628+ if (del_timer(&ct->timeout)) {
629+ IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
630+ FMT_TUPLE "\n",
631+ __FUNCTION__, ct, ARG_TUPLE(&tuple));
632+ if (ct->timeout.function)
633+ ct->timeout.function(ct->timeout.data);
634+ } else {
635+ IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
636+ FMT_TUPLE "\n",
637+ __FUNCTION__, ct, ARG_TUPLE(&tuple));
638+ }
639+ nf_ct_put(ct);
640+ } else {
641+ IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
642+ __FUNCTION__, ARG_TUPLE(&tuple));
643+ }
644+}
645+
8e6b03ae 646diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
647--- v2.6.27/linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-10-11 12:46:16.000000000 +0300
648+++ linux/net/ipv4/ipvs/ip_vs_xmit.c 2008-10-11 14:19:27.000000000 +0300
649@@ -139,7 +139,6 @@ int
db744e5b 650 ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
651 struct ip_vs_protocol *pp)
652 {
653- /* we do not touch skb and do not need pskb ptr */
654 return NF_ACCEPT;
655 }
656
8e6b03ae 657@@ -197,6 +196,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
db744e5b 658 dst_release(skb->dst);
659 skb->dst = &rt->u.dst;
660
6d4e1af8 661+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 662+ goto tx_error_out;
663+
664 /* Another hack: avoid icmp_send in ip_fragment */
665 skb->local_df = 1;
666
8e6b03ae 667@@ -209,6 +211,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
db744e5b 668 dst_link_failure(skb);
669 tx_error:
670 kfree_skb(skb);
671+ tx_error_out:
672 LeaveFunction(10);
673 return NF_STOLEN;
674 }
8e6b03ae 675@@ -261,6 +264,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
db744e5b 676 dst_release(skb->dst);
677 skb->dst = &rt->u.dst;
678
6d4e1af8 679+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 680+ goto tx_error_out;
681+
682 /* mangle the packet */
683 if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
684 goto tx_error;
8e6b03ae 685@@ -284,8 +290,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
db744e5b 686 tx_error_icmp:
687 dst_link_failure(skb);
688 tx_error:
689- LeaveFunction(10);
690 kfree_skb(skb);
691+ tx_error_out:
692+ LeaveFunction(10);
693 return NF_STOLEN;
694 tx_error_put:
695 ip_rt_put(rt);
8e6b03ae 696@@ -384,14 +391,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
db744e5b 697 /* fix old IP header checksum */
698 ip_send_check(old_iph);
699
700- skb_push(skb, sizeof(struct iphdr));
701- skb_reset_network_header(skb);
702- memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
703-
704 /* drop old route */
705 dst_release(skb->dst);
706 skb->dst = &rt->u.dst;
707
6d4e1af8 708+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 709+ goto tx_error_out;
710+
711+ skb_push(skb, sizeof(struct iphdr));
712+ skb_reset_network_header(skb);
713+ memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
714+
715 /*
716 * Push down and install the IPIP header.
717 */
8e6b03ae 718@@ -419,6 +429,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
db744e5b 719 dst_link_failure(skb);
720 tx_error:
721 kfree_skb(skb);
722+ tx_error_out:
723 LeaveFunction(10);
724 return NF_STOLEN;
725 }
8e6b03ae 726@@ -464,6 +475,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
db744e5b 727 dst_release(skb->dst);
728 skb->dst = &rt->u.dst;
729
6d4e1af8 730+ if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
db744e5b 731+ goto tx_error_out;
732+
733 /* Another hack: avoid icmp_send in ip_fragment */
734 skb->local_df = 1;
735
8e6b03ae 736@@ -476,6 +490,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
db744e5b 737 dst_link_failure(skb);
738 tx_error:
739 kfree_skb(skb);
740+ tx_error_out:
741 LeaveFunction(10);
742 return NF_STOLEN;
743 }
8e6b03ae 744@@ -535,6 +550,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
db744e5b 745 dst_release(skb->dst);
746 skb->dst = &rt->u.dst;
747
748+ /* TODO: properly alter reply for NFCT */
749+
750 ip_vs_nat_icmp(skb, pp, cp, 0);
751
752 /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.328059 seconds and 4 git commands to generate.