]>
Commit | Line | Data |
---|---|---|
78978408 AM |
1 | diff -urNp v2.6.34/linux/include/net/ip_vs.h linux/include/net/ip_vs.h |
2 | --- v2.6.34/linux/include/net/ip_vs.h 2010-05-17 10:49:00.000000000 +0300 | |
3 | +++ linux/include/net/ip_vs.h 2010-05-19 11:27:25.000000000 +0300 | |
4 | @@ -25,6 +25,15 @@ | |
2380c486 JR |
5 | #include <linux/ip.h> |
6 | #include <linux/ipv6.h> /* for struct ipv6hdr */ | |
7 | #include <net/ipv6.h> /* for ipv6_addr_copy */ | |
8 | +#include <linux/skbuff.h> | |
9 | + | |
10 | +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) | |
11 | +#include <net/netfilter/nf_conntrack.h> | |
12 | +#include <net/netfilter/nf_conntrack_core.h> | |
13 | +#include <net/netfilter/nf_conntrack_expect.h> | |
14 | +#include <net/netfilter/nf_conntrack_helper.h> | |
78978408 | 15 | +#include <net/netfilter/nf_conntrack_zones.h> |
2380c486 JR |
16 | +#endif |
17 | ||
78978408 AM |
18 | |
19 | /* Connections' size value needed by ip_vs_ctl.c */ | |
20 | @@ -613,6 +622,16 @@ extern void ip_vs_init_hash_table(struct | |
2380c486 JR |
21 | #define IP_VS_APP_TYPE_FTP 1 |
22 | ||
23 | /* | |
24 | + * Netfilter connection tracking | |
25 | + * (from ip_vs_nfct.c) | |
26 | + */ | |
27 | +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum); | |
28 | +extern void ip_vs_nfct_expect_related(struct sk_buff *skb, | |
29 | + struct ip_vs_conn *cp, | |
30 | + __be16 port, __u16 proto, int from_rs); | |
31 | +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp); | |
32 | + | |
33 | +/* | |
34 | * ip_vs_conn handling functions | |
35 | * (from ip_vs_conn.c) | |
36 | */ | |
78978408 | 37 | @@ -788,9 +807,42 @@ extern int sysctl_ip_vs_expire_nodest_co |
2380c486 JR |
38 | extern int sysctl_ip_vs_expire_quiescent_template; |
39 | extern int sysctl_ip_vs_sync_threshold[2]; | |
40 | extern int sysctl_ip_vs_nat_icmp_send; | |
41 | +extern int sysctl_ip_vs_snat_reroute; | |
42 | extern struct ip_vs_stats ip_vs_stats; | |
43 | extern const struct ctl_path net_vs_ctl_path[]; | |
44 | ||
45 | +#ifdef CONFIG_IP_VS_NFCT | |
46 | + | |
47 | +extern int sysctl_ip_vs_conntrack; | |
48 | + | |
49 | +static inline int ip_vs_use_conntrack(struct sk_buff *skb) | |
50 | +{ | |
51 | + return sysctl_ip_vs_conntrack && skb->nfct; | |
52 | +} | |
53 | + | |
54 | +/* Returns boolean and skb is freed on failure */ | |
55 | +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum) | |
56 | +{ | |
57 | + if (!ip_vs_use_conntrack(skb)) | |
58 | + return 1; | |
59 | + return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) || | |
60 | + ip_vs_nfct_confirm(skb, cp, hooknum); | |
61 | +} | |
62 | + | |
63 | +#else | |
64 | + | |
65 | +static inline int ip_vs_use_conntrack(struct sk_buff *skb) | |
66 | +{ | |
67 | + return 0; | |
68 | +} | |
69 | + | |
70 | +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum) | |
71 | +{ | |
72 | + return 1; | |
73 | +} | |
74 | + | |
75 | +#endif | |
76 | + | |
77 | extern struct ip_vs_service * | |
78 | ip_vs_service_get(int af, __u32 fwmark, __u16 protocol, | |
79 | const union nf_inet_addr *vaddr, __be16 vport); | |
78978408 AM |
80 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/Kconfig linux/net/netfilter/ipvs/Kconfig |
81 | --- v2.6.34/linux/net/netfilter/ipvs/Kconfig 2010-05-17 10:49:01.000000000 +0300 | |
82 | +++ linux/net/netfilter/ipvs/Kconfig 2010-05-19 10:51:31.000000000 +0300 | |
83 | @@ -250,4 +250,12 @@ config IP_VS_FTP | |
2380c486 JR |
84 | If you want to compile it in kernel, say Y. To compile it as a |
85 | module, choose M here. If unsure, say N. | |
86 | ||
87 | +config IP_VS_NFCT | |
88 | + bool "Netfilter connection tracking" | |
89 | + depends on NF_CONNTRACK | |
90 | + ---help--- | |
91 | + The Netfilter connection tracking support allows the IPVS | |
92 | + connection state to be exported to the Netfilter framework | |
93 | + for filtering purposes. | |
94 | + | |
95 | endif # IP_VS | |
78978408 AM |
96 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/Makefile linux/net/netfilter/ipvs/Makefile |
97 | --- v2.6.34/linux/net/netfilter/ipvs/Makefile 2010-05-17 10:49:01.000000000 +0300 | |
98 | +++ linux/net/netfilter/ipvs/Makefile 2010-05-19 10:51:31.000000000 +0300 | |
99 | @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD | |
2380c486 | 100 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o |
78978408 | 101 | ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o |
2380c486 JR |
102 | |
103 | +ip_vs-extra_objs-y := | |
104 | +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o | |
105 | + | |
106 | ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ | |
107 | ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ | |
108 | ip_vs_est.o ip_vs_proto.o \ | |
109 | - $(ip_vs_proto-objs-y) | |
110 | + $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y) | |
111 | ||
112 | ||
113 | # IPVS core | |
78978408 AM |
114 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_conn.c linux/net/netfilter/ipvs/ip_vs_conn.c |
115 | --- v2.6.34/linux/net/netfilter/ipvs/ip_vs_conn.c 2010-05-17 10:49:01.000000000 +0300 | |
116 | +++ linux/net/netfilter/ipvs/ip_vs_conn.c 2010-05-19 10:51:31.000000000 +0300 | |
117 | @@ -664,6 +664,11 @@ static void ip_vs_conn_expire(unsigned l | |
2380c486 JR |
118 | if (cp->control) |
119 | ip_vs_control_del(cp); | |
120 | ||
121 | +#ifdef CONFIG_IP_VS_NFCT | |
122 | + if (sysctl_ip_vs_conntrack) | |
123 | + ip_vs_nfct_conn_drop(cp); | |
124 | +#endif | |
125 | + | |
126 | if (unlikely(cp->app != NULL)) | |
127 | ip_vs_unbind_app(cp); | |
128 | ip_vs_unbind_dest(cp); | |
78978408 AM |
129 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_core.c linux/net/netfilter/ipvs/ip_vs_core.c |
130 | --- v2.6.34/linux/net/netfilter/ipvs/ip_vs_core.c 2010-05-17 10:49:01.000000000 +0300 | |
131 | +++ linux/net/netfilter/ipvs/ip_vs_core.c 2010-05-19 10:51:31.000000000 +0300 | |
132 | @@ -893,13 +893,16 @@ static inline int is_tcp_reset(const str | |
2380c486 JR |
133 | */ |
134 | static unsigned int | |
135 | handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, | |
136 | - struct ip_vs_conn *cp, int ihl) | |
137 | + struct ip_vs_conn *cp, int ihl, unsigned int hooknum) | |
138 | { | |
139 | IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); | |
140 | ||
141 | if (!skb_make_writable(skb, ihl)) | |
142 | goto drop; | |
143 | ||
144 | + if (AF_INET == af && !ip_vs_confirm_conntrack(skb, cp, hooknum)) | |
145 | + goto out; | |
146 | + | |
147 | /* mangle the packet */ | |
148 | if (pp->snat_handler && !pp->snat_handler(skb, pp, cp)) | |
149 | goto drop; | |
78978408 | 150 | @@ -914,6 +917,15 @@ handle_response(int af, struct sk_buff * |
2380c486 JR |
151 | ip_send_check(ip_hdr(skb)); |
152 | } | |
153 | ||
154 | + /* | |
155 | + * nf_iterate does not expect change in the skb->dst->dev. | |
156 | + * It looks like it is not fatal to enable this code for hooks | |
157 | + * where our handlers are at the end of the chain list and | |
158 | + * when all next handlers use skb->dst->dev and not outdev. | |
159 | + * It will definitely route the inout NAT traffic properly | |
160 | + * when multiple paths are used. | |
161 | + */ | |
162 | + | |
163 | /* For policy routing, packets originating from this | |
164 | * machine itself may be routed differently to packets | |
165 | * passing through. We want this packet to be routed as | |
78978408 | 166 | @@ -926,7 +938,8 @@ handle_response(int af, struct sk_buff * |
2380c486 JR |
167 | goto drop; |
168 | } else | |
169 | #endif | |
170 | - if (ip_route_me_harder(skb, RTN_LOCAL) != 0) | |
171 | + if (sysctl_ip_vs_snat_reroute && | |
172 | + ip_route_me_harder(skb, RTN_LOCAL) != 0) | |
173 | goto drop; | |
174 | ||
175 | IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); | |
78978408 | 176 | @@ -941,8 +954,11 @@ handle_response(int af, struct sk_buff * |
2380c486 JR |
177 | return NF_ACCEPT; |
178 | ||
179 | drop: | |
180 | - ip_vs_conn_put(cp); | |
181 | kfree_skb(skb); | |
182 | + | |
183 | +out: | |
184 | + ip_vs_conn_put(cp); | |
185 | + LeaveFunction(11); | |
186 | return NF_STOLEN; | |
187 | } | |
188 | ||
78978408 | 189 | @@ -982,8 +998,13 @@ ip_vs_out(unsigned int hooknum, struct s |
2380c486 JR |
190 | if (unlikely(iph.protocol == IPPROTO_ICMP)) { |
191 | int related, verdict = ip_vs_out_icmp(skb, &related); | |
192 | ||
193 | - if (related) | |
194 | + if (related) { | |
195 | + if (sysctl_ip_vs_snat_reroute && | |
196 | + NF_ACCEPT == verdict && | |
197 | + ip_route_me_harder(skb, RTN_LOCAL)) | |
198 | + verdict = NF_DROP; | |
199 | return verdict; | |
200 | + } | |
201 | ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); | |
202 | } | |
203 | ||
78978408 | 204 | @@ -1063,7 +1084,7 @@ ip_vs_out(unsigned int hooknum, struct s |
2380c486 JR |
205 | return NF_ACCEPT; |
206 | } | |
207 | ||
208 | - return handle_response(af, skb, pp, cp, iph.len); | |
209 | + return handle_response(af, skb, pp, cp, iph.len, hooknum); | |
210 | } | |
211 | ||
212 | ||
78978408 | 213 | @@ -1340,7 +1361,7 @@ ip_vs_in(unsigned int hooknum, struct sk |
2380c486 JR |
214 | /* For local client packets, it could be a response */ |
215 | cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0); | |
216 | if (cp) | |
217 | - return handle_response(af, skb, pp, cp, iph.len); | |
218 | + return handle_response(af, skb, pp, cp, iph.len, hooknum); | |
219 | ||
220 | if (!pp->conn_schedule(af, skb, pp, &v, &cp)) | |
221 | return v; | |
78978408 AM |
222 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_ctl.c linux/net/netfilter/ipvs/ip_vs_ctl.c |
223 | --- v2.6.34/linux/net/netfilter/ipvs/ip_vs_ctl.c 2010-05-17 10:49:01.000000000 +0300 | |
224 | +++ linux/net/netfilter/ipvs/ip_vs_ctl.c 2010-05-19 10:51:31.000000000 +0300 | |
225 | @@ -88,6 +88,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0; | |
2380c486 JR |
226 | int sysctl_ip_vs_expire_quiescent_template = 0; |
227 | int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; | |
228 | int sysctl_ip_vs_nat_icmp_send = 0; | |
229 | +int sysctl_ip_vs_snat_reroute = 0; | |
230 | +#ifdef CONFIG_IP_VS_NFCT | |
231 | +int sysctl_ip_vs_conntrack = 0; | |
232 | +#endif | |
233 | ||
234 | ||
235 | #ifdef CONFIG_IP_VS_DEBUG | |
78978408 | 236 | @@ -1579,6 +1583,15 @@ static struct ctl_table vs_vars[] = { |
2380c486 | 237 | .mode = 0644, |
78978408 | 238 | .proc_handler = proc_do_defense_mode, |
2380c486 JR |
239 | }, |
240 | +#ifdef CONFIG_IP_VS_NFCT | |
241 | + { | |
242 | + .procname = "conntrack", | |
243 | + .data = &sysctl_ip_vs_conntrack, | |
244 | + .maxlen = sizeof(int), | |
245 | + .mode = 0644, | |
246 | + .proc_handler = &proc_dointvec, | |
247 | + }, | |
248 | +#endif | |
249 | { | |
78978408 AM |
250 | .procname = "secure_tcp", |
251 | .data = &sysctl_ip_vs_secure_tcp, | |
252 | @@ -1586,6 +1599,13 @@ static struct ctl_table vs_vars[] = { | |
2380c486 | 253 | .mode = 0644, |
78978408 | 254 | .proc_handler = proc_do_defense_mode, |
2380c486 JR |
255 | }, |
256 | + { | |
257 | + .procname = "snat_reroute", | |
258 | + .data = &sysctl_ip_vs_snat_reroute, | |
259 | + .maxlen = sizeof(int), | |
260 | + .mode = 0644, | |
261 | + .proc_handler = &proc_dointvec, | |
262 | + }, | |
263 | #if 0 | |
264 | { | |
265 | .procname = "timeout_established", | |
78978408 AM |
266 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_ftp.c linux/net/netfilter/ipvs/ip_vs_ftp.c |
267 | --- v2.6.34/linux/net/netfilter/ipvs/ip_vs_ftp.c 2010-05-17 10:49:01.000000000 +0300 | |
268 | +++ linux/net/netfilter/ipvs/ip_vs_ftp.c 2010-05-19 10:51:31.000000000 +0300 | |
269 | @@ -204,6 +204,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap | |
2380c486 JR |
270 | ip_vs_control_add(n_cp, cp); |
271 | } | |
272 | ||
273 | +#ifdef CONFIG_IP_VS_NFCT | |
274 | + if (skb->nfct) | |
275 | + ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0); | |
276 | +#endif | |
277 | + | |
278 | /* | |
279 | * Replace the old passive address with the new one | |
280 | */ | |
78978408 | 281 | @@ -343,6 +348,11 @@ static int ip_vs_ftp_in(struct ip_vs_app |
2380c486 JR |
282 | ip_vs_control_add(n_cp, cp); |
283 | } | |
284 | ||
285 | +#ifdef CONFIG_IP_VS_NFCT | |
286 | + if (skb->nfct) | |
287 | + ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1); | |
288 | +#endif | |
289 | + | |
290 | /* | |
291 | * Move tunnel to listen state | |
292 | */ | |
78978408 AM |
293 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_nfct.c linux/net/netfilter/ipvs/ip_vs_nfct.c |
294 | --- v2.6.34/linux/net/netfilter/ipvs/ip_vs_nfct.c 1970-01-01 02:00:00.000000000 +0200 | |
295 | +++ linux/net/netfilter/ipvs/ip_vs_nfct.c 2010-05-19 11:18:37.000000000 +0300 | |
296 | @@ -0,0 +1,376 @@ | |
2380c486 JR |
297 | +/* |
298 | + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS | |
299 | + * | |
300 | + * Portions Copyright (C) 2001-2002 | |
301 | + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. | |
302 | + * | |
303 | + * Portions Copyright (C) 2003-2008 | |
304 | + * Julian Anastasov | |
305 | + * | |
306 | + * | |
307 | + * This code is free software; you can redistribute it and/or modify | |
308 | + * it under the terms of the GNU General Public License as published by | |
309 | + * the Free Software Foundation; either version 2 of the License, or | |
310 | + * (at your option) any later version. | |
311 | + * | |
312 | + * This program is distributed in the hope that it will be useful, | |
313 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
314 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
315 | + * GNU General Public License for more details. | |
316 | + * | |
317 | + * You should have received a copy of the GNU General Public License | |
318 | + * along with this program; if not, write to the Free Software | |
319 | + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
320 | + * | |
321 | + * | |
322 | + * Authors: | |
323 | + * Ben North <ben@redfrontdoor.org> | |
324 | + * Julian Anastasov <ja@ssi.bg> Reorganize and sync with latest kernels | |
325 | + * | |
326 | + * | |
327 | + * Current status: | |
328 | + * | |
329 | + * - provide conntrack confirmation for new and related connections; | |
330 | + * this way we can see their proper conntrack state in all hooks | |
331 | + * - support for all forwarding methods, not only NAT | |
332 | + * - FTP support (NAT), ability to support other NAT apps with expectations | |
333 | + * - to correctly create expectations for related NAT connections the proper | |
334 | + * NF conntrack support must be already installed, eg. ip_vs_ftp requires | |
335 | + * nf_conntrack_ftp for the same ports | |
336 | + * | |
337 | + */ | |
338 | + | |
78978408 AM |
339 | +#define KMSG_COMPONENT "IPVS" |
340 | +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt | |
341 | + | |
2380c486 JR |
342 | +#include <linux/module.h> |
343 | +#include <linux/types.h> | |
344 | +#include <linux/kernel.h> | |
345 | +#include <linux/errno.h> | |
346 | +#include <linux/compiler.h> | |
347 | +#include <linux/vmalloc.h> | |
348 | +#include <linux/skbuff.h> | |
349 | +#include <net/ip.h> | |
350 | +#include <linux/netfilter.h> | |
351 | +#include <linux/netfilter_ipv4.h> | |
352 | +#include <net/ip_vs.h> | |
353 | + | |
354 | + | |
355 | +EXPORT_SYMBOL(ip_vs_nfct_expect_related); | |
356 | + | |
357 | + | |
358 | +#define FMT_TUPLE "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u" | |
359 | +#define ARG_TUPLE(t) NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \ | |
360 | + NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \ | |
361 | + (t)->dst.protonum | |
362 | + | |
363 | +#define FMT_CONN "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u" | |
364 | +#define ARG_CONN(c) NIPQUAD((c)->caddr), ntohs((c)->cport), \ | |
365 | + NIPQUAD((c)->vaddr), ntohs((c)->vport), \ | |
366 | + NIPQUAD((c)->daddr), ntohs((c)->dport), \ | |
367 | + (c)->protocol, (c)->state | |
368 | + | |
369 | +/* Returns boolean and skb is freed on failure */ | |
370 | +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, | |
371 | + unsigned int hooknum) | |
372 | +{ | |
373 | + /* | |
374 | + * The assumptions: | |
375 | + * - the nfct is !NULL and is not confirmed | |
376 | + * - we are called before any mangle | |
377 | + */ | |
378 | + | |
379 | + struct iphdr *iph = ip_hdr(skb); | |
380 | + struct nf_conn *ct = (struct nf_conn *) skb->nfct; | |
381 | + struct nf_conntrack_tuple new_reply; | |
382 | + int ret = NF_DROP; | |
383 | + __be16 _ports[2], *pptr; | |
384 | +#ifdef CONFIG_IP_VS_DEBUG | |
385 | + struct nf_conntrack_tuple *orig_tup = | |
386 | + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | |
387 | + struct nf_conntrack_tuple *orig_rep = | |
388 | + &ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
389 | +#endif | |
390 | +#ifdef CONFIG_NF_NAT_NEEDED | |
391 | + int initialized = !!(ct->status & IPS_NAT_DONE_MASK); | |
392 | +#else | |
393 | + int initialized = 0; | |
394 | +#endif | |
395 | + | |
396 | + IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE | |
397 | + ", cp=" FMT_CONN "\n", | |
78978408 | 398 | + __func__, ct, initialized, |
2380c486 JR |
399 | + ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp)); |
400 | + | |
401 | +#ifdef CONFIG_NF_NAT_NEEDED | |
402 | + /* | |
403 | + * This is really bad, maybe we are trying to alter DNAT conn? | |
404 | + * This is not supported, avoid the confirmation. | |
405 | + */ | |
406 | + if (initialized && ct->status & IPS_NAT_MASK) { | |
407 | +#ifdef CONFIG_IP_VS_DEBUG | |
408 | + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n", | |
78978408 | 409 | + __func__, ct, ct->status, initialized); |
2380c486 JR |
410 | +#endif |
411 | + return 1; | |
412 | + } | |
413 | +#endif | |
414 | + | |
415 | + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum) | |
416 | + goto confirm; | |
417 | + | |
418 | + /* | |
419 | + * Alter reply only for IP_VS_CONN_F_MASQ in outin direction. | |
420 | + * For related connections in inout direction it is done in | |
421 | + * expectfn callback. | |
422 | + */ | |
423 | + | |
424 | + pptr = skb_header_pointer(skb, ip_hdrlen(skb), | |
425 | + sizeof(_ports), _ports); | |
426 | + if (!pptr) | |
427 | + goto out; | |
428 | + | |
429 | + new_reply = (struct nf_conntrack_tuple) { | |
430 | + .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }}; | |
431 | + | |
432 | + new_reply.src.u3 = cp->daddr; | |
433 | + new_reply.src.u.tcp.port = cp->dport; | |
434 | + new_reply.src.l3num = PF_INET; | |
435 | + new_reply.dst.u3.ip = iph->saddr; | |
436 | + new_reply.dst.u.tcp.port = pptr[0]; | |
437 | + | |
438 | + nf_conntrack_alter_reply(ct, &new_reply); | |
439 | + | |
440 | + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE | |
441 | + ", new_reply=" FMT_TUPLE " => alter_reply\n", | |
78978408 | 442 | + __func__, ct, initialized, |
2380c486 JR |
443 | + ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply)); |
444 | + | |
445 | + /* | |
446 | + * No need to rehash NAT info because we don't change source | |
447 | + * address in original direction | |
448 | + */ | |
449 | + | |
450 | +confirm: | |
451 | + | |
452 | + ret = __nf_conntrack_confirm(skb); | |
453 | + | |
454 | + if (ret != NF_STOLEN) { | |
455 | + IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n", | |
78978408 | 456 | + __func__, ct, initialized, ARG_TUPLE(orig_tup), ret); |
2380c486 JR |
457 | + } |
458 | + | |
459 | + if (ret != NF_ACCEPT) | |
460 | + goto out; | |
461 | + return 1; | |
462 | + | |
463 | +out: | |
464 | + if (ret != NF_STOLEN) | |
465 | + kfree_skb(skb); | |
466 | + return 0; | |
467 | +} | |
468 | + | |
469 | +/* | |
470 | + * Confirm (and optionally alter) the conntrack entry if needed | |
471 | + * because the IPVS packets do not reach ipv4_confirm. | |
472 | + */ | |
473 | +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, | |
474 | + unsigned int hooknum) | |
475 | +{ | |
476 | + struct iphdr *iph = ip_hdr(skb); | |
477 | + struct nf_conn *ct = (struct nf_conn *) skb->nfct; | |
478 | + | |
479 | + /* By the time we're sending the packet out the other | |
480 | + * side, there should be a confirmed Netfilter CT entry | |
481 | + * for this connection. This may not be the case, | |
482 | + * however, if it's a brand new connection, or if the NF | |
483 | + * entry has timed out before ours has. Either way, if | |
484 | + * the NF CT entry is unconfirmed, confirm it, and deal | |
485 | + * with reply tuple mangling at the same time. | |
486 | + */ | |
487 | + | |
488 | + /* We only deal with TCP or UDP packets */ | |
489 | + if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP) | |
490 | + return 1; | |
491 | + | |
492 | + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { | |
493 | + /* | |
494 | + * Do not be surprised if non-NAT conntracks stay in SYN_SENT | |
495 | + * state, maybe the replies from the real server go | |
496 | + * directly to the client. In any case, keep them in REPLIED | |
497 | + * state (ESTABLISHED). | |
498 | + */ | |
499 | + if (iph->protocol != IPPROTO_TCP || | |
500 | + IP_VS_TCP_S_ESTABLISHED == cp->state) { | |
501 | + set_bit(IPS_SEEN_REPLY_BIT, &ct->status); | |
502 | + } | |
503 | + } | |
504 | + | |
505 | + /* | |
506 | + * We assume the reused connections do not change their rip:rport | |
507 | + * and we do not need to alter their conntrack reply | |
508 | + */ | |
509 | + return __ip_vs_nfct_confirm(skb, cp, hooknum); | |
510 | +} | |
511 | + | |
512 | +/* | |
513 | + * We are called from init_conntrack() as expectfn handler | |
514 | + */ | |
515 | + | |
516 | +static void ip_vs_nfct_expect_callback(struct nf_conn *ct, | |
517 | + struct nf_conntrack_expect *exp) | |
518 | +{ | |
519 | + struct nf_conntrack_tuple *orig, new_reply; | |
520 | + struct ip_vs_conn *cp; | |
521 | + | |
522 | + if (exp->tuple.src.l3num != PF_INET) | |
523 | + return; | |
524 | + | |
525 | + /* | |
526 | + * - We assume that no NF locks are held before this callback | |
527 | + * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their | |
528 | + * expectations even if they use wildcard values, now we provide | |
529 | + * the actual values from the newly created original conntrack direction | |
530 | + * - the conntrack is confirmed when packet reaches IPVS hooks | |
531 | + */ | |
532 | + | |
533 | + /* RS->CLIENT */ | |
534 | + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; | |
535 | + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, | |
536 | + &orig->src.u3, orig->src.u.tcp.port, | |
537 | + &orig->dst.u3, orig->dst.u.tcp.port); | |
538 | + if (cp) { | |
539 | + /* Change reply CLIENT->RS to CLIENT->VS */ | |
540 | + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
541 | + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE | |
542 | + ", found inout cp=" FMT_CONN "\n", | |
78978408 | 543 | + __func__, ct, ct->status, |
2380c486 JR |
544 | + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
545 | + ARG_CONN(cp)); | |
546 | + new_reply.dst.u3 = cp->vaddr; | |
547 | + new_reply.dst.u.tcp.port = cp->vport; | |
548 | + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | |
549 | + ", inout cp=" FMT_CONN "\n", | |
78978408 | 550 | + __func__, ct, |
2380c486 JR |
551 | + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
552 | + ARG_CONN(cp)); | |
553 | + goto alter; | |
554 | + } | |
555 | + | |
556 | + /* CLIENT->VS */ | |
557 | + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, | |
558 | + &orig->src.u3, orig->src.u.tcp.port, | |
559 | + &orig->dst.u3, orig->dst.u.tcp.port); | |
560 | + if (cp) { | |
561 | + /* Change reply VS->CLIENT to RS->CLIENT */ | |
562 | + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; | |
563 | + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE | |
564 | + ", found outin cp=" FMT_CONN "\n", | |
78978408 | 565 | + __func__, ct, ct->status, |
2380c486 JR |
566 | + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
567 | + ARG_CONN(cp)); | |
568 | + new_reply.src.u3 = cp->daddr; | |
569 | + new_reply.src.u.tcp.port = cp->dport; | |
570 | + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE | |
571 | + ", outin cp=" FMT_CONN "\n", | |
78978408 | 572 | + __func__, ct, |
2380c486 JR |
573 | + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), |
574 | + ARG_CONN(cp)); | |
575 | + goto alter; | |
576 | + } | |
577 | + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n", | |
78978408 | 578 | + __func__, ct, ct->status, ARG_TUPLE(orig)); |
2380c486 JR |
579 | + return; |
580 | + | |
581 | +alter: | |
582 | + | |
583 | + /* Never alter conntrack for non-NAT conns */ | |
584 | + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) | |
585 | + nf_conntrack_alter_reply(ct, &new_reply); | |
586 | + ip_vs_conn_put(cp); | |
587 | + return; | |
588 | +} | |
589 | + | |
590 | +/* | |
591 | + * Create NF conntrack expectation with wildcard (optional) source port. | |
592 | + * Then the default callback function will alter the reply and will confirm | |
593 | + * the conntrack entry when the first packet comes. | |
594 | + */ | |
595 | +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp, | |
596 | + __be16 port, __u16 proto, int from_rs) | |
597 | +{ | |
598 | + struct nf_conn *ct = (struct nf_conn *) skb->nfct; | |
599 | + struct nf_conntrack_expect *e; | |
600 | + | |
601 | + if (!sysctl_ip_vs_conntrack) | |
602 | + return; | |
603 | + | |
604 | + if (!ct) { | |
605 | + IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n", | |
78978408 | 606 | + __func__, ct, ARG_CONN(cp)); |
2380c486 JR |
607 | + return; |
608 | + } | |
609 | + | |
610 | + if (!(e = nf_ct_expect_alloc(ct))) | |
611 | + return; | |
612 | + | |
78978408 AM |
613 | + nf_ct_expect_init(e, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), |
614 | + from_rs ? &cp->daddr : &cp->caddr, | |
615 | + from_rs ? &cp->caddr : &cp->vaddr, | |
616 | + proto, port ? &port : NULL, | |
617 | + from_rs ? &cp->cport : &cp->vport); | |
618 | + | |
619 | + e->expectfn = ip_vs_nfct_expect_callback; | |
2380c486 JR |
620 | + |
621 | + IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", | |
78978408 | 622 | + __func__, ct, ARG_TUPLE(&e->tuple)); |
2380c486 JR |
623 | + nf_ct_expect_related(e); |
624 | + nf_ct_expect_put(e); | |
625 | +} | |
626 | + | |
627 | +/* | |
628 | + * Our connection was terminated, try to drop the conntrack immediately | |
629 | + */ | |
630 | +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp) | |
631 | +{ | |
632 | + struct nf_conntrack_tuple_hash *h; | |
633 | + struct nf_conn *ct; | |
634 | + struct nf_conntrack_tuple tuple; | |
635 | + | |
636 | + if (!cp->cport) | |
637 | + return; | |
638 | + | |
639 | + tuple = (struct nf_conntrack_tuple) { | |
640 | + .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; | |
641 | + tuple.src.u3 = cp->caddr; | |
642 | + tuple.src.u.all = cp->cport; | |
643 | + tuple.src.l3num = PF_INET; | |
644 | + tuple.dst.u3 = cp->vaddr; | |
645 | + tuple.dst.u.all = cp->vport; | |
646 | + | |
647 | + IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE | |
648 | + " for conn " FMT_CONN "\n", | |
78978408 | 649 | + __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); |
2380c486 | 650 | + |
78978408 | 651 | + h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); |
2380c486 JR |
652 | + if (h) { |
653 | + ct = nf_ct_tuplehash_to_ctrack(h); | |
78978408 | 654 | + /* Show what happens instead of calling nf_ct_kill() */ |
2380c486 JR |
655 | + if (del_timer(&ct->timeout)) { |
656 | + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" | |
657 | + FMT_TUPLE "\n", | |
78978408 | 658 | + __func__, ct, ARG_TUPLE(&tuple)); |
2380c486 JR |
659 | + if (ct->timeout.function) |
660 | + ct->timeout.function(ct->timeout.data); | |
661 | + } else { | |
662 | + IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" | |
663 | + FMT_TUPLE "\n", | |
78978408 | 664 | + __func__, ct, ARG_TUPLE(&tuple)); |
2380c486 JR |
665 | + } |
666 | + nf_ct_put(ct); | |
667 | + } else { | |
668 | + IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", | |
78978408 | 669 | + __func__, ARG_TUPLE(&tuple)); |
2380c486 JR |
670 | + } |
671 | +} | |
672 | + | |
78978408 AM |
673 | diff -urNp v2.6.34/linux/net/netfilter/ipvs/ip_vs_xmit.c linux/net/netfilter/ipvs/ip_vs_xmit.c |
674 | --- v2.6.34/linux/net/netfilter/ipvs/ip_vs_xmit.c 2010-05-17 10:49:02.000000000 +0300 | |
675 | +++ linux/net/netfilter/ipvs/ip_vs_xmit.c 2010-05-19 10:51:31.000000000 +0300 | |
676 | @@ -267,6 +267,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s | |
677 | skb_dst_drop(skb); | |
678 | skb_dst_set(skb, &rt->u.dst); | |
2380c486 JR |
679 | |
680 | + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN)) | |
681 | + goto tx_error_out; | |
682 | + | |
683 | /* Another hack: avoid icmp_send in ip_fragment */ | |
684 | skb->local_df = 1; | |
685 | ||
78978408 | 686 | @@ -279,6 +282,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s |
2380c486 JR |
687 | dst_link_failure(skb); |
688 | tx_error: | |
689 | kfree_skb(skb); | |
690 | + tx_error_out: | |
691 | LeaveFunction(10); | |
692 | return NF_STOLEN; | |
693 | } | |
78978408 AM |
694 | @@ -395,6 +399,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru |
695 | skb_dst_drop(skb); | |
696 | skb_dst_set(skb, &rt->u.dst); | |
2380c486 JR |
697 | |
698 | + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN)) | |
699 | + goto tx_error_out; | |
700 | + | |
701 | /* mangle the packet */ | |
702 | if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) | |
703 | goto tx_error; | |
78978408 | 704 | @@ -418,8 +425,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru |
2380c486 JR |
705 | tx_error_icmp: |
706 | dst_link_failure(skb); | |
707 | tx_error: | |
708 | - LeaveFunction(10); | |
709 | kfree_skb(skb); | |
710 | + tx_error_out: | |
711 | + LeaveFunction(10); | |
712 | return NF_STOLEN; | |
713 | tx_error_put: | |
714 | ip_rt_put(rt); | |
78978408 | 715 | @@ -595,14 +603,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s |
2380c486 JR |
716 | /* fix old IP header checksum */ |
717 | ip_send_check(old_iph); | |
718 | ||
719 | - skb_push(skb, sizeof(struct iphdr)); | |
720 | - skb_reset_network_header(skb); | |
721 | - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | |
722 | - | |
723 | /* drop old route */ | |
13e5c3b1 | 724 | skb_dst_drop(skb); |
28eb7fa5 | 725 | skb_dst_set(skb, &rt->dst); |
2380c486 JR |
726 | |
727 | + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN)) | |
728 | + goto tx_error_out; | |
729 | + | |
730 | + skb_push(skb, sizeof(struct iphdr)); | |
731 | + skb_reset_network_header(skb); | |
732 | + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); | |
733 | + | |
734 | /* | |
735 | * Push down and install the IPIP header. | |
736 | */ | |
78978408 | 737 | @@ -630,6 +641,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s |
2380c486 JR |
738 | dst_link_failure(skb); |
739 | tx_error: | |
740 | kfree_skb(skb); | |
741 | + tx_error_out: | |
742 | LeaveFunction(10); | |
743 | return NF_STOLEN; | |
744 | } | |
78978408 AM |
745 | @@ -782,6 +794,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc |
746 | skb_dst_drop(skb); | |
747 | skb_dst_set(skb, &rt->u.dst); | |
2380c486 JR |
748 | |
749 | + if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN)) | |
750 | + goto tx_error_out; | |
751 | + | |
752 | /* Another hack: avoid icmp_send in ip_fragment */ | |
753 | skb->local_df = 1; | |
754 | ||
78978408 | 755 | @@ -794,6 +809,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc |
2380c486 JR |
756 | dst_link_failure(skb); |
757 | tx_error: | |
758 | kfree_skb(skb); | |
759 | + tx_error_out: | |
760 | LeaveFunction(10); | |
761 | return NF_STOLEN; | |
762 | } | |
78978408 AM |
763 | @@ -907,6 +923,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str |
764 | skb_dst_drop(skb); | |
765 | skb_dst_set(skb, &rt->u.dst); | |
2380c486 JR |
766 | |
767 | + /* TODO: properly alter reply for NFCT */ | |
768 | + | |
769 | ip_vs_nat_icmp(skb, pp, cp, 0); | |
770 | ||
771 | /* Another hack: avoid icmp_send in ip_fragment */ |