]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-ipvs-nfct.patch
- updated for 2.6.25.4
[packages/kernel.git] / kernel-ipvs-nfct.patch
1 diff -urNp v2.6.25/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.25/linux/include/net/ip_vs.h   2008-04-17 09:58:08.000000000 +0300
3 +++ linux/include/net/ip_vs.h   2008-04-19 19:59:24.000000000 +0300
4 @@ -11,6 +11,16 @@
5  
6  #include <linux/sysctl.h>      /* For ctl_path */
7  
8 +#ifdef __KERNEL__
9 +#include <linux/skbuff.h>
10 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11 +#include <net/netfilter/nf_conntrack.h>
12 +#include <net/netfilter/nf_conntrack_core.h>
13 +#include <net/netfilter/nf_conntrack_expect.h>
14 +#include <net/netfilter/nf_conntrack_helper.h>
15 +#endif
16 +#endif
17 +
18  #define IP_VS_VERSION_CODE     0x010201
19  #define NVERSION(version)                      \
20         (version >> 16) & 0xFF,                 \
21 @@ -686,6 +696,16 @@ extern void ip_vs_init_hash_table(struct
22   */
23  
24  /*
25 + *      Netfilter connection tracking
26 + *      (from ip_vs_nfct.c)
27 + */
28 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
29 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
30 +                                     struct ip_vs_conn *cp,
31 +                                     __be16 port, __u16 proto, int from_rs);
32 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
33 +
34 +/*
35   *     IPVS connection entry hash table
36   */
37  #ifndef CONFIG_IP_VS_TAB_BITS
38 @@ -855,9 +875,42 @@ extern int sysctl_ip_vs_expire_nodest_co
39  extern int sysctl_ip_vs_expire_quiescent_template;
40  extern int sysctl_ip_vs_sync_threshold[2];
41  extern int sysctl_ip_vs_nat_icmp_send;
42 +extern int sysctl_ip_vs_snat_reroute;
43  extern struct ip_vs_stats ip_vs_stats;
44  extern struct ctl_path net_vs_ctl_path[];
45  
46 +#ifdef CONFIG_IP_VS_NFCT
47 +
48 +extern int sysctl_ip_vs_conntrack;     /* backs the "conntrack" sysctl */
49 +
50 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
51 +{
52 +       return sysctl_ip_vs_conntrack && skb->nfct;     /* sysctl on and skb has nfct */
53 +}
54 +
55 +/* Returns boolean: 1 = skb may proceed, 0 = confirmation failed and skb is freed */
56 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
57 +{
58 +       if (!ip_vs_use_conntrack(skb))
59 +               return 1;       /* nothing to confirm */
60 +       return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
61 +               ip_vs_nfct_confirm(skb, cp, hooknum);
62 +}
63 +
64 +#else /* !CONFIG_IP_VS_NFCT */
65 +
66 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
67 +{
68 +       return 0;       /* conntrack support not built in */
69 +}
70 +
71 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
72 +{
73 +       return 1;       /* nothing to confirm, skb is left untouched */
74 +}
75 +
76 +#endif /* CONFIG_IP_VS_NFCT */
77 +
78  extern struct ip_vs_service *
79  ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
80  
81 diff -urNp v2.6.25/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
82 --- v2.6.25/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
83 +++ linux/net/ipv4/ipvs/Kconfig 2008-04-19 19:55:40.000000000 +0300
84 @@ -221,4 +221,12 @@ config     IP_VS_FTP
85           If you want to compile it in kernel, say Y. To compile it as a
86           module, choose M here. If unsure, say N.
87  
88 +config IP_VS_NFCT
89 +       bool "Netfilter connection tracking"
90 +       depends on NF_CONNTRACK
91 +       ---help---
92 +         The Netfilter connection tracking support allows the IPVS
93 +         connection state to be exported to the Netfilter framework
94 +         for filtering purposes.
95 +
96  endif # IP_VS
97 diff -urNp v2.6.25/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
98 --- v2.6.25/linux/net/ipv4/ipvs/Makefile        2005-06-18 08:50:52.000000000 +0300
99 +++ linux/net/ipv4/ipvs/Makefile        2008-04-19 19:55:40.000000000 +0300
100 @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
101  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
102  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
103  
104 +ip_vs-extra_objs-y :=
105 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
106 +
107  ip_vs-objs :=  ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o        \
108                 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o                      \
109                 ip_vs_est.o ip_vs_proto.o                                  \
110 -               $(ip_vs_proto-objs-y)
111 +               $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
112  
113  
114  # IPVS core
115 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
116 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_conn.c    2008-04-17 09:58:09.000000000 +0300
117 +++ linux/net/ipv4/ipvs/ip_vs_conn.c    2008-04-19 19:55:40.000000000 +0300
118 @@ -593,6 +593,11 @@ static void ip_vs_conn_expire(unsigned l
119                 if (cp->control)
120                         ip_vs_control_del(cp);
121  
122 +#ifdef CONFIG_IP_VS_NFCT
123 +               if (sysctl_ip_vs_conntrack)
124 +                       ip_vs_nfct_conn_drop(cp);
125 +#endif
126 +
127                 if (unlikely(cp->app != NULL))
128                         ip_vs_unbind_app(cp);
129                 ip_vs_unbind_dest(cp);
130 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
131 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_core.c    2008-04-17 09:58:09.000000000 +0300
132 +++ linux/net/ipv4/ipvs/ip_vs_core.c    2008-04-19 19:55:40.000000000 +0300
133 @@ -661,6 +661,8 @@ static int ip_vs_out_icmp(struct sk_buff
134  
135         skb->ipvs_property = 1;
136         verdict = NF_ACCEPT;
137 +       if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL))
138 +               verdict = NF_DROP;
139  
140    out:
141         __ip_vs_conn_put(cp);
142 @@ -761,19 +763,31 @@ ip_vs_out(unsigned int hooknum, struct s
143         if (!skb_make_writable(skb, ihl))
144                 goto drop;
145  
146 +       if (!ip_vs_confirm_conntrack(skb, cp, hooknum))
147 +               goto out;
148 +
149         /* mangle the packet */
150         if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
151                 goto drop;
152         ip_hdr(skb)->saddr = cp->vaddr;
153         ip_send_check(ip_hdr(skb));
154  
155 +       /*
156 +        * nf_iterate does not expect change in the skb->dst->dev.
157 +        * It looks like it is not fatal to enable this code for hooks
158 +        * where our handlers are at the end of the chain list and
159 +        * when all next handlers use skb->dst->dev and not outdev.
160 +        * It will definitely route properly the inout NAT traffic
161 +        * when multiple paths are used.
162 +        */
163 +
164         /* For policy routing, packets originating from this
165          * machine itself may be routed differently to packets
166          * passing through.  We want this packet to be routed as
167          * if it came from this machine itself.  So re-compute
168          * the routing information.
169          */
170 -       if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
171 +       if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL) != 0)
172                 goto drop;
173  
174         IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
175 @@ -788,8 +802,11 @@ ip_vs_out(unsigned int hooknum, struct s
176         return NF_ACCEPT;
177  
178    drop:
179 -       ip_vs_conn_put(cp);
180         kfree_skb(skb);
181 +
182 +  out:
183 +       ip_vs_conn_put(cp);
184 +       LeaveFunction(11);
185         return NF_STOLEN;
186  }
187  
188 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
189 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_ctl.c     2008-04-17 09:58:09.000000000 +0300
190 +++ linux/net/ipv4/ipvs/ip_vs_ctl.c     2008-04-19 19:55:40.000000000 +0300
191 @@ -81,6 +81,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
192  int sysctl_ip_vs_expire_quiescent_template = 0;
193  int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
194  int sysctl_ip_vs_nat_icmp_send = 0;
195 +int sysctl_ip_vs_snat_reroute = 0;
196 +#ifdef CONFIG_IP_VS_NFCT
197 +int sysctl_ip_vs_conntrack = 0;
198 +#endif
199  
200  
201  #ifdef CONFIG_IP_VS_DEBUG
202 @@ -1446,6 +1450,15 @@ static struct ctl_table vs_vars[] = {
203                 .mode           = 0644,
204                 .proc_handler   = &proc_dointvec,
205         },
206 +#ifdef CONFIG_IP_VS_NFCT
207 +       {
208 +               .procname       = "conntrack",
209 +               .data           = &sysctl_ip_vs_conntrack,
210 +               .maxlen         = sizeof(int),
211 +               .mode           = 0644,
212 +               .proc_handler   = &proc_dointvec,
213 +       },
214 +#endif
215         {
216                 .procname       = "drop_entry",
217                 .data           = &sysctl_ip_vs_drop_entry,
218 @@ -1467,6 +1480,13 @@ static struct ctl_table vs_vars[] = {
219                 .mode           = 0644,
220                 .proc_handler   = &proc_do_defense_mode,
221         },
222 +       {
223 +               .procname       = "snat_reroute",
224 +               .data           = &sysctl_ip_vs_snat_reroute,
225 +               .maxlen         = sizeof(int),
226 +               .mode           = 0644,
227 +               .proc_handler   = &proc_dointvec,
228 +       },
229  #if 0
230         {
231                 .procname       = "timeout_established",
232 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
233 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_ftp.c     2008-01-25 10:45:06.000000000 +0200
234 +++ linux/net/ipv4/ipvs/ip_vs_ftp.c     2008-04-19 19:55:40.000000000 +0300
235 @@ -195,6 +195,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
236                         ip_vs_control_add(n_cp, cp);
237                 }
238  
239 +#ifdef CONFIG_IP_VS_NFCT
240 +               if (skb->nfct)
241 +                       ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
242 +#endif
243 +
244                 /*
245                  * Replace the old passive address with the new one
246                  */
247 @@ -327,6 +332,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
248                 ip_vs_control_add(n_cp, cp);
249         }
250  
251 +#ifdef CONFIG_IP_VS_NFCT
252 +       if (skb->nfct)
253 +               ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
254 +#endif
255 +
256         /*
257          *      Move tunnel to listen state
258          */
259 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
260 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_nfct.c    1970-01-01 02:00:00.000000000 +0200
261 +++ linux/net/ipv4/ipvs/ip_vs_nfct.c    2008-04-19 20:06:46.000000000 +0300
262 @@ -0,0 +1,385 @@
263 +/*
264 + * ip_vs_nfct.c:       Netfilter connection tracking support for IPVS
265 + *
266 + * Portions Copyright (C) 2001-2002
267 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
268 + *
269 + * Portions Copyright (C) 2003-2008
270 + * Julian Anastasov
271 + *
272 + *
273 + * This code is free software; you can redistribute it and/or modify
274 + * it under the terms of the GNU General Public License as published by
275 + * the Free Software Foundation; either version 2 of the License, or
276 + * (at your option) any later version.
277 + *
278 + * This program is distributed in the hope that it will be useful,
279 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
280 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
281 + * GNU General Public License for more details.
282 + *
283 + * You should have received a copy of the GNU General Public License
284 + * along with this program; if not, write to the Free Software
285 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
286 + *
287 + *
288 + * Authors:
289 + * Ben North <ben@redfrontdoor.org>
290 + * Julian Anastasov <ja@ssi.bg>                Reorganize and sync with latest kernels
291 + *
292 + *
293 + * Current status:
294 + *
295 + * - provide conntrack confirmation for new and related connections, so
296 + * that we can see their proper conntrack state in all hooks
297 + * - support for all forwarding methods, not only NAT
298 + * - FTP support (NAT), ability to support other NAT apps with expectations
299 + * - to correctly create expectations for related NAT connections the proper
300 + * NF conntrack support must be already installed, eg. ip_vs_ftp requires
301 + * nf_conntrack_ftp for the same ports
302 + *
303 + */
304 +
305 +#include <linux/module.h>
306 +#include <linux/types.h>
307 +#include <linux/kernel.h>
308 +#include <linux/errno.h>
309 +#include <linux/compiler.h>
310 +#include <linux/vmalloc.h>
311 +#include <linux/skbuff.h>
312 +#include <net/ip.h>
313 +#include <linux/netfilter.h>
314 +#include <linux/netfilter_ipv4.h>
315 +#include <net/ip_vs.h>
316 +
317 +
318 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
319 +
320 +
321 +#define FMT_TUPLE      "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"     /* src:port->dst:port/protonum */
322 +#define ARG_TUPLE(t)   NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
323 +                       NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
324 +                       (t)->dst.protonum
325 +
326 +#define FMT_CONN       "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u" /* caddr:cport->vaddr:vport->daddr:dport/protocol:state */
327 +#define ARG_CONN(c)    NIPQUAD((c)->caddr), ntohs((c)->cport), \
328 +                       NIPQUAD((c)->vaddr), ntohs((c)->vport), \
329 +                       NIPQUAD((c)->daddr), ntohs((c)->dport), \
330 +                       (c)->protocol, (c)->state
331 +
332 +/* Confirm the conntrack of skb, altering its reply tuple first if needed. Returns boolean and skb is freed on failure */
333 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
334 +                               unsigned int hooknum)
335 +{
336 +       /*
337 +        * The assumptions:
338 +        * - skb->nfct is non-NULL and not yet confirmed
339 +        * - we are called before the packet is mangled
340 +        */
341 +
342 +       struct iphdr *iph = ip_hdr(skb);
343 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
344 +       struct nf_conntrack_tuple new_reply;
345 +       int ret = NF_DROP;      /* verdict used if we fail before the confirm */
346 +       __be16 _ports[2], *pptr;
347 +#ifdef CONFIG_IP_VS_DEBUG
348 +       struct nf_conntrack_tuple *orig_tup =
349 +               &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
350 +       struct nf_conntrack_tuple *orig_rep =
351 +               &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
352 +#endif
353 +#ifdef CONFIG_NF_NAT_NEEDED
354 +       int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
355 +#else
356 +       int initialized = 0;
357 +#endif
358 +
359 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
360 +               ", cp=" FMT_CONN "\n",
361 +               __FUNCTION__, ct, initialized,
362 +               ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
363 +
364 +#ifdef CONFIG_NF_NAT_NEEDED
365 +       /*
366 +        * Netfilter NAT is already active on this conntrack, maybe we are
367 +        * trying to alter a DNAT conn? Not supported, skip the confirmation.
368 +        */
369 +       if (initialized && ct->status & IPS_NAT_MASK) {
370 +#ifdef CONFIG_IP_VS_DEBUG
371 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
372 +                       __FUNCTION__, ct, ct->status, initialized);
373 +#endif
374 +               return 1;       /* ct stays unconfirmed, skb may proceed */
375 +       }
376 +#endif
377 +
378 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
379 +               goto confirm;   /* reply tuple is left untouched */
380 +
381 +       /*
382 +        * Alter reply only for IP_VS_CONN_F_MASQ in outin (client->VS) direction.
383 +        * For related connections in inout (RS->client) direction it is done
384 +        * in the expectfn callback, ip_vs_nfct_expect_callback.
385 +        */
386 +
387 +       pptr = skb_header_pointer(skb, ip_hdrlen(skb),
388 +                                 sizeof(_ports), _ports);
389 +       if (!pptr)
390 +               goto out;       /* ret is still NF_DROP, so skb is freed at out: */
391 +
392 +       new_reply = (struct nf_conntrack_tuple) {
393 +               .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
394 +
395 +       new_reply.src.u3.ip = cp->daddr;        /* reply source: the real server */
396 +       new_reply.src.u.tcp.port = cp->dport;
397 +       new_reply.src.l3num = PF_INET;
398 +       new_reply.dst.u3.ip = iph->saddr;       /* reply destination: sender of this packet */
399 +       new_reply.dst.u.tcp.port = pptr[0];
400 +
401 +       nf_conntrack_alter_reply(ct, &new_reply);
402 +
403 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
404 +               ", new_reply=" FMT_TUPLE " => alter_reply\n",
405 +               __FUNCTION__, ct, initialized,
406 +               ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
407 +
408 +       /*
409 +        * No need to rehash NAT info because we don't change the source
410 +        * address in the original direction
411 +        */
412 +
413 +confirm:
414 +
415 +       ret = __nf_conntrack_confirm(skb);
416 +
417 +       if (ret != NF_STOLEN) {
418 +               IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
419 +                       __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
420 +       }
421 +
422 +       if (ret != NF_ACCEPT)
423 +               goto out;
424 +       return 1;
425 +
426 +out:
427 +       if (ret != NF_STOLEN)   /* a stolen skb is not ours to free */
428 +               kfree_skb(skb);
429 +       return 0;
430 +}
431 +
432 +/*
433 + * Confirm (and, via __ip_vs_nfct_confirm, optionally alter) the conntrack entry
434 + * of skb because the IPVS packets do not reach ipv4_confirm. Returns boolean.
435 + */
436 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
437 +                      unsigned int hooknum)
438 +{
439 +       struct iphdr *iph = ip_hdr(skb);
440 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
441 +
442 +       /* By the time we're sending the packet out the other
443 +        * side, there should be a confirmed Netfilter CT entry
444 +        * for this connection.  This may not be the case,
445 +        * however, if it's a brand new connection, or if the NF
446 +        * entry has timed out before ours has.  Either way, if
447 +        * the NF CT entry is unconfirmed, confirm it, and deal
448 +        * with reply tuple mangling at the same time.
449 +        */
450 +
451 +       /* We only deal with TCP or UDP packets */
452 +       if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
453 +               return 1;       /* other protocols pass without confirmation */
454 +
455 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
456 +               /*
457 +                * Do not be surprised if non-NAT conntracks stay in SYN_SENT
458 +                * state, maybe the replies from the real server go
459 +                * directly to the client. In any case, keep them in REPLIED
460 +                * state (ESTABLISHED).
461 +                */
462 +               if (iph->protocol != IPPROTO_TCP ||
463 +                   IP_VS_TCP_S_ESTABLISHED == cp->state) {
464 +                       set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
465 +               }
466 +       }
467 +
468 +       /*
469 +        * We assume the reused connections do not change their rip:rport
470 +        * and we do not need to alter their conntrack reply
471 +        */
472 +       return __ip_vs_nfct_confirm(skb, cp, hooknum);
473 +}
474 +
475 +/*
476 + * expectfn handler, called from init_conntrack(): alter the reply tuple of an expected MASQ conn
477 + */
478 +
479 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
480 +       struct nf_conntrack_expect *exp)
481 +{
482 +       struct nf_conntrack_tuple *orig, new_reply;
483 +       struct ip_vs_conn *cp;
484 +
485 +       if (exp->tuple.src.l3num != PF_INET)
486 +               return;
487 +
488 +       /*
489 +        * - We assume that no NF locks are held before this callback
490 +        * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
491 +        *   expectations even if they use wildcard values, now we provide
492 +        *   the actual values from the newly created original conntrack direction
493 +        * - the conntrack is confirmed when the packet reaches the IPVS hooks
494 +        */
495 +
496 +       /* RS->CLIENT */
497 +       orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
498 +       cp = ip_vs_conn_out_get(orig->dst.protonum,
499 +                               orig->src.u3.ip, orig->src.u.tcp.port,
500 +                               orig->dst.u3.ip, orig->dst.u.tcp.port);
501 +       if (cp) {
502 +               /* Change reply CLIENT->RS to CLIENT->VS */
503 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
504 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
505 +                       ", found inout cp=" FMT_CONN "\n",
506 +                       __FUNCTION__, ct, ct->status,
507 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
508 +                       ARG_CONN(cp));
509 +               new_reply.dst.u3.ip = cp->vaddr;
510 +               new_reply.dst.u.tcp.port = cp->vport;
511 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
512 +                       ", inout cp=" FMT_CONN "\n",
513 +                       __FUNCTION__, ct,
514 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
515 +                       ARG_CONN(cp));
516 +               goto alter;
517 +       }
518 +
519 +       /* CLIENT->VS */
520 +       cp = ip_vs_conn_in_get(orig->dst.protonum,
521 +                               orig->src.u3.ip, orig->src.u.tcp.port,
522 +                               orig->dst.u3.ip, orig->dst.u.tcp.port);
523 +       if (cp) {
524 +               /* Change reply VS->CLIENT to RS->CLIENT */
525 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
526 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
527 +                       ", found outin cp=" FMT_CONN "\n",
528 +                       __FUNCTION__, ct, ct->status,
529 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
530 +                       ARG_CONN(cp));
531 +               new_reply.src.u3.ip = cp->daddr;
532 +               new_reply.src.u.tcp.port = cp->dport;
533 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
534 +                       ", outin cp=" FMT_CONN "\n",
535 +                       __FUNCTION__, ct,
536 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
537 +                       ARG_CONN(cp));
538 +               goto alter;
539 +       }
540 +       IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
541 +               __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
542 +       return; /* no IPVS connection in either direction, leave ct alone */
543 +
544 +alter:
545 +
546 +       /* Never alter conntrack for non-NAT conns */
547 +       if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
548 +               nf_conntrack_alter_reply(ct, &new_reply);
549 +       ip_vs_conn_put(cp);     /* done with the cp returned by the lookup above */
550 +       return;
551 +}
552 +
553 +/*
554 + * Create an NF conntrack expectation for a connection related to cp; a zero
555 + * port leaves the source port wildcarded. ip_vs_nfct_expect_callback is set as
556 + * its expectfn and alters the reply when the first packet comes.
557 + */
558 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
559 +                              __be16 port, __u16 proto, int from_rs)
560 +{
561 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
562 +       struct nf_conntrack_expect *e;
563 +
564 +       if (!sysctl_ip_vs_conntrack)
565 +               return;
566 +
567 +       if (!ct) {
568 +               IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
569 +                       __FUNCTION__, ct, ARG_CONN(cp));
570 +               return;
571 +       }
572 +
573 +       if (!(e = nf_ct_expect_alloc(ct)))
574 +               return; /* allocation failed, no expectation is registered */
575 +
576 +       e->expectfn                     = ip_vs_nfct_expect_callback;
577 +       e->helper                       = NULL;
578 +       e->flags                        = 0;
579 +       memset(&e->tuple, 0, sizeof(e->tuple));
580 +       e->tuple.src.u.tcp.port         = port;
581 +       e->tuple.src.l3num              = PF_INET;
582 +       e->tuple.dst.protonum           = proto;
583 +       memset(&e->mask, 0, sizeof(e->mask));
584 +       e->mask.src.u3.ip               = 0xffffffff;
585 +       e->mask.src.u.all               = port? 0xffff : 0;     /* port 0: match any source port */
586 +
587 +       if (from_rs) {  /* expect real server -> client */
588 +               e->tuple.src.u3.ip = cp->daddr;
589 +               e->tuple.dst.u3.ip = cp->caddr;
590 +               e->tuple.dst.u.tcp.port = cp->cport;
591 +       } else {        /* expect client -> virtual service */
592 +               e->tuple.src.u3.ip = cp->caddr;
593 +               e->tuple.dst.u3.ip = cp->vaddr;
594 +               e->tuple.dst.u.tcp.port = cp->vport;
595 +       }
596 +
597 +       IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
598 +               __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
599 +       nf_ct_expect_related(e);
600 +       nf_ct_expect_put(e);
601 +}
602 +
603 +/*
604 + * Our connection was terminated: look up its caddr:cport->vaddr:vport conntrack and try to drop it immediately
605 + */
606 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
607 +{
608 +       struct nf_conntrack_tuple_hash *h;
609 +       struct nf_conn *ct;
610 +       struct nf_conntrack_tuple tuple;
611 +
612 +       if (!cp->cport)
613 +               return; /* NOTE(review): presumably a conn with wildcard client port - confirm */
614 +
615 +       tuple = (struct nf_conntrack_tuple) {
616 +               .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
617 +       tuple.src.u3.ip = cp->caddr;
618 +       tuple.src.u.all = cp->cport;
619 +       tuple.src.l3num = PF_INET;
620 +       tuple.dst.u3.ip = cp->vaddr;
621 +       tuple.dst.u.all = cp->vport;
622 +
623 +       IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
624 +               " for conn " FMT_CONN "\n",
625 +               __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
626 +
627 +       h = nf_conntrack_find_get(&tuple);
628 +       if (h) {
629 +               ct = nf_ct_tuplehash_to_ctrack(h);
630 +               if (del_timer(&ct->timeout)) {
631 +                       IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
632 +                               FMT_TUPLE "\n",
633 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
634 +                       if (ct->timeout.function)       /* timer was cancelled by us: run its handler by hand */
635 +                               ct->timeout.function(ct->timeout.data);
636 +               } else {
637 +                       IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
638 +                               FMT_TUPLE "\n",
639 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
640 +               }
641 +               nf_ct_put(ct);  /* done with the ct returned by nf_conntrack_find_get */
642 +       } else {
643 +               IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
644 +                       __FUNCTION__, ARG_TUPLE(&tuple));
645 +       }
646 +}
647 +
648 diff -urNp v2.6.25/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
649 --- v2.6.25/linux/net/ipv4/ipvs/ip_vs_xmit.c    2008-04-17 09:58:09.000000000 +0300
650 +++ linux/net/ipv4/ipvs/ip_vs_xmit.c    2008-04-19 20:04:42.000000000 +0300
651 @@ -141,7 +141,6 @@ int
652  ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
653                 struct ip_vs_protocol *pp)
654  {
655 -       /* we do not touch skb and do not need pskb ptr */
656         return NF_ACCEPT;
657  }
658  
659 @@ -199,6 +198,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
660         dst_release(skb->dst);
661         skb->dst = &rt->u.dst;
662  
663 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
664 +               goto tx_error_out;
665 +
666         /* Another hack: avoid icmp_send in ip_fragment */
667         skb->local_df = 1;
668  
669 @@ -211,6 +213,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
670         dst_link_failure(skb);
671   tx_error:
672         kfree_skb(skb);
673 + tx_error_out:
674         LeaveFunction(10);
675         return NF_STOLEN;
676  }
677 @@ -263,6 +266,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
678         dst_release(skb->dst);
679         skb->dst = &rt->u.dst;
680  
681 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
682 +               goto tx_error_out;
683 +
684         /* mangle the packet */
685         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
686                 goto tx_error;
687 @@ -286,8 +292,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
688    tx_error_icmp:
689         dst_link_failure(skb);
690    tx_error:
691 -       LeaveFunction(10);
692         kfree_skb(skb);
693 +  tx_error_out:
694 +       LeaveFunction(10);
695         return NF_STOLEN;
696    tx_error_put:
697         ip_rt_put(rt);
698 @@ -386,14 +393,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
699         /* fix old IP header checksum */
700         ip_send_check(old_iph);
701  
702 -       skb_push(skb, sizeof(struct iphdr));
703 -       skb_reset_network_header(skb);
704 -       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
705 -
706         /* drop old route */
707         dst_release(skb->dst);
708         skb->dst = &rt->u.dst;
709  
710 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
711 +               goto tx_error_out;
712 +
713 +       skb_push(skb, sizeof(struct iphdr));
714 +       skb_reset_network_header(skb);
715 +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
716 +
717         /*
718          *      Push down and install the IPIP header.
719          */
720 @@ -421,6 +431,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
721         dst_link_failure(skb);
722    tx_error:
723         kfree_skb(skb);
724 +  tx_error_out:
725         LeaveFunction(10);
726         return NF_STOLEN;
727  }
728 @@ -466,6 +477,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
729         dst_release(skb->dst);
730         skb->dst = &rt->u.dst;
731  
732 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
733 +               goto tx_error_out;
734 +
735         /* Another hack: avoid icmp_send in ip_fragment */
736         skb->local_df = 1;
737  
738 @@ -478,6 +492,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
739         dst_link_failure(skb);
740    tx_error:
741         kfree_skb(skb);
742 +  tx_error_out:
743         LeaveFunction(10);
744         return NF_STOLEN;
745  }
746 @@ -537,6 +552,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
747         dst_release(skb->dst);
748         skb->dst = &rt->u.dst;
749  
750 +       /* TODO: properly alter reply for NFCT */
751 +
752         ip_vs_nat_icmp(skb, pp, cp, 0);
753  
754         /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.080215 seconds and 3 git commands to generate.