]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-ipvs-nfct.patch
- updated for 2.6.27.26
[packages/kernel.git] / kernel-ipvs-nfct.patch
1 diff -urNp v2.6.27/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.27/linux/include/net/ip_vs.h   2008-10-11 12:46:15.000000000 +0300
3 +++ linux/include/net/ip_vs.h   2008-10-11 14:24:47.000000000 +0300
4 @@ -21,6 +21,13 @@
5  #include <linux/timer.h>
6  
7  #include <net/checksum.h>
8 +#include <linux/skbuff.h>
9 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
10 +#include <net/netfilter/nf_conntrack.h>
11 +#include <net/netfilter/nf_conntrack_core.h>
12 +#include <net/netfilter/nf_conntrack_expect.h>
13 +#include <net/netfilter/nf_conntrack_helper.h>
14 +#endif
15  
16  #ifdef CONFIG_IP_VS_DEBUG
17  #include <linux/net.h>
18 @@ -474,6 +481,16 @@ extern void ip_vs_init_hash_table(struct
19   */
20  
21  /*
22 + *      Netfilter connection tracking
23 + *      (from ip_vs_nfct.c)
24 + */
25 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
26 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
27 +                                     struct ip_vs_conn *cp,
28 +                                     __be16 port, __u16 proto, int from_rs);
29 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
30 +
31 +/*
32   *     IPVS connection entry hash table
33   */
34  #ifndef CONFIG_IP_VS_TAB_BITS
35 @@ -643,9 +660,42 @@ extern int sysctl_ip_vs_expire_nodest_co
36  extern int sysctl_ip_vs_expire_quiescent_template;
37  extern int sysctl_ip_vs_sync_threshold[2];
38  extern int sysctl_ip_vs_nat_icmp_send;
39 +extern int sysctl_ip_vs_snat_reroute;
40  extern struct ip_vs_stats ip_vs_stats;
41  extern const struct ctl_path net_vs_ctl_path[];
42  
43 +#ifdef CONFIG_IP_VS_NFCT
44 +
45 +extern int sysctl_ip_vs_conntrack;
46 +/* Nonzero when the conntrack sysctl is set and skb has a conntrack attached */
47 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
48 +{
49 +       return sysctl_ip_vs_conntrack && skb->nfct;
50 +}
51 +
52 +/* Confirms skb's conntrack if it is in use and not yet confirmed. Returns boolean and skb is freed on failure */
53 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
54 +{
55 +       if (!ip_vs_use_conntrack(skb))
56 +               return 1;
57 +       return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
58 +               ip_vs_nfct_confirm(skb, cp, hooknum);
59 +}
60 +
61 +#else /* !CONFIG_IP_VS_NFCT: stubs, conntrack is never used and confirmation always succeeds */
62 +
63 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
64 +{
65 +       return 0;
66 +}
67 +
68 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
69 +{
70 +       return 1;
71 +}
72 +
73 +#endif /* CONFIG_IP_VS_NFCT */
74 +
75  extern struct ip_vs_service *
76  ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
77  
78 diff -urNp v2.6.27/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
79 --- v2.6.27/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
80 +++ linux/net/ipv4/ipvs/Kconfig 2008-10-11 14:19:27.000000000 +0300
81 @@ -221,4 +221,12 @@ config     IP_VS_FTP
82           If you want to compile it in kernel, say Y. To compile it as a
83           module, choose M here. If unsure, say N.
84  
85 +config IP_VS_NFCT
86 +       bool "Netfilter connection tracking"
87 +       depends on NF_CONNTRACK
88 +       ---help---
89 +         The Netfilter connection tracking support allows the IPVS
90 +         connection state to be exported to the Netfilter framework
91 +         for filtering purposes.
92 +
93  endif # IP_VS
94 diff -urNp v2.6.27/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
95 --- v2.6.27/linux/net/ipv4/ipvs/Makefile        2005-06-18 08:50:52.000000000 +0300
96 +++ linux/net/ipv4/ipvs/Makefile        2008-10-11 14:19:27.000000000 +0300
97 @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
98  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
99  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
100  
101 +ip_vs-extra_objs-y :=
102 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
103 +
104  ip_vs-objs :=  ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o        \
105                 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o                      \
106                 ip_vs_est.o ip_vs_proto.o                                  \
107 -               $(ip_vs_proto-objs-y)
108 +               $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
109  
110  
111  # IPVS core
112 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
113 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_conn.c    2008-10-11 12:46:16.000000000 +0300
114 +++ linux/net/ipv4/ipvs/ip_vs_conn.c    2008-10-11 14:19:27.000000000 +0300
115 @@ -591,6 +591,11 @@ static void ip_vs_conn_expire(unsigned l
116                 if (cp->control)
117                         ip_vs_control_del(cp);
118  
119 +#ifdef CONFIG_IP_VS_NFCT
120 +               if (sysctl_ip_vs_conntrack)
121 +                       ip_vs_nfct_conn_drop(cp);
122 +#endif
123 +
124                 if (unlikely(cp->app != NULL))
125                         ip_vs_unbind_app(cp);
126                 ip_vs_unbind_dest(cp);
127 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
128 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_core.c    2008-10-11 12:46:16.000000000 +0300
129 +++ linux/net/ipv4/ipvs/ip_vs_core.c    2008-10-11 14:19:27.000000000 +0300
130 @@ -659,6 +659,8 @@ static int ip_vs_out_icmp(struct sk_buff
131  
132         skb->ipvs_property = 1;
133         verdict = NF_ACCEPT;
134 +       if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL))
135 +               verdict = NF_DROP;
136  
137    out:
138         __ip_vs_conn_put(cp);
139 @@ -759,19 +761,31 @@ ip_vs_out(unsigned int hooknum, struct s
140         if (!skb_make_writable(skb, ihl))
141                 goto drop;
142  
143 +       if (!ip_vs_confirm_conntrack(skb, cp, hooknum))
144 +               goto out;
145 +
146         /* mangle the packet */
147         if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
148                 goto drop;
149         ip_hdr(skb)->saddr = cp->vaddr;
150         ip_send_check(ip_hdr(skb));
151  
152 +       /*
153 +        * nf_iterate does not expect change in the skb->dst->dev.
154 +        * It looks like it is not fatal to enable this code for hooks
155 +        * where our handlers are at the end of the chain list and
156 +        * when all next handlers use skb->dst->dev and not outdev.
157 +        * It will definitely route the inout NAT traffic properly
158 +        * when multiple paths are used.
159 +        */
160 +
161         /* For policy routing, packets originating from this
162          * machine itself may be routed differently to packets
163          * passing through.  We want this packet to be routed as
164          * if it came from this machine itself.  So re-compute
165          * the routing information.
166          */
167 -       if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
168 +       if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(skb, RTN_LOCAL) != 0)
169                 goto drop;
170  
171         IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
172 @@ -786,8 +800,11 @@ ip_vs_out(unsigned int hooknum, struct s
173         return NF_ACCEPT;
174  
175    drop:
176 -       ip_vs_conn_put(cp);
177         kfree_skb(skb);
178 +
179 +  out:
180 +       ip_vs_conn_put(cp);
181 +       LeaveFunction(11);
182         return NF_STOLEN;
183  }
184  
185 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
186 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_ctl.c     2008-10-11 12:46:16.000000000 +0300
187 +++ linux/net/ipv4/ipvs/ip_vs_ctl.c     2008-10-11 14:19:27.000000000 +0300
188 @@ -79,6 +79,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
189  int sysctl_ip_vs_expire_quiescent_template = 0;
190  int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
191  int sysctl_ip_vs_nat_icmp_send = 0;
192 +int sysctl_ip_vs_snat_reroute = 0;
193 +#ifdef CONFIG_IP_VS_NFCT
194 +int sysctl_ip_vs_conntrack = 0;
195 +#endif
196  
197  
198  #ifdef CONFIG_IP_VS_DEBUG
199 @@ -1457,6 +1461,15 @@ static struct ctl_table vs_vars[] = {
200                 .mode           = 0644,
201                 .proc_handler   = &proc_dointvec,
202         },
203 +#ifdef CONFIG_IP_VS_NFCT
204 +       {
205 +               .procname       = "conntrack",
206 +               .data           = &sysctl_ip_vs_conntrack,
207 +               .maxlen         = sizeof(int),
208 +               .mode           = 0644,
209 +               .proc_handler   = &proc_dointvec,
210 +       },
211 +#endif
212         {
213                 .procname       = "drop_entry",
214                 .data           = &sysctl_ip_vs_drop_entry,
215 @@ -1478,6 +1491,13 @@ static struct ctl_table vs_vars[] = {
216                 .mode           = 0644,
217                 .proc_handler   = &proc_do_defense_mode,
218         },
219 +       {
220 +               .procname       = "snat_reroute",
221 +               .data           = &sysctl_ip_vs_snat_reroute,
222 +               .maxlen         = sizeof(int),
223 +               .mode           = 0644,
224 +               .proc_handler   = &proc_dointvec,
225 +       },
226  #if 0
227         {
228                 .procname       = "timeout_established",
229 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
230 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_ftp.c     2008-10-11 12:46:16.000000000 +0300
231 +++ linux/net/ipv4/ipvs/ip_vs_ftp.c     2008-10-11 14:19:27.000000000 +0300
232 @@ -193,6 +193,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
233                         ip_vs_control_add(n_cp, cp);
234                 }
235  
236 +#ifdef CONFIG_IP_VS_NFCT
237 +               if (skb->nfct)
238 +                       ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
239 +#endif
240 +
241                 /*
242                  * Replace the old passive address with the new one
243                  */
244 @@ -325,6 +330,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
245                 ip_vs_control_add(n_cp, cp);
246         }
247  
248 +#ifdef CONFIG_IP_VS_NFCT
249 +       if (skb->nfct)
250 +               ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
251 +#endif
252 +
253         /*
254          *      Move tunnel to listen state
255          */
256 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
257 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_nfct.c    1970-01-01 02:00:00.000000000 +0200
258 +++ linux/net/ipv4/ipvs/ip_vs_nfct.c    2008-10-11 14:19:27.000000000 +0300
259 @@ -0,0 +1,386 @@
260 +/*
261 + * ip_vs_nfct.c:       Netfilter connection tracking support for IPVS
262 + *
263 + * Portions Copyright (C) 2001-2002
264 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
265 + *
266 + * Portions Copyright (C) 2003-2008
267 + * Julian Anastasov
268 + *
269 + *
270 + * This code is free software; you can redistribute it and/or modify
271 + * it under the terms of the GNU General Public License as published by
272 + * the Free Software Foundation; either version 2 of the License, or
273 + * (at your option) any later version.
274 + *
275 + * This program is distributed in the hope that it will be useful,
276 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
277 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
278 + * GNU General Public License for more details.
279 + *
280 + * You should have received a copy of the GNU General Public License
281 + * along with this program; if not, write to the Free Software
282 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
283 + *
284 + *
285 + * Authors:
286 + * Ben North <ben@redfrontdoor.org>
287 + * Julian Anastasov <ja@ssi.bg>                Reorganize and sync with latest kernels
288 + *
289 + *
290 + * Current status:
291 + *
292 + * - provide conntrack confirmation for new and related connections, so
293 + * that we can see their proper conntrack state in all hooks
294 + * - support for all forwarding methods, not only NAT
295 + * - FTP support (NAT), ability to support other NAT apps with expectations
296 + * - to correctly create expectations for related NAT connections the proper
297 + * NF conntrack support must be already installed, eg. ip_vs_ftp requires
298 + * nf_conntrack_ftp for the same ports
299 + *
300 + */
301 +
302 +#include <linux/module.h>
303 +#include <linux/types.h>
304 +#include <linux/kernel.h>
305 +#include <linux/errno.h>
306 +#include <linux/compiler.h>
307 +#include <linux/vmalloc.h>
308 +#include <linux/skbuff.h>
309 +#include <net/ip.h>
310 +#include <linux/netfilter.h>
311 +#include <linux/netfilter_ipv4.h>
312 +#include <net/ip_vs.h>
313 +
314 +
315 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
316 +
317 +/* Debug format/arguments for a conntrack tuple: src ip:port->dst ip:port/protonum */
318 +#define FMT_TUPLE      "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
319 +#define ARG_TUPLE(t)   NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
320 +                       NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
321 +                       (t)->dst.protonum
322 +/* Debug format/arguments for an IPVS conn: caddr:cport->vaddr:vport->daddr:dport/protocol:state */
323 +#define FMT_CONN       "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
324 +#define ARG_CONN(c)    NIPQUAD((c)->caddr), ntohs((c)->cport), \
325 +                       NIPQUAD((c)->vaddr), ntohs((c)->vport), \
326 +                       NIPQUAD((c)->daddr), ntohs((c)->dport), \
327 +                       (c)->protocol, (c)->state
328 +
329 +/* Confirm the conntrack attached to skb, first altering its reply tuple for IP_VS_CONN_F_MASQ conns outside the FORWARD hook. Returns boolean and skb is freed on failure */
330 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
331 +                               unsigned int hooknum)
332 +{
333 +       /*
334 +        * The assumptions:
335 +        * - the nfct is !NULL and is not confirmed
336 +        * - we are called before any mangle
337 +        */
338 +
339 +       struct iphdr *iph = ip_hdr(skb);
340 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
341 +       struct nf_conntrack_tuple new_reply;
342 +       int ret = NF_DROP;
343 +       __be16 _ports[2], *pptr;
344 +#ifdef CONFIG_IP_VS_DEBUG
345 +       struct nf_conntrack_tuple *orig_tup =
346 +               &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
347 +       struct nf_conntrack_tuple *orig_rep =
348 +               &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
349 +#endif
350 +#ifdef CONFIG_NF_NAT_NEEDED
351 +       int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
352 +#else
353 +       int initialized = 0;
354 +#endif
355 +
356 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
357 +               ", cp=" FMT_CONN "\n",
358 +               __FUNCTION__, ct, initialized,
359 +               ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
360 +
361 +#ifdef CONFIG_NF_NAT_NEEDED
362 +       /*
363 +        * This is really bad: maybe we are trying to alter a DNAT conn?
364 +        * This is not supported, so skip the confirmation and report success.
365 +        */
366 +       if (initialized && ct->status & IPS_NAT_MASK) {
367 +#ifdef CONFIG_IP_VS_DEBUG
368 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
369 +                       __FUNCTION__, ct, ct->status, initialized);
370 +#endif
371 +               return 1;
372 +       }
373 +#endif
374 +
375 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
376 +               goto confirm;
377 +
378 +       /*
379 +        * Alter the reply only for IP_VS_CONN_F_MASQ in the outin direction:
380 +        * the new reply goes from daddr:dport to the packet's source. For related
381 +        * connections in the inout direction it is done in the expectfn callback.
382 +        */
383 +
384 +       pptr = skb_header_pointer(skb, ip_hdrlen(skb),
385 +                                 sizeof(_ports), _ports);
386 +       if (!pptr)
387 +               goto out; /* ret is still NF_DROP, so skb is freed at out */
388 +
389 +       new_reply = (struct nf_conntrack_tuple) {
390 +               .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
391 +
392 +       new_reply.src.u3.ip = cp->daddr;
393 +       new_reply.src.u.tcp.port = cp->dport;
394 +       new_reply.src.l3num = PF_INET;
395 +       new_reply.dst.u3.ip = iph->saddr;
396 +       new_reply.dst.u.tcp.port = pptr[0];
397 +
398 +       nf_conntrack_alter_reply(ct, &new_reply);
399 +
400 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
401 +               ", new_reply=" FMT_TUPLE " => alter_reply\n",
402 +               __FUNCTION__, ct, initialized,
403 +               ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
404 +
405 +       /*
406 +        * No need to rehash NAT info because we don't change the source
407 +        * address in the original direction.
408 +        */
409 +
410 +confirm:
411 +
412 +       ret = __nf_conntrack_confirm(skb);
413 +
414 +       if (ret != NF_STOLEN) {
415 +               IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
416 +                       __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
417 +       }
418 +
419 +       if (ret != NF_ACCEPT)
420 +               goto out;
421 +       return 1;
422 +
423 +out:
424 +       if (ret != NF_STOLEN)
425 +               kfree_skb(skb);
426 +       return 0;
427 +}
428 +
429 +/*
430 + * Confirm (and optionally alter) the conntrack entry if needed
431 + * because the IPVS packets do not reach ipv4_confirm. Returns boolean and skb is freed on failure.
432 + */
433 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
434 +                      unsigned int hooknum)
435 +{
436 +       struct iphdr *iph = ip_hdr(skb);
437 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
438 +
439 +       /* By the time we're sending the packet out the other
440 +        * side, there should be a confirmed Netfilter CT entry
441 +        * for this connection.  This may not be the case,
442 +        * however, if it's a brand new connection, or if the NF
443 +        * entry has timed out before ours has.  Either way, if
444 +        * the NF CT entry is unconfirmed, confirm it, and deal
445 +        * with reply tuple mangling at the same time.
446 +        */
447 +
448 +       /* We only deal with TCP or UDP packets; others are reported as success */
449 +       if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
450 +               return 1;
451 +
452 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
453 +               /*
454 +                * Do not be surprised if non-NAT conntracks stay in SYN_SENT
455 +                * state: maybe the replies from the real server go directly
456 +                * to the client. For non-TCP, or once the IPVS conn is
457 +                * ESTABLISHED, set SEEN_REPLY to keep them in REPLIED state.
458 +                */
459 +               if (iph->protocol != IPPROTO_TCP ||
460 +                   IP_VS_TCP_S_ESTABLISHED == cp->state) {
461 +                       set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
462 +               }
463 +       }
464 +
465 +       /*
466 +        * We assume the reused connections do not change their rip:rport
467 +        * and we do not need to alter their conntrack reply.
468 +        */
469 +       return __ip_vs_nfct_confirm(skb, cp, hooknum);
470 +}
471 +
472 +/*
473 + * We are called from init_conntrack() as expectfn handler: find the IPVS conn for the new conntrack and alter the reply tuple of MASQ conns
474 + */
475 +
476 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
477 +       struct nf_conntrack_expect *exp)
478 +{
479 +       struct nf_conntrack_tuple *orig, new_reply;
480 +       struct ip_vs_conn *cp;
481 +
482 +       if (exp->tuple.src.l3num != PF_INET)
483 +               return; /* only PF_INET expectations are handled */
484 +
485 +       /*
486 +        * - We assume that no NF locks are held before this callback
487 +        * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
488 +        * expectations even if they use wildcard values, now we provide
489 +        * the actual values from the newly created original conntrack direction
490 +        * - the conntrack is confirmed when the packet reaches the IPVS hooks
491 +        */
492 +
493 +       /* RS->CLIENT */
494 +       orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
495 +       cp = ip_vs_conn_out_get(orig->dst.protonum,
496 +                               orig->src.u3.ip, orig->src.u.tcp.port,
497 +                               orig->dst.u3.ip, orig->dst.u.tcp.port);
498 +       if (cp) {
499 +               /* Change reply CLIENT->RS to CLIENT->VS */
500 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
501 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
502 +                       ", found inout cp=" FMT_CONN "\n",
503 +                       __FUNCTION__, ct, ct->status,
504 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
505 +                       ARG_CONN(cp));
506 +               new_reply.dst.u3.ip = cp->vaddr;
507 +               new_reply.dst.u.tcp.port = cp->vport;
508 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
509 +                       ", inout cp=" FMT_CONN "\n",
510 +                       __FUNCTION__, ct,
511 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
512 +                       ARG_CONN(cp));
513 +               goto alter;
514 +       }
515 +
516 +       /* CLIENT->VS */
517 +       cp = ip_vs_conn_in_get(orig->dst.protonum,
518 +                               orig->src.u3.ip, orig->src.u.tcp.port,
519 +                               orig->dst.u3.ip, orig->dst.u.tcp.port);
520 +       if (cp) {
521 +               /* Change reply VS->CLIENT to RS->CLIENT */
522 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
523 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
524 +                       ", found outin cp=" FMT_CONN "\n",
525 +                       __FUNCTION__, ct, ct->status,
526 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
527 +                       ARG_CONN(cp));
528 +               new_reply.src.u3.ip = cp->daddr;
529 +               new_reply.src.u.tcp.port = cp->dport;
530 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
531 +                       ", outin cp=" FMT_CONN "\n",
532 +                       __FUNCTION__, ct,
533 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
534 +                       ARG_CONN(cp));
535 +               goto alter;
536 +       }
537 +       IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
538 +               __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
539 +       return;
540 +
541 +alter:
542 +
543 +       /* Never alter conntrack for non-NAT conns; new_reply is then discarded */
544 +       if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
545 +               nf_conntrack_alter_reply(ct, &new_reply);
546 +       ip_vs_conn_put(cp);
547 +       return;
548 +}
549 +
550 +/*
551 + * Create NF conntrack expectation with wildcard (optional) source port;
552 + * does nothing if the conntrack sysctl is off or skb has no conntrack. The expectfn
553 + * callback alters the reply on the first packet; the entry is confirmed later in the IPVS hooks.
554 + */
555 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
556 +                              __be16 port, __u16 proto, int from_rs)
557 +{
558 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
559 +       struct nf_conntrack_expect *e;
560 +
561 +       if (!sysctl_ip_vs_conntrack)
562 +               return;
563 +
564 +       if (!ct) {
565 +               IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
566 +                       __FUNCTION__, ct, ARG_CONN(cp));
567 +               return;
568 +       }
569 +
570 +       if (!(e = nf_ct_expect_alloc(ct)))
571 +               return;
572 +
573 +       e->expectfn                     = ip_vs_nfct_expect_callback;
574 +       e->helper                       = NULL;
575 +       e->flags                        = 0;
576 +       e->class                        = NF_CT_EXPECT_CLASS_DEFAULT;
577 +       memset(&e->tuple, 0, sizeof(e->tuple));
578 +       e->tuple.src.u.tcp.port         = port;
579 +       e->tuple.src.l3num              = PF_INET;
580 +       e->tuple.dst.protonum           = proto;
581 +       memset(&e->mask, 0, sizeof(e->mask));
582 +       e->mask.src.u3.ip               = 0xffffffff;
583 +       e->mask.src.u.all               = port? 0xffff : 0; /* port 0: source port is not matched */
584 +
585 +       if (from_rs) { /* expect daddr -> caddr:cport */
586 +               e->tuple.src.u3.ip = cp->daddr;
587 +               e->tuple.dst.u3.ip = cp->caddr;
588 +               e->tuple.dst.u.tcp.port = cp->cport;
589 +       } else { /* expect caddr -> vaddr:vport */
590 +               e->tuple.src.u3.ip = cp->caddr;
591 +               e->tuple.dst.u3.ip = cp->vaddr;
592 +               e->tuple.dst.u.tcp.port = cp->vport;
593 +       }
594 +
595 +       IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
596 +               __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
597 +       nf_ct_expect_related(e);
598 +       nf_ct_expect_put(e);
599 +}
600 +
601 +/*
602 + * Our connection was terminated, try to drop the conntrack immediately. The conntrack is looked up by the tuple caddr:cport -> vaddr:vport.
603 + */
604 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
605 +{
606 +       struct nf_conntrack_tuple_hash *h;
607 +       struct nf_conn *ct;
608 +       struct nf_conntrack_tuple tuple;
609 +
610 +       if (!cp->cport)
611 +               return; /* cport is 0: no lookup is attempted */
612 +
613 +       tuple = (struct nf_conntrack_tuple) {
614 +               .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
615 +       tuple.src.u3.ip = cp->caddr;
616 +       tuple.src.u.all = cp->cport;
617 +       tuple.src.l3num = PF_INET;
618 +       tuple.dst.u3.ip = cp->vaddr;
619 +       tuple.dst.u.all = cp->vport;
620 +
621 +       IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
622 +               " for conn " FMT_CONN "\n",
623 +               __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
624 +
625 +       h = nf_conntrack_find_get(&tuple);
626 +       if (h) {
627 +               ct = nf_ct_tuplehash_to_ctrack(h);
628 +               if (del_timer(&ct->timeout)) { /* we deleted the timer, so invoke its handler ourselves */
629 +                       IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
630 +                               FMT_TUPLE "\n",
631 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
632 +                       if (ct->timeout.function)
633 +                               ct->timeout.function(ct->timeout.data);
634 +               } else {
635 +                       IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
636 +                               FMT_TUPLE "\n",
637 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
638 +               }
639 +               nf_ct_put(ct); /* release the reference obtained from nf_conntrack_find_get */
640 +       } else {
641 +               IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
642 +                       __FUNCTION__, ARG_TUPLE(&tuple));
643 +       }
644 +}
645 +
646 diff -urNp v2.6.27/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
647 --- v2.6.27/linux/net/ipv4/ipvs/ip_vs_xmit.c    2008-10-11 12:46:16.000000000 +0300
648 +++ linux/net/ipv4/ipvs/ip_vs_xmit.c    2008-10-11 14:19:27.000000000 +0300
649 @@ -139,7 +139,6 @@ int
650  ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
651                 struct ip_vs_protocol *pp)
652  {
653 -       /* we do not touch skb and do not need pskb ptr */
654         return NF_ACCEPT;
655  }
656  
657 @@ -197,6 +196,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
658         dst_release(skb->dst);
659         skb->dst = &rt->u.dst;
660  
661 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
662 +               goto tx_error_out;
663 +
664         /* Another hack: avoid icmp_send in ip_fragment */
665         skb->local_df = 1;
666  
667 @@ -209,6 +211,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
668         dst_link_failure(skb);
669   tx_error:
670         kfree_skb(skb);
671 + tx_error_out:
672         LeaveFunction(10);
673         return NF_STOLEN;
674  }
675 @@ -261,6 +264,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
676         dst_release(skb->dst);
677         skb->dst = &rt->u.dst;
678  
679 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
680 +               goto tx_error_out;
681 +
682         /* mangle the packet */
683         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
684                 goto tx_error;
685 @@ -284,8 +290,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
686    tx_error_icmp:
687         dst_link_failure(skb);
688    tx_error:
689 -       LeaveFunction(10);
690         kfree_skb(skb);
691 +  tx_error_out:
692 +       LeaveFunction(10);
693         return NF_STOLEN;
694    tx_error_put:
695         ip_rt_put(rt);
696 @@ -384,14 +391,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
697         /* fix old IP header checksum */
698         ip_send_check(old_iph);
699  
700 -       skb_push(skb, sizeof(struct iphdr));
701 -       skb_reset_network_header(skb);
702 -       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
703 -
704         /* drop old route */
705         dst_release(skb->dst);
706         skb->dst = &rt->u.dst;
707  
708 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
709 +               goto tx_error_out;
710 +
711 +       skb_push(skb, sizeof(struct iphdr));
712 +       skb_reset_network_header(skb);
713 +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
714 +
715         /*
716          *      Push down and install the IPIP header.
717          */
718 @@ -419,6 +429,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
719         dst_link_failure(skb);
720    tx_error:
721         kfree_skb(skb);
722 +  tx_error_out:
723         LeaveFunction(10);
724         return NF_STOLEN;
725  }
726 @@ -464,6 +475,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
727         dst_release(skb->dst);
728         skb->dst = &rt->u.dst;
729  
730 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
731 +               goto tx_error_out;
732 +
733         /* Another hack: avoid icmp_send in ip_fragment */
734         skb->local_df = 1;
735  
736 @@ -476,6 +490,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
737         dst_link_failure(skb);
738    tx_error:
739         kfree_skb(skb);
740 +  tx_error_out:
741         LeaveFunction(10);
742         return NF_STOLEN;
743  }
744 @@ -535,6 +550,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
745         dst_release(skb->dst);
746         skb->dst = &rt->u.dst;
747  
748 +       /* TODO: properly alter reply for NFCT */
749 +
750         ip_vs_nat_icmp(skb, pp, cp, 0);
751  
752         /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.145566 seconds and 3 git commands to generate.