]> git.pld-linux.org Git - packages/kernel.git/blob - kernel-ipvs-nfct.patch
- add doc bcond
[packages/kernel.git] / kernel-ipvs-nfct.patch
1 diff -urNp v2.6.28/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.28/linux/include/net/ip_vs.h   2008-12-25 10:12:24.000000000 +0200
3 +++ linux/include/net/ip_vs.h   2008-12-26 12:32:55.000000000 +0200
4 @@ -25,6 +25,14 @@
5  #include <linux/ip.h>
6  #include <linux/ipv6.h>                        /* for struct ipv6hdr */
7  #include <net/ipv6.h>                  /* for ipv6_addr_copy */
8 +#include <linux/skbuff.h>
9 +
10 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11 +#include <net/netfilter/nf_conntrack.h>
12 +#include <net/netfilter/nf_conntrack_core.h>
13 +#include <net/netfilter/nf_conntrack_expect.h>
14 +#include <net/netfilter/nf_conntrack_helper.h>
15 +#endif
16  
17  struct ip_vs_iphdr {
18         int len;
19 @@ -595,6 +603,16 @@ extern void ip_vs_init_hash_table(struct
20  #define IP_VS_APP_TYPE_FTP     1
21  
22  /*
23 + *      Netfilter connection tracking
24 + *      (from ip_vs_nfct.c)
25 + */
26 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
27 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
28 +                                     struct ip_vs_conn *cp,
29 +                                     __be16 port, __u16 proto, int from_rs);
30 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
31 +
32 +/*
33   *     ip_vs_conn handling functions
34   *     (from ip_vs_conn.c)
35   */
36 @@ -780,9 +798,42 @@ extern int sysctl_ip_vs_expire_nodest_co
37  extern int sysctl_ip_vs_expire_quiescent_template;
38  extern int sysctl_ip_vs_sync_threshold[2];
39  extern int sysctl_ip_vs_nat_icmp_send;
40 +extern int sysctl_ip_vs_snat_reroute;
41  extern struct ip_vs_stats ip_vs_stats;
42  extern const struct ctl_path net_vs_ctl_path[];
43  
44 +#ifdef CONFIG_IP_VS_NFCT
45 +
46 +extern int sysctl_ip_vs_conntrack;
47 +/* Non-zero when the conntrack sysctl is enabled and skb carries an nfct reference */
48 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
49 +{
50 +       return sysctl_ip_vs_conntrack && skb->nfct;
51 +}
52 +
53 +/* Returns 1 if the caller may go on with skb, 0 if confirmation failed (skb freed or stolen) */
54 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
55 +{
56 +       if (!ip_vs_use_conntrack(skb))
57 +               return 1;
58 +       return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
59 +               ip_vs_nfct_confirm(skb, cp, hooknum);
60 +}
61 +
62 +#else
63 +/* CONFIG_IP_VS_NFCT is not set: conntrack is never used by IPVS */
64 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
65 +{
66 +       return 0;
67 +}
68 +/* CONFIG_IP_VS_NFCT is not set: nothing to confirm, always report success (1) */
69 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
70 +{
71 +       return 1;
72 +}
73 +
74 +#endif
75 +
76  extern struct ip_vs_service *
77  ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
78                   const union nf_inet_addr *vaddr, __be16 vport);
79 diff -urNp v2.6.28/linux/net/netfilter/ipvs/Kconfig linux/net/netfilter/ipvs/Kconfig
80 --- v2.6.28/linux/net/netfilter/ipvs/Kconfig    2008-12-25 10:12:26.000000000 +0200
81 +++ linux/net/netfilter/ipvs/Kconfig    2008-12-26 12:35:37.000000000 +0200
82 @@ -238,4 +238,12 @@ config     IP_VS_FTP
83           If you want to compile it in kernel, say Y. To compile it as a
84           module, choose M here. If unsure, say N.
85  
86 +config IP_VS_NFCT
87 +       bool "Netfilter connection tracking"
88 +       depends on NF_CONNTRACK
89 +       ---help---
90 +         The Netfilter connection tracking support allows the IPVS
91 +         connection state to be exported to the Netfilter framework
92 +         for filtering purposes.
93 +
94  endif # IP_VS
95 diff -urNp v2.6.28/linux/net/netfilter/ipvs/Makefile linux/net/netfilter/ipvs/Makefile
96 --- v2.6.28/linux/net/netfilter/ipvs/Makefile   2008-12-25 10:12:26.000000000 +0200
97 +++ linux/net/netfilter/ipvs/Makefile   2008-12-26 12:36:38.000000000 +0200
98 @@ -8,10 +8,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_TC
99  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o
100  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o
101  
102 +ip_vs-extra_objs-y :=
103 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
104 +
105  ip_vs-objs :=  ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o        \
106                 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o                      \
107                 ip_vs_est.o ip_vs_proto.o                                  \
108 -               $(ip_vs_proto-objs-y)
109 +               $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
110  
111  
112  # IPVS core
113 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_conn.c linux/net/netfilter/ipvs/ip_vs_conn.c
114 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_conn.c       2008-12-25 10:12:26.000000000 +0200
115 +++ linux/net/netfilter/ipvs/ip_vs_conn.c       2008-12-26 12:38:15.000000000 +0200
116 @@ -642,6 +642,11 @@ static void ip_vs_conn_expire(unsigned l
117                 if (cp->control)
118                         ip_vs_control_del(cp);
119  
120 +#ifdef CONFIG_IP_VS_NFCT
121 +               if (sysctl_ip_vs_conntrack)
122 +                       ip_vs_nfct_conn_drop(cp);
123 +#endif
124 +
125                 if (unlikely(cp->app != NULL))
126                         ip_vs_unbind_app(cp);
127                 ip_vs_unbind_dest(cp);
128 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_core.c linux/net/netfilter/ipvs/ip_vs_core.c
129 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_core.c       2008-12-25 10:12:26.000000000 +0200
130 +++ linux/net/netfilter/ipvs/ip_vs_core.c       2008-12-26 18:21:56.000000000 +0200
131 @@ -869,13 +869,16 @@ static inline int is_tcp_reset(const str
132   */
133  static unsigned int
134  handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp,
135 -               struct ip_vs_conn *cp, int ihl)
136 +               struct ip_vs_conn *cp, int ihl, unsigned int hooknum)
137  {
138         IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet");
139  
140         if (!skb_make_writable(skb, ihl))
141                 goto drop;
142  
143 +       if (AF_INET == af && !ip_vs_confirm_conntrack(skb, cp, hooknum))
144 +               goto out;
145 +
146         /* mangle the packet */
147         if (pp->snat_handler && !pp->snat_handler(skb, pp, cp))
148                 goto drop;
149 @@ -890,6 +893,15 @@ handle_response(int af, struct sk_buff *
150                 ip_send_check(ip_hdr(skb));
151         }
152  
153 +       /*
154 +        * nf_iterate does not expect skb->dst->dev to change.
155 +        * It does not look fatal to enable this code for hooks
156 +        * where our handlers are at the end of the chain list and
157 +        * all following handlers use skb->dst->dev and not outdev.
158 +        * It will definitely route the inout NAT traffic properly
159 +        * when multiple paths are used.
160 +        */
161 +
162         /* For policy routing, packets originating from this
163          * machine itself may be routed differently to packets
164          * passing through.  We want this packet to be routed as
165 @@ -902,7 +914,8 @@ handle_response(int af, struct sk_buff *
166                         goto drop;
167         } else
168  #endif
169 -               if (ip_route_me_harder(skb, RTN_LOCAL) != 0)
170 +               if (sysctl_ip_vs_snat_reroute &&
171 +                       ip_route_me_harder(skb, RTN_LOCAL) != 0)
172                         goto drop;
173  
174         IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
175 @@ -917,8 +930,11 @@ handle_response(int af, struct sk_buff *
176         return NF_ACCEPT;
177  
178  drop:
179 -       ip_vs_conn_put(cp);
180         kfree_skb(skb);
181 +
182 +out:
183 +       ip_vs_conn_put(cp);
184 +       LeaveFunction(11);
185         return NF_STOLEN;
186  }
187  
188 @@ -958,8 +974,13 @@ ip_vs_out(unsigned int hooknum, struct s
189                 if (unlikely(iph.protocol == IPPROTO_ICMP)) {
190                         int related, verdict = ip_vs_out_icmp(skb, &related);
191  
192 -                       if (related)
193 +                       if (related) {
194 +                               if (sysctl_ip_vs_snat_reroute &&
195 +                                       NF_ACCEPT == verdict &&
196 +                                       ip_route_me_harder(skb, RTN_LOCAL))
197 +                                       verdict = NF_DROP;
198                                 return verdict;
199 +                       }
200                         ip_vs_fill_iphdr(af, skb_network_header(skb), &iph);
201                 }
202  
203 @@ -1033,7 +1054,7 @@ ip_vs_out(unsigned int hooknum, struct s
204                 return NF_ACCEPT;
205         }
206  
207 -       return handle_response(af, skb, pp, cp, iph.len);
208 +       return handle_response(af, skb, pp, cp, iph.len, hooknum);
209  }
210  
211  
212 @@ -1298,7 +1319,7 @@ ip_vs_in(unsigned int hooknum, struct sk
213                 /* For local client packets, it could be a response */
214                 cp = pp->conn_out_get(af, skb, pp, &iph, iph.len, 0);
215                 if (cp)
216 -                       return handle_response(af, skb, pp, cp, iph.len);
217 +                       return handle_response(af, skb, pp, cp, iph.len, hooknum);
218  
219                 if (!pp->conn_schedule(af, skb, pp, &v, &cp))
220                         return v;
221 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_ctl.c linux/net/netfilter/ipvs/ip_vs_ctl.c
222 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_ctl.c        2008-12-25 10:12:26.000000000 +0200
223 +++ linux/net/netfilter/ipvs/ip_vs_ctl.c        2008-12-26 17:18:35.000000000 +0200
224 @@ -84,6 +84,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
225  int sysctl_ip_vs_expire_quiescent_template = 0;
226  int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
227  int sysctl_ip_vs_nat_icmp_send = 0;
228 +int sysctl_ip_vs_snat_reroute = 0;
229 +#ifdef CONFIG_IP_VS_NFCT
230 +int sysctl_ip_vs_conntrack = 0;
231 +#endif
232  
233  
234  #ifdef CONFIG_IP_VS_DEBUG
235 @@ -1575,6 +1579,15 @@ static struct ctl_table vs_vars[] = {
236                 .mode           = 0644,
237                 .proc_handler   = &proc_dointvec,
238         },
239 +#ifdef CONFIG_IP_VS_NFCT
240 +       {
241 +               .procname       = "conntrack",
242 +               .data           = &sysctl_ip_vs_conntrack,
243 +               .maxlen         = sizeof(int),
244 +               .mode           = 0644,
245 +               .proc_handler   = &proc_dointvec,
246 +       },
247 +#endif
248         {
249                 .procname       = "drop_entry",
250                 .data           = &sysctl_ip_vs_drop_entry,
251 @@ -1596,6 +1609,13 @@ static struct ctl_table vs_vars[] = {
252                 .mode           = 0644,
253                 .proc_handler   = &proc_do_defense_mode,
254         },
255 +       {
256 +               .procname       = "snat_reroute",
257 +               .data           = &sysctl_ip_vs_snat_reroute,
258 +               .maxlen         = sizeof(int),
259 +               .mode           = 0644,
260 +               .proc_handler   = &proc_dointvec,
261 +       },
262  #if 0
263         {
264                 .procname       = "timeout_established",
265 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_ftp.c linux/net/netfilter/ipvs/ip_vs_ftp.c
266 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_ftp.c        2008-12-25 10:12:26.000000000 +0200
267 +++ linux/net/netfilter/ipvs/ip_vs_ftp.c        2008-12-26 17:21:25.000000000 +0200
268 @@ -202,6 +202,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
269                         ip_vs_control_add(n_cp, cp);
270                 }
271  
272 +#ifdef CONFIG_IP_VS_NFCT
273 +               if (skb->nfct)
274 +                       ip_vs_nfct_expect_related(skb, n_cp, 0, IPPROTO_TCP, 0);
275 +#endif
276 +
277                 /*
278                  * Replace the old passive address with the new one
279                  */
280 @@ -342,6 +347,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
281                 ip_vs_control_add(n_cp, cp);
282         }
283  
284 +#ifdef CONFIG_IP_VS_NFCT
285 +       if (skb->nfct)
286 +               ip_vs_nfct_expect_related(skb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
287 +#endif
288 +
289         /*
290          *      Move tunnel to listen state
291          */
292 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_nfct.c linux/net/netfilter/ipvs/ip_vs_nfct.c
293 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_nfct.c       1970-01-01 02:00:00.000000000 +0200
294 +++ linux/net/netfilter/ipvs/ip_vs_nfct.c       2008-12-26 18:35:40.000000000 +0200
295 @@ -0,0 +1,386 @@
296 +/*
297 + * ip_vs_nfct.c:       Netfilter connection tracking support for IPVS
298 + *
299 + * Portions Copyright (C) 2001-2002
300 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
301 + *
302 + * Portions Copyright (C) 2003-2008
303 + * Julian Anastasov
304 + *
305 + *
306 + * This code is free software; you can redistribute it and/or modify
307 + * it under the terms of the GNU General Public License as published by
308 + * the Free Software Foundation; either version 2 of the License, or
309 + * (at your option) any later version.
310 + *
311 + * This program is distributed in the hope that it will be useful,
312 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
313 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
314 + * GNU General Public License for more details.
315 + *
316 + * You should have received a copy of the GNU General Public License
317 + * along with this program; if not, write to the Free Software
318 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
319 + *
320 + *
321 + * Authors:
322 + * Ben North <ben@redfrontdoor.org>
323 + * Julian Anastasov <ja@ssi.bg>                Reorganize and sync with latest kernels
324 + *
325 + *
326 + * Current status:
327 + *
328 + * - provide conntrack confirmation for new and related connections, so
329 + *   that their proper conntrack state is visible in all hooks
330 + * - support for all forwarding methods, not only NAT
331 + * - FTP support (NAT), ability to support other NAT apps with expectations
332 + * - to correctly create expectations for related NAT connections the proper
333 + *   NF conntrack support must already be installed, e.g. ip_vs_ftp requires
334 + *   nf_conntrack_ftp for the same ports
335 + *
336 + */
337 +
338 +#include <linux/module.h>
339 +#include <linux/types.h>
340 +#include <linux/kernel.h>
341 +#include <linux/errno.h>
342 +#include <linux/compiler.h>
343 +#include <linux/vmalloc.h>
344 +#include <linux/skbuff.h>
345 +#include <net/ip.h>
346 +#include <linux/netfilter.h>
347 +#include <linux/netfilter_ipv4.h>
348 +#include <net/ip_vs.h>
349 +
350 +
351 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
352 +
353 +
354 +#define FMT_TUPLE      "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
355 +#define ARG_TUPLE(t)   NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
356 +                       NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
357 +                       (t)->dst.protonum
358 +
359 +#define FMT_CONN       "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
360 +#define ARG_CONN(c)    NIPQUAD((c)->caddr), ntohs((c)->cport), \
361 +                       NIPQUAD((c)->vaddr), ntohs((c)->vport), \
362 +                       NIPQUAD((c)->daddr), ntohs((c)->dport), \
363 +                       (c)->protocol, (c)->state
364 +
365 +/* Returns 1 on success; 0 on failure, with skb freed unless it was stolen */
366 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
367 +                               unsigned int hooknum)
368 +{
369 +       /*
370 +        * Assumptions made by this function:
371 +        * - skb->nfct is not NULL and the conntrack is not confirmed yet
372 +        * - we are called before the packet is mangled
373 +        */
374 +
375 +       struct iphdr *iph = ip_hdr(skb);
376 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
377 +       struct nf_conntrack_tuple new_reply;
378 +       int ret = NF_DROP;
379 +       __be16 _ports[2], *pptr;
380 +#ifdef CONFIG_IP_VS_DEBUG
381 +       struct nf_conntrack_tuple *orig_tup =
382 +               &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
383 +       struct nf_conntrack_tuple *orig_rep =
384 +               &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
385 +#endif
386 +#ifdef CONFIG_NF_NAT_NEEDED
387 +       int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
388 +#else
389 +       int initialized = 0;
390 +#endif
391 +
392 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
393 +               ", cp=" FMT_CONN "\n",
394 +               __FUNCTION__, ct, initialized,
395 +               ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
396 +
397 +#ifdef CONFIG_NF_NAT_NEEDED
398 +       /*
399 +        * The conntrack already has NAT status bits set, maybe we are trying
400 +        * to alter a DNAT conn? Not supported: return 1 without confirming.
401 +        */
402 +       if (initialized && ct->status & IPS_NAT_MASK) {
403 +#ifdef CONFIG_IP_VS_DEBUG
404 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
405 +                       __FUNCTION__, ct, ct->status, initialized);
406 +#endif
407 +               return 1;
408 +       }
409 +#endif
410 +
411 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_INET_FORWARD == hooknum)
412 +               goto confirm;
413 +
414 +       /*
415 +        * Alter reply only for IP_VS_CONN_F_MASQ outside of the FORWARD
416 +        * hook (outin direction): the new reply is real server -> packet
417 +        * source. Related inout conns are handled by the expectfn callback.
418 +        */
419 +
420 +       pptr = skb_header_pointer(skb, ip_hdrlen(skb),
421 +                                 sizeof(_ports), _ports);
422 +       if (!pptr)
423 +               goto out;
424 +
425 +       new_reply = (struct nf_conntrack_tuple) {
426 +               .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
427 +
428 +       new_reply.src.u3 = cp->daddr;
429 +       new_reply.src.u.tcp.port = cp->dport;
430 +       new_reply.src.l3num = PF_INET;
431 +       new_reply.dst.u3.ip = iph->saddr;
432 +       new_reply.dst.u.tcp.port = pptr[0];
433 +
434 +       nf_conntrack_alter_reply(ct, &new_reply);
435 +
436 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
437 +               ", new_reply=" FMT_TUPLE " => alter_reply\n",
438 +               __FUNCTION__, ct, initialized,
439 +               ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
440 +
441 +       /*
442 +        * No need to rehash NAT info because the source address of the
443 +        * original direction is left unchanged
444 +        */
445 +
446 +confirm:
447 +
448 +       ret = __nf_conntrack_confirm(skb);
449 +
450 +       if (ret != NF_STOLEN) {
451 +               IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
452 +                       __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
453 +       }
454 +
455 +       if (ret != NF_ACCEPT)
456 +               goto out;
457 +       return 1;
458 +
459 +out:
460 +       if (ret != NF_STOLEN)
461 +               kfree_skb(skb);
462 +       return 0;
463 +}
464 +
465 +/*
466 + * Confirm (and optionally alter) the conntrack entry, because IPVS packets
467 + * do not reach ipv4_confirm. Returns 1 to go on, 0 if skb was freed/stolen.
468 + */
469 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
470 +                      unsigned int hooknum)
471 +{
472 +       struct iphdr *iph = ip_hdr(skb);
473 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
474 +
475 +       /* By the time we're sending the packet out the other
476 +        * side, there should be a confirmed Netfilter CT entry
477 +        * for this connection.  This may not be the case,
478 +        * however, if it's a brand new connection, or if the NF
479 +        * entry has timed out before ours has.  Either way, if
480 +        * the NF CT entry is unconfirmed, confirm it, and deal
481 +        * with reply tuple mangling at the same time.
482 +        */
483 +
484 +       /* We only deal with TCP or UDP packets, others pass unconfirmed */
485 +       if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
486 +               return 1;
487 +
488 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
489 +               /*
490 +                * Non-NAT conntracks may stay in SYN_SENT state, maybe
491 +                * because the replies from the real server go directly
492 +                * to the client. Mark them as replied (ESTABLISHED) for
493 +                * non-TCP, or once the IPVS conn is in established state.
494 +                */
495 +               if (iph->protocol != IPPROTO_TCP ||
496 +                   IP_VS_TCP_S_ESTABLISHED == cp->state) {
497 +                       set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
498 +               }
499 +       }
500 +
501 +       /*
502 +        * We assume that reused connections do not change their rip:rport,
503 +        * so there is no need to alter their conntrack reply again
504 +        */
505 +       return __ip_vs_nfct_confirm(skb, cp, hooknum);
506 +}
507 +
508 +/* expectfn handler, called from init_conntrack(): for an IPv4 expectation
509 + * that matches an IPVS conn, alter the conntrack reply (NAT conns only).
510 + */
511 +
512 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
513 +       struct nf_conntrack_expect *exp)
514 +{
515 +       struct nf_conntrack_tuple *orig, new_reply;
516 +       struct ip_vs_conn *cp;
517 +
518 +       if (exp->tuple.src.l3num != PF_INET)
519 +               return;
520 +
521 +       /*
522 +        * - We assume that no NF locks are held before this callback
523 +        * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
524 +        *   expectations even if they use wildcard values, now we provide
525 +        *   the actual values from the newly created original conntrack direction
526 +        * - the conntrack is confirmed when the packet reaches the IPVS hooks
527 +        */
528 +
529 +       /* RS->CLIENT: look up the original tuple as the out direction */
530 +       orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
531 +       cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum,
532 +                               &orig->src.u3, orig->src.u.tcp.port,
533 +                               &orig->dst.u3, orig->dst.u.tcp.port);
534 +       if (cp) {
535 +               /* Change reply CLIENT->RS to CLIENT->VS */
536 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
537 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
538 +                       ", found inout cp=" FMT_CONN "\n",
539 +                       __FUNCTION__, ct, ct->status,
540 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
541 +                       ARG_CONN(cp));
542 +               new_reply.dst.u3 = cp->vaddr;
543 +               new_reply.dst.u.tcp.port = cp->vport;
544 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
545 +                       ", inout cp=" FMT_CONN "\n",
546 +                       __FUNCTION__, ct,
547 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
548 +                       ARG_CONN(cp));
549 +               goto alter;
550 +       }
551 +
552 +       /* CLIENT->VS: look up the original tuple as the in direction */
553 +       cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum,
554 +                               &orig->src.u3, orig->src.u.tcp.port,
555 +                               &orig->dst.u3, orig->dst.u.tcp.port);
556 +       if (cp) {
557 +               /* Change reply VS->CLIENT to RS->CLIENT */
558 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
559 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
560 +                       ", found outin cp=" FMT_CONN "\n",
561 +                       __FUNCTION__, ct, ct->status,
562 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
563 +                       ARG_CONN(cp));
564 +               new_reply.src.u3 = cp->daddr;
565 +               new_reply.src.u.tcp.port = cp->dport;
566 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
567 +                       ", outin cp=" FMT_CONN "\n",
568 +                       __FUNCTION__, ct,
569 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
570 +                       ARG_CONN(cp));
571 +               goto alter;
572 +       }
573 +       IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
574 +               __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
575 +       return;
576 +
577 +alter:
578 +
579 +       /* Never alter conntrack for non-NAT conns */
580 +       if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
581 +               nf_conntrack_alter_reply(ct, &new_reply);
582 +       ip_vs_conn_put(cp);
583 +       return;
584 +}
585 +
586 +/*
587 + * Create an IPv4 NF conntrack expectation for a connection related to skb's
588 + * conntrack. A zero port means any source port. The expectfn callback will
589 + * alter the reply of the new conntrack when its first packet comes.
590 + */
591 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
592 +                              __be16 port, __u16 proto, int from_rs)
593 +{
594 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
595 +       struct nf_conntrack_expect *e;
596 +
597 +       if (!sysctl_ip_vs_conntrack)
598 +               return;
599 +
600 +       if (!ct) {
601 +               IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
602 +                       __FUNCTION__, ct, ARG_CONN(cp));
603 +               return;
604 +       }
605 +
606 +       if (!(e = nf_ct_expect_alloc(ct)))
607 +               return;
608 +
609 +       e->expectfn                     = ip_vs_nfct_expect_callback;
610 +       e->helper                       = NULL;
611 +       e->flags                        = 0;
612 +       e->class                        = NF_CT_EXPECT_CLASS_DEFAULT;
613 +       memset(&e->tuple, 0, sizeof(e->tuple));
614 +       e->tuple.src.u.tcp.port         = port;
615 +       e->tuple.src.l3num              = PF_INET;
616 +       e->tuple.dst.protonum           = proto;
617 +       memset(&e->mask, 0, sizeof(e->mask));
618 +       e->mask.src.u3.ip               = 0xffffffff;
619 +       e->mask.src.u.all               = port? 0xffff : 0;
620 +       /* from_rs: expect real server -> client, else client -> virtual service */
621 +       if (from_rs) {
622 +               e->tuple.src.u3 = cp->daddr;
623 +               e->tuple.dst.u3 = cp->caddr;
624 +               e->tuple.dst.u.tcp.port = cp->cport;
625 +       } else {
626 +               e->tuple.src.u3 = cp->caddr;
627 +               e->tuple.dst.u3 = cp->vaddr;
628 +               e->tuple.dst.u.tcp.port = cp->vport;
629 +       }
630 +
631 +       IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
632 +               __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
633 +       nf_ct_expect_related(e);
634 +       nf_ct_expect_put(e);
635 +}
636 +
637 +/* Our connection was terminated, try to drop its conntrack immediately.
638 + * Only IPv4 conns with a known client port map to the PF_INET tuple here.
639 + */
640 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
641 +{
642 +       struct nf_conntrack_tuple_hash *h;
643 +       struct nf_conn *ct;
644 +       struct nf_conntrack_tuple tuple;
645 +
646 +       if (cp->af != AF_INET || !cp->cport)
647 +               return;
648 +
649 +       tuple = (struct nf_conntrack_tuple) {
650 +               .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
651 +       tuple.src.u3 = cp->caddr;
652 +       tuple.src.u.all = cp->cport;
653 +       tuple.src.l3num = PF_INET;
654 +       tuple.dst.u3 = cp->vaddr;
655 +       tuple.dst.u.all = cp->vport;
656 +
657 +       IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
658 +               " for conn " FMT_CONN "\n",
659 +               __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
660 +
661 +       h = nf_conntrack_find_get(&init_net, &tuple);
662 +       if (h) {
663 +               ct = nf_ct_tuplehash_to_ctrack(h);
664 +               if (del_timer(&ct->timeout)) {
665 +                       IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
666 +                               FMT_TUPLE "\n",
667 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
668 +                       if (ct->timeout.function)
669 +                               ct->timeout.function(ct->timeout.data);
670 +               } else {
671 +                       IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
672 +                               FMT_TUPLE "\n",
673 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
674 +               }
675 +               nf_ct_put(ct);
676 +       } else {
677 +               IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
678 +                       __FUNCTION__, ARG_TUPLE(&tuple));
679 +       }
680 +}
681 +
682 diff -urNp v2.6.28/linux/net/netfilter/ipvs/ip_vs_xmit.c linux/net/netfilter/ipvs/ip_vs_xmit.c
683 --- v2.6.28/linux/net/netfilter/ipvs/ip_vs_xmit.c       2008-12-25 10:12:26.000000000 +0200
684 +++ linux/net/netfilter/ipvs/ip_vs_xmit.c       2008-12-26 17:31:51.000000000 +0200
685 @@ -265,6 +265,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
686         dst_release(skb->dst);
687         skb->dst = &rt->u.dst;
688  
689 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
690 +               goto tx_error_out;
691 +
692         /* Another hack: avoid icmp_send in ip_fragment */
693         skb->local_df = 1;
694  
695 @@ -277,6 +280,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
696         dst_link_failure(skb);
697   tx_error:
698         kfree_skb(skb);
699 + tx_error_out:
700         LeaveFunction(10);
701         return NF_STOLEN;
702  }
703 @@ -393,6 +397,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
704         dst_release(skb->dst);
705         skb->dst = &rt->u.dst;
706  
707 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
708 +               goto tx_error_out;
709 +
710         /* mangle the packet */
711         if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))
712                 goto tx_error;
713 @@ -416,8 +423,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
714    tx_error_icmp:
715         dst_link_failure(skb);
716    tx_error:
717 -       LeaveFunction(10);
718         kfree_skb(skb);
719 +  tx_error_out:
720 +       LeaveFunction(10);
721         return NF_STOLEN;
722    tx_error_put:
723         ip_rt_put(rt);
724 @@ -593,14 +601,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
725         /* fix old IP header checksum */
726         ip_send_check(old_iph);
727  
728 -       skb_push(skb, sizeof(struct iphdr));
729 -       skb_reset_network_header(skb);
730 -       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
731 -
732         /* drop old route */
733         skb_dst_drop(skb);
734         skb_dst_set(skb, &rt->u.dst);
735  
736 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
737 +               goto tx_error_out;
738 +
739 +       skb_push(skb, sizeof(struct iphdr));
740 +       skb_reset_network_header(skb);
741 +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
742 +
743         /*
744          *      Push down and install the IPIP header.
745          */
746 @@ -628,6 +639,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
747         dst_link_failure(skb);
748    tx_error:
749         kfree_skb(skb);
750 +  tx_error_out:
751         LeaveFunction(10);
752         return NF_STOLEN;
753  }
754 @@ -780,6 +792,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
755         dst_release(skb->dst);
756         skb->dst = &rt->u.dst;
757  
758 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_INET_LOCAL_IN))
759 +               goto tx_error_out;
760 +
761         /* Another hack: avoid icmp_send in ip_fragment */
762         skb->local_df = 1;
763  
764 @@ -792,6 +807,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
765         dst_link_failure(skb);
766    tx_error:
767         kfree_skb(skb);
768 +  tx_error_out:
769         LeaveFunction(10);
770         return NF_STOLEN;
771  }
772 @@ -905,6 +921,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
773         dst_release(skb->dst);
774         skb->dst = &rt->u.dst;
775  
776 +       /* TODO: properly alter reply for NFCT */
777 +
778         ip_vs_nat_icmp(skb, pp, cp, 0);
779  
780         /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.158542 seconds and 3 git commands to generate.