git.pld-linux.org Git - packages/kernel.git/blob - kernel-ipvs-nfct.patch
- typo
[packages/kernel.git] / kernel-ipvs-nfct.patch
1 diff -urNp v2.6.22/linux/include/net/ip_vs.h linux/include/net/ip_vs.h
2 --- v2.6.22/linux/include/net/ip_vs.h   2007-02-11 01:06:29.000000000 +0200
3 +++ linux/include/net/ip_vs.h   2007-07-12 12:03:43.000000000 +0300
4 @@ -9,6 +9,16 @@
5  #include <asm/types.h>         /* For __uXX types */
6  #include <linux/types.h>       /* For __beXX types in userland */
7  
8 +#ifdef __KERNEL__
9 +#include <linux/skbuff.h>
10 +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
11 +#include <net/netfilter/nf_conntrack.h>
12 +#include <net/netfilter/nf_conntrack_core.h>
13 +#include <net/netfilter/nf_conntrack_expect.h>
14 +#include <net/netfilter/nf_conntrack_helper.h>
15 +#endif
16 +#endif
17 +
18  #define IP_VS_VERSION_CODE     0x010201
19  #define NVERSION(version)                      \
20         (version >> 16) & 0xFF,                 \
21 @@ -358,6 +368,8 @@ enum {
22         NET_IPV4_VS_SYNC_THRESHOLD=24,
23         NET_IPV4_VS_NAT_ICMP_SEND=25,
24         NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE=26,
25 +       NET_IPV4_VS_SNAT_REROUTE=27,
26 +       NET_IPV4_VS_CONNTRACK=28,
27         NET_IPV4_VS_LAST
28  };
29  
30 @@ -715,6 +727,16 @@ extern void ip_vs_init_hash_table(struct
31   */
32  
33  /*
34 + *      Netfilter connection tracking
35 + *      (from ip_vs_nfct.c)
36 + */
37 +extern int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum);
38 +extern void ip_vs_nfct_expect_related(struct sk_buff *skb,
39 +                                     struct ip_vs_conn *cp,
40 +                                     __be16 port, __u16 proto, int from_rs);
41 +extern void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp);
42 +
43 +/*
44   *     IPVS connection entry hash table
45   */
46  #ifndef CONFIG_IP_VS_TAB_BITS
47 @@ -885,8 +907,41 @@ extern int sysctl_ip_vs_expire_nodest_co
48  extern int sysctl_ip_vs_expire_quiescent_template;
49  extern int sysctl_ip_vs_sync_threshold[2];
50  extern int sysctl_ip_vs_nat_icmp_send;
51 +extern int sysctl_ip_vs_snat_reroute;
52  extern struct ip_vs_stats ip_vs_stats;
53  
54 +#ifdef CONFIG_IP_VS_NFCT
55 +
56 +extern int sysctl_ip_vs_conntrack;
57 +
58 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
59 +{
60 +       return sysctl_ip_vs_conntrack && skb->nfct;
61 +}
62 +
63 +/* Returns 1 to continue, 0 on failure (skb already freed or stolen) */
64 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
65 +{
66 +       if (!ip_vs_use_conntrack(skb))
67 +               return 1;
68 +       return nf_ct_is_confirmed((struct nf_conn *) skb->nfct) ||
69 +               ip_vs_nfct_confirm(skb, cp, hooknum);
70 +}
71 +
72 +#else
73 +
74 +static inline int ip_vs_use_conntrack(struct sk_buff *skb)
75 +{
76 +       return 0;
77 +}
78 +
79 +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, unsigned int hooknum)
80 +{
81 +       return 1;
82 +}
83 +
84 +#endif
85 +
86  extern struct ip_vs_service *
87  ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport);
88  
89 diff -urNp v2.6.22/linux/net/ipv4/ipvs/Kconfig linux/net/ipv4/ipvs/Kconfig
90 --- v2.6.22/linux/net/ipv4/ipvs/Kconfig 2007-07-10 09:18:43.000000000 +0300
91 +++ linux/net/ipv4/ipvs/Kconfig 2007-07-12 09:48:59.000000000 +0300
92 @@ -221,4 +221,12 @@ config     IP_VS_FTP
93           If you want to compile it in kernel, say Y. To compile it as a
94           module, choose M here. If unsure, say N.
95  
96 +config IP_VS_NFCT
97 +       bool "Netfilter connection tracking"
98 +       depends on NF_CONNTRACK
99 +       ---help---
100 +         The Netfilter connection tracking support allows the IPVS
101 +         connection state to be exported to the Netfilter framework
102 +         for filtering purposes.
103 +
104  endif # IP_VS
105 diff -urNp v2.6.22/linux/net/ipv4/ipvs/Makefile linux/net/ipv4/ipvs/Makefile
106 --- v2.6.22/linux/net/ipv4/ipvs/Makefile        2005-06-18 08:50:52.000000000 +0300
107 +++ linux/net/ipv4/ipvs/Makefile        2007-07-12 09:47:58.000000000 +0300
108 @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UD
109  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_ESP) += ip_vs_proto_esp.o
110  ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH) += ip_vs_proto_ah.o
111  
112 +ip_vs-extra_objs-y :=
113 +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o
114 +
115  ip_vs-objs :=  ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o        \
116                 ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o                      \
117                 ip_vs_est.o ip_vs_proto.o                                  \
118 -               $(ip_vs_proto-objs-y)
119 +               $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y)
120  
121  
122  # IPVS core
123 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_conn.c linux/net/ipv4/ipvs/ip_vs_conn.c
124 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_conn.c    2007-04-28 17:55:11.000000000 +0300
125 +++ linux/net/ipv4/ipvs/ip_vs_conn.c    2007-07-12 09:47:58.000000000 +0300
126 @@ -562,6 +562,11 @@ static void ip_vs_conn_expire(unsigned l
127                 if (cp->control)
128                         ip_vs_control_del(cp);
129  
130 +#ifdef CONFIG_IP_VS_NFCT
131 +               if (sysctl_ip_vs_conntrack)
132 +                       ip_vs_nfct_conn_drop(cp);
133 +#endif
134 +
135                 if (unlikely(cp->app != NULL))
136                         ip_vs_unbind_app(cp);
137                 ip_vs_unbind_dest(cp);
138 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_core.c linux/net/ipv4/ipvs/ip_vs_core.c
139 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_core.c    2007-07-10 09:18:43.000000000 +0300
140 +++ linux/net/ipv4/ipvs/ip_vs_core.c    2007-07-12 09:47:58.000000000 +0300
141 @@ -701,6 +701,8 @@ static int ip_vs_out_icmp(struct sk_buff
142  
143         skb->ipvs_property = 1;
144         verdict = NF_ACCEPT;
145 +       if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(pskb, RTN_LOCAL))
146 +               verdict = NF_DROP;
147  
148    out:
149         __ip_vs_conn_put(cp);
150 @@ -805,6 +807,9 @@ ip_vs_out(unsigned int hooknum, struct s
151         if (!ip_vs_make_skb_writable(pskb, ihl))
152                 goto drop;
153  
154 +       if (!ip_vs_confirm_conntrack(*pskb, cp, hooknum))
155 +               goto out;
156 +
157         /* mangle the packet */
158         if (pp->snat_handler && !pp->snat_handler(pskb, pp, cp))
159                 goto drop;
160 @@ -812,13 +817,23 @@ ip_vs_out(unsigned int hooknum, struct s
161         ip_hdr(skb)->saddr = cp->vaddr;
162         ip_send_check(ip_hdr(skb));
163  
164 +       /*
165 +        * nf_iterate does not expect a change in skb->dst->dev.
166 +        * It looks like it is not fatal to enable this code for hooks
167 +        * where our handlers are at the end of the chain list and
168 +        * when all subsequent handlers use skb->dst->dev and not outdev.
169 +        * It will definitely route the inout NAT traffic properly
170 +        * when multiple paths are used.
171 +        */
172 +
173         /* For policy routing, packets originating from this
174          * machine itself may be routed differently to packets
175          * passing through.  We want this packet to be routed as
176          * if it came from this machine itself.  So re-compute
177          * the routing information.
178          */
179 -       if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
180 +
181 +       if (sysctl_ip_vs_snat_reroute && ip_route_me_harder(pskb, RTN_LOCAL))
182                 goto drop;
183         skb = *pskb;
184  
185 @@ -834,8 +849,11 @@ ip_vs_out(unsigned int hooknum, struct s
186         return NF_ACCEPT;
187  
188    drop:
189 -       ip_vs_conn_put(cp);
190         kfree_skb(*pskb);
191 +
192 +  out:
193 +       ip_vs_conn_put(cp);
194 +       LeaveFunction(11);
195         return NF_STOLEN;
196  }
197  
198 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_ctl.c linux/net/ipv4/ipvs/ip_vs_ctl.c
199 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_ctl.c     2007-07-10 09:18:43.000000000 +0300
200 +++ linux/net/ipv4/ipvs/ip_vs_ctl.c     2007-07-12 09:47:58.000000000 +0300
201 @@ -81,6 +81,10 @@ int sysctl_ip_vs_expire_nodest_conn = 0;
202  int sysctl_ip_vs_expire_quiescent_template = 0;
203  int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
204  int sysctl_ip_vs_nat_icmp_send = 0;
205 +int sysctl_ip_vs_snat_reroute = 0;
206 +#ifdef CONFIG_IP_VS_NFCT
207 +int sysctl_ip_vs_conntrack = 0;
208 +#endif
209  
210  
211  #ifdef CONFIG_IP_VS_DEBUG
212 @@ -1424,6 +1428,16 @@ static struct ctl_table vs_vars[] = {
213                 .mode           = 0644,
214                 .proc_handler   = &proc_dointvec,
215         },
216 +#ifdef CONFIG_IP_VS_NFCT
217 +       {
218 +               .ctl_name       = NET_IPV4_VS_CONNTRACK,
219 +               .procname       = "conntrack",
220 +               .data           = &sysctl_ip_vs_conntrack,
221 +               .maxlen         = sizeof(int),
222 +               .mode           = 0644,
223 +               .proc_handler   = &proc_dointvec,
224 +       },
225 +#endif
226         {
227                 .ctl_name       = NET_IPV4_VS_DROP_ENTRY,
228                 .procname       = "drop_entry",
229 @@ -1448,6 +1462,14 @@ static struct ctl_table vs_vars[] = {
230                 .mode           = 0644,
231                 .proc_handler   = &proc_do_defense_mode,
232         },
233 +       {
234 +               .ctl_name       = NET_IPV4_VS_SNAT_REROUTE,
235 +               .procname       = "snat_reroute",
236 +               .data           = &sysctl_ip_vs_snat_reroute,
237 +               .maxlen         = sizeof(int),
238 +               .mode           = 0644,
239 +               .proc_handler   = &proc_dointvec,
240 +       },
241  #if 0
242         {
243                 .ctl_name       = NET_IPV4_VS_TO_ES,
244 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_ftp.c linux/net/ipv4/ipvs/ip_vs_ftp.c
245 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_ftp.c     2007-07-10 09:18:43.000000000 +0300
246 +++ linux/net/ipv4/ipvs/ip_vs_ftp.c     2007-07-12 09:47:58.000000000 +0300
247 @@ -194,6 +194,11 @@ static int ip_vs_ftp_out(struct ip_vs_ap
248                         ip_vs_control_add(n_cp, cp);
249                 }
250  
251 +#ifdef CONFIG_IP_VS_NFCT
252 +               if ((*pskb)->nfct)
253 +                       ip_vs_nfct_expect_related(*pskb, n_cp, 0, IPPROTO_TCP, 0);
254 +#endif
255 +
256                 /*
257                  * Replace the old passive address with the new one
258                  */
259 @@ -326,6 +331,11 @@ static int ip_vs_ftp_in(struct ip_vs_app
260                 ip_vs_control_add(n_cp, cp);
261         }
262  
263 +#ifdef CONFIG_IP_VS_NFCT
264 +       if ((*pskb)->nfct)
265 +               ip_vs_nfct_expect_related(*pskb, n_cp, n_cp->dport, IPPROTO_TCP, 1);
266 +#endif
267 +
268         /*
269          *      Move tunnel to listen state
270          */
271 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_nfct.c linux/net/ipv4/ipvs/ip_vs_nfct.c
272 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_nfct.c    1970-01-01 02:00:00.000000000 +0200
273 +++ linux/net/ipv4/ipvs/ip_vs_nfct.c    2007-07-12 12:04:31.000000000 +0300
274 @@ -0,0 +1,389 @@
275 +/*
276 + * ip_vs_nfct.c:       Netfilter connection tracking support for IPVS
277 + *
278 + * Portions Copyright (C) 2001-2002
279 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland.
280 + *
281 + * Portions Copyright (C) 2003-2007
282 + * Julian Anastasov
283 + *
284 + *
285 + * This code is free software; you can redistribute it and/or modify
286 + * it under the terms of the GNU General Public License as published by
287 + * the Free Software Foundation; either version 2 of the License, or
288 + * (at your option) any later version.
289 + *
290 + * This program is distributed in the hope that it will be useful,
291 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
292 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
293 + * GNU General Public License for more details.
294 + *
295 + * You should have received a copy of the GNU General Public License
296 + * along with this program; if not, write to the Free Software
297 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
298 + *
299 + *
300 + * Authors:
301 + * Ben North <ben@redfrontdoor.org>
302 + * Julian Anastasov <ja@ssi.bg>                Reorganize and sync with latest kernels
303 + *
304 + *
305 + * Current status:
306 + *
307 + * - provide conntrack confirmation for new and related connections, so
308 + * that their proper conntrack state is visible in all hooks
309 + * - support for all forwarding methods, not only NAT
310 + * - FTP support (NAT), ability to support other NAT apps with expectations
311 + * - to correctly create expectations for related NAT connections the proper
312 + * NF conntrack support must be already installed, eg. ip_vs_ftp requires
313 + * nf_conntrack_ftp for the same ports
314 + *
315 + */
316 +
317 +#include <linux/module.h>
318 +#include <linux/types.h>
319 +#include <linux/kernel.h>
320 +#include <linux/errno.h>
321 +#include <linux/compiler.h>
322 +#include <linux/vmalloc.h>
323 +#include <linux/skbuff.h>
324 +#include <net/ip.h>
325 +#include <linux/netfilter.h>
326 +#include <linux/netfilter_ipv4.h>
327 +#include <net/ip_vs.h>
328 +
329 +
330 +EXPORT_SYMBOL(ip_vs_nfct_expect_related);
331 +
332 +
333 +#define FMT_TUPLE      "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u"
334 +#define ARG_TUPLE(t)   NIPQUAD((t)->src.u3.ip), ntohs((t)->src.u.all), \
335 +                       NIPQUAD((t)->dst.u3.ip), ntohs((t)->dst.u.all), \
336 +                       (t)->dst.protonum
337 +
338 +#define FMT_CONN       "%u.%u.%u.%u:%u->%u.%u.%u.%u:%u->%u.%u.%u.%u:%u/%u:%u"
339 +#define ARG_CONN(c)    NIPQUAD((c)->caddr), ntohs((c)->cport), \
340 +                       NIPQUAD((c)->vaddr), ntohs((c)->vport), \
341 +                       NIPQUAD((c)->daddr), ntohs((c)->dport), \
342 +                       (c)->protocol, (c)->state
343 +
344 +/* Returns 1 to continue, 0 on failure (skb freed unless already stolen) */
345 +static int __ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
346 +                               unsigned int hooknum)
347 +{
348 +       /*
349 +        * The assumptions:
350 +        * - the nfct is !NULL and is not confirmed
351 +        * - we are called before any mangle
352 +        */
353 +
354 +       struct iphdr *iph = ip_hdr(skb);
355 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
356 +       struct nf_conntrack_tuple new_reply;
357 +       int ret = NF_DROP;
358 +       __be16 _ports[2], *pptr;
359 +#ifdef CONFIG_IP_VS_DEBUG
360 +       struct nf_conntrack_tuple *orig_tup =
361 +               &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
362 +       struct nf_conntrack_tuple *orig_rep =
363 +               &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
364 +#endif
365 +#ifdef CONFIG_NF_NAT_NEEDED
366 +       int initialized = !!(ct->status & IPS_NAT_DONE_MASK);
367 +#else
368 +       int initialized = 0;
369 +#endif
370 +
371 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, tuples=" FMT_TUPLE ", " FMT_TUPLE
372 +               ", cp=" FMT_CONN "\n",
373 +               __FUNCTION__, ct, initialized,
374 +               ARG_TUPLE(orig_tup), ARG_TUPLE(orig_rep), ARG_CONN(cp));
375 +
376 +#ifdef CONFIG_NF_NAT_NEEDED
377 +       /*
378 +        * This is really bad, maybe we are trying to alter a DNAT conn?
379 +        * This is not supported, avoid the confirmation.
380 +        */
381 +       if (initialized && ct->status & IPS_NAT_MASK) {
382 +#ifdef CONFIG_IP_VS_DEBUG
383 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, init=%d\n",
384 +                       __FUNCTION__, ct, ct->status, initialized);
385 +#endif
386 +               return 1;
387 +       }
388 +#endif
389 +
390 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ || NF_IP_FORWARD == hooknum)
391 +               goto confirm;
392 +
393 +       /*
394 +        * Alter reply only for IP_VS_CONN_F_MASQ in outin direction.
395 +        * For related connections in inout direction it is done in
396 +        * expectfn callback.
397 +        */
398 +
399 +       pptr = skb_header_pointer(skb, ip_hdrlen(skb),
400 +                                 sizeof(_ports), _ports);
401 +       if (!pptr)
402 +               goto out;
403 +
404 +       new_reply = (struct nf_conntrack_tuple) {
405 +               .dst = { .protonum = iph->protocol, .dir = IP_CT_DIR_REPLY }};
406 +
407 +       new_reply.src.u3.ip = cp->daddr;
408 +       new_reply.src.u.tcp.port = cp->dport;
409 +       new_reply.src.l3num = PF_INET;
410 +       new_reply.dst.u3.ip = iph->saddr;
411 +       new_reply.dst.u.tcp.port = pptr[0];
412 +
413 +       nf_conntrack_alter_reply(ct, &new_reply);
414 +
415 +       IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE
416 +               ", new_reply=" FMT_TUPLE " => alter_reply\n",
417 +               __FUNCTION__, ct, initialized,
418 +               ARG_TUPLE(orig_tup), ARG_TUPLE(&new_reply));
419 +
420 +       /*
421 +        * No need to rehash NAT info because we don't change source
422 +        * address in original direction
423 +        */
424 +
425 +confirm:
426 +
427 +       ret = __nf_conntrack_confirm(&skb);
428 +
429 +       if (ret != NF_STOLEN) {
430 +               IP_VS_DBG(7, "%s: ct=%p, init=%d, orig=" FMT_TUPLE " => confirm ret=%d\n",
431 +                       __FUNCTION__, ct, initialized, ARG_TUPLE(orig_tup), ret);
432 +       }
433 +
434 +       if (ret != NF_ACCEPT)
435 +               goto out;
436 +       return 1;
437 +
438 +out:
439 +       if (ret != NF_STOLEN)
440 +               kfree_skb(skb);
441 +       return 0;
442 +}
443 +
444 +/*
445 + * Confirm (and optionally alter) the conntrack entry if needed
446 + * because the IPVS packets do not reach ipv4_confirm.
447 + */
448 +int ip_vs_nfct_confirm(struct sk_buff *skb, struct ip_vs_conn *cp,
449 +                      unsigned int hooknum)
450 +{
451 +       struct iphdr *iph = ip_hdr(skb);
452 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
453 +
454 +       /* By the time we're sending the packet out the other
455 +        * side, there should be a confirmed Netfilter CT entry
456 +        * for this connection.  This may not be the case,
457 +        * however, if it's a brand new connection, or if the NF
458 +        * entry has timed out before ours has.  Either way, if
459 +        * the NF CT entry is unconfirmed, confirm it, and deal
460 +        * with reply tuple mangling at the same time.
461 +        */
462 +
463 +       /* We only deal with TCP or UDP packets */
464 +       if (iph->protocol != IPPROTO_TCP && iph->protocol != IPPROTO_UDP)
465 +               return 1;
466 +
467 +       if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) {
468 +               /*
469 +                * Do not be surprised if non-NAT conntracks stay in SYN_SENT
470 +                * state, maybe the replies from the real server go
471 +                * directly to the client. In any case, keep them in REPLIED
472 +                * state (ESTABLISHED).
473 +                */
474 +               if (iph->protocol != IPPROTO_TCP ||
475 +                   IP_VS_TCP_S_ESTABLISHED == cp->state) {
476 +                       set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
477 +               }
478 +       }
479 +
480 +       /*
481 +        * We assume the reused connections do not change their rip:rport
482 +        * and we do not need to alter their conntrack reply
483 +        */
484 +       return __ip_vs_nfct_confirm(skb, cp, hooknum);
485 +}
486 +
487 +/*
488 + * We are called from init_conntrack() as expectfn handler
489 + */
490 +
491 +static void ip_vs_nfct_expect_callback(struct nf_conn *ct,
492 +       struct nf_conntrack_expect *exp)
493 +{
494 +       struct nf_conntrack_tuple *orig, new_reply;
495 +       struct ip_vs_conn *cp;
496 +
497 +       if (exp->tuple.src.l3num != PF_INET)
498 +               return;
499 +
500 +       /* 
501 +        * - We assume that no NF locks are held before this callback
502 +        * - ip_vs_conn_out_get and ip_vs_conn_in_get should match their
503 +        * expectations even if they use wildcard values, now we provide
504 +        * the actual values from the newly created original conntrack direction
505 +        * - the conntrack is confirmed when packet reaches IPVS hooks
506 +        */
507 +
508 +       /* RS->CLIENT */
509 +       orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
510 +       cp = ip_vs_conn_out_get(orig->dst.protonum,
511 +                               orig->src.u3.ip, orig->src.u.tcp.port,
512 +                               orig->dst.u3.ip, orig->dst.u.tcp.port);
513 +       if (cp) {
514 +               /* Change reply CLIENT->RS to CLIENT->VS */
515 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
516 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
517 +                       ", found inout cp=" FMT_CONN "\n",
518 +                       __FUNCTION__, ct, ct->status,
519 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
520 +                       ARG_CONN(cp));
521 +               new_reply.dst.u3.ip = cp->vaddr;
522 +               new_reply.dst.u.tcp.port = cp->vport;
523 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
524 +                       ", inout cp=" FMT_CONN "\n",
525 +                       __FUNCTION__, ct,
526 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
527 +                       ARG_CONN(cp));
528 +               goto alter;
529 +       }
530 +
531 +       /* CLIENT->VS */
532 +       cp = ip_vs_conn_in_get(orig->dst.protonum,
533 +                               orig->src.u3.ip, orig->src.u.tcp.port,
534 +                               orig->dst.u3.ip, orig->dst.u.tcp.port);
535 +       if (cp) {
536 +               /* Change reply VS->CLIENT to RS->CLIENT */
537 +               new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
538 +               IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " FMT_TUPLE
539 +                       ", found outin cp=" FMT_CONN "\n",
540 +                       __FUNCTION__, ct, ct->status,
541 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
542 +                       ARG_CONN(cp));
543 +               new_reply.src.u3.ip = cp->daddr;
544 +               new_reply.src.u.tcp.port = cp->dport;
545 +               IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE
546 +                       ", outin cp=" FMT_CONN "\n",
547 +                       __FUNCTION__, ct,
548 +                       ARG_TUPLE(orig), ARG_TUPLE(&new_reply),
549 +                       ARG_CONN(cp));
550 +               goto alter;
551 +       }
552 +       IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE " - unknown expect\n",
553 +               __FUNCTION__, ct, ct->status, ARG_TUPLE(orig));
554 +       return;
555 +
556 +alter:
557 +
558 +       /* Never alter conntrack for non-NAT conns */
559 +       if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ)
560 +               nf_conntrack_alter_reply(ct, &new_reply);
561 +       ip_vs_conn_put(cp);
562 +       return;
563 +}
564 +
565 +/*
566 + * Create NF conntrack expectation with wildcard (optional) source port.
567 + * Then ip_vs_nfct_expect_callback will alter the reply when the first
568 + * packet comes; the conntrack entry is confirmed later in the IPVS hooks.
569 + */
570 +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct ip_vs_conn *cp,
571 +                              __be16 port, __u16 proto, int from_rs)
572 +{
573 +       struct nf_conn *ct = (struct nf_conn *) skb->nfct;
574 +       struct nf_conntrack_expect *e;
575 +
576 +       if (!sysctl_ip_vs_conntrack)
577 +               return;
578 +
579 +       if (!ct) {
580 +               IP_VS_DBG(7, "%s: ct=%p for cp=" FMT_CONN "\n",
581 +                       __FUNCTION__, ct, ARG_CONN(cp));
582 +               return;
583 +       }
584 +
585 +       if (!(e = nf_conntrack_expect_alloc(ct)))
586 +               return;
587 +
588 +       e->expectfn                     = ip_vs_nfct_expect_callback;
589 +       e->helper                       = NULL;
590 +       e->flags                        = 0;
591 +       memset(&e->tuple, 0, sizeof(e->tuple));
592 +       e->tuple.src.u.tcp.port         = port;
593 +       e->tuple.src.l3num              = PF_INET;
594 +       e->tuple.dst.protonum           = proto;
595 +       memset(&e->mask, 0, sizeof(e->mask));
596 +       e->mask.src.u3.ip               = 0xffffffff;
597 +       e->mask.src.u.all               = port? 0xffff : 0;
598 +       e->mask.src.l3num               = 0xffff;
599 +       e->mask.dst.u3.ip               = 0xffffffff;
600 +       e->mask.dst.u.all               = 0xffff;
601 +       e->mask.dst.protonum            = 0xff;
602 +
603 +       if (from_rs) {
604 +               e->tuple.src.u3.ip = cp->daddr;
605 +               e->tuple.dst.u3.ip = cp->caddr;
606 +               e->tuple.dst.u.tcp.port = cp->cport;
607 +       } else {
608 +               e->tuple.src.u3.ip = cp->caddr;
609 +               e->tuple.dst.u3.ip = cp->vaddr;
610 +               e->tuple.dst.u.tcp.port = cp->vport;
611 +       }
612 +
613 +       IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
614 +               __FUNCTION__, ct, ARG_TUPLE(&e->tuple));
615 +       nf_conntrack_expect_related(e);
616 +       nf_conntrack_expect_put(e);
617 +}
618 +
619 +/*
620 + * Our connection was terminated, try to drop the conntrack immediately
621 + */
622 +void ip_vs_nfct_conn_drop(struct ip_vs_conn *cp)
623 +{
624 +       struct nf_conntrack_tuple_hash *h;
625 +       struct nf_conn *ct;
626 +       struct nf_conntrack_tuple tuple;
627 +
628 +       if (!cp->cport)
629 +               return;
630 +
631 +       tuple = (struct nf_conntrack_tuple) {
632 +               .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } };
633 +       tuple.src.u3.ip = cp->caddr;
634 +       tuple.src.u.all = cp->cport;
635 +       tuple.src.l3num = PF_INET;
636 +       tuple.dst.u3.ip = cp->vaddr;
637 +       tuple.dst.u.all = cp->vport;
638 +
639 +       IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE
640 +               " for conn " FMT_CONN "\n",
641 +               __FUNCTION__, ARG_TUPLE(&tuple), ARG_CONN(cp));
642 +
643 +       h = nf_conntrack_find_get(&tuple, NULL);
644 +       if (h) {
645 +               ct = nf_ct_tuplehash_to_ctrack(h);
646 +               if (del_timer(&ct->timeout)) {
647 +                       IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple="
648 +                               FMT_TUPLE "\n",
649 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
650 +                       if (ct->timeout.function)
651 +                               ct->timeout.function(ct->timeout.data);
652 +               } else {
653 +                       IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple="
654 +                               FMT_TUPLE "\n",
655 +                               __FUNCTION__, ct, ARG_TUPLE(&tuple));
656 +               }
657 +               nf_ct_put(ct);
658 +       } else {
659 +               IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n",
660 +                       __FUNCTION__, ARG_TUPLE(&tuple));
661 +       }
662 +}
663 +
664 diff -urNp v2.6.22/linux/net/ipv4/ipvs/ip_vs_xmit.c linux/net/ipv4/ipvs/ip_vs_xmit.c
665 --- v2.6.22/linux/net/ipv4/ipvs/ip_vs_xmit.c    2007-07-10 09:18:43.000000000 +0300
666 +++ linux/net/ipv4/ipvs/ip_vs_xmit.c    2007-07-12 09:54:45.000000000 +0300
667 @@ -199,6 +199,9 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
668         dst_release(skb->dst);
669         skb->dst = &rt->u.dst;
670  
671 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
672 +               goto tx_error_out;
673 +
674         /* Another hack: avoid icmp_send in ip_fragment */
675         skb->local_df = 1;
676  
677 @@ -211,6 +214,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, s
678         dst_link_failure(skb);
679   tx_error:
680         kfree_skb(skb);
681 + tx_error_out:
682         LeaveFunction(10);
683         return NF_STOLEN;
684  }
685 @@ -263,6 +267,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
686         dst_release(skb->dst);
687         skb->dst = &rt->u.dst;
688  
689 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
690 +               goto tx_error_out;
691 +
692         /* mangle the packet */
693         if (pp->dnat_handler && !pp->dnat_handler(&skb, pp, cp))
694                 goto tx_error;
695 @@ -286,8 +293,9 @@ ip_vs_nat_xmit(struct sk_buff *skb, stru
696    tx_error_icmp:
697         dst_link_failure(skb);
698    tx_error:
699 -       LeaveFunction(10);
700         kfree_skb(skb);
701 +  tx_error_out:
702 +       LeaveFunction(10);
703         return NF_STOLEN;
704    tx_error_put:
705         ip_rt_put(rt);
706 @@ -386,14 +394,17 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
707         /* fix old IP header checksum */
708         ip_send_check(old_iph);
709  
710 -       skb_push(skb, sizeof(struct iphdr));
711 -       skb_reset_network_header(skb);
712 -       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
713 -
714         /* drop old route */
715         dst_release(skb->dst);
716         skb->dst = &rt->u.dst;
717  
718 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
719 +               goto tx_error_out;
720 +
721 +       skb_push(skb, sizeof(struct iphdr));
722 +       skb_reset_network_header(skb);
723 +       memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
724 +
725         /*
726          *      Push down and install the IPIP header.
727          */
728 @@ -423,6 +434,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, s
729         dst_link_failure(skb);
730    tx_error:
731         kfree_skb(skb);
732 +  tx_error_out:
733         LeaveFunction(10);
734         return NF_STOLEN;
735  }
736 @@ -468,6 +480,9 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
737         dst_release(skb->dst);
738         skb->dst = &rt->u.dst;
739  
740 +       if (!ip_vs_confirm_conntrack(skb, cp, NF_IP_LOCAL_IN))
741 +               goto tx_error_out;
742 +
743         /* Another hack: avoid icmp_send in ip_fragment */
744         skb->local_df = 1;
745  
746 @@ -480,6 +495,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struc
747         dst_link_failure(skb);
748    tx_error:
749         kfree_skb(skb);
750 +  tx_error_out:
751         LeaveFunction(10);
752         return NF_STOLEN;
753  }
754 @@ -539,6 +555,8 @@ ip_vs_icmp_xmit(struct sk_buff *skb, str
755         dst_release(skb->dst);
756         skb->dst = &rt->u.dst;
757  
758 +       /* TODO: properly alter reply for NFCT */
759 +
760         ip_vs_nat_icmp(skb, pp, cp, 0);
761  
762         /* Another hack: avoid icmp_send in ip_fragment */
This page took 0.075055 seconds and 3 git commands to generate.