]>
Commit | Line | Data |
---|---|---|
2380c486 JR |
1 | diff -urp v2.6.28/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h |
2 | --- v2.6.28/linux/include/linux/rtnetlink.h 2008-12-25 10:12:24.000000000 +0200 | |
3 | +++ linux/include/linux/rtnetlink.h 2009-02-06 09:43:23.000000000 +0200 | |
4 | @@ -304,6 +304,8 @@ struct rtnexthop | |
5 | #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ | |
6 | #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ | |
7 | #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ | |
8 | +#define RTNH_F_SUSPECT 8 /* We don't know the real state */ | |
9 | +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT) | |
10 | ||
11 | /* Macros to handle hexthops */ | |
12 | ||
13 | diff -urp v2.6.28/linux/include/net/flow.h linux/include/net/flow.h | |
14 | --- v2.6.28/linux/include/net/flow.h 2008-12-25 10:12:24.000000000 +0200 | |
15 | +++ linux/include/net/flow.h 2009-02-06 09:43:23.000000000 +0200 | |
16 | @@ -19,6 +19,8 @@ struct flowi { | |
17 | struct { | |
18 | __be32 daddr; | |
19 | __be32 saddr; | |
20 | + __be32 lsrc; | |
21 | + __be32 gw; | |
22 | __u8 tos; | |
23 | __u8 scope; | |
24 | } ip4_u; | |
25 | @@ -43,6 +45,8 @@ struct flowi { | |
26 | #define fl6_flowlabel nl_u.ip6_u.flowlabel | |
27 | #define fl4_dst nl_u.ip4_u.daddr | |
28 | #define fl4_src nl_u.ip4_u.saddr | |
29 | +#define fl4_lsrc nl_u.ip4_u.lsrc | |
30 | +#define fl4_gw nl_u.ip4_u.gw | |
31 | #define fl4_tos nl_u.ip4_u.tos | |
32 | #define fl4_scope nl_u.ip4_u.scope | |
33 | ||
34 | diff -urp v2.6.28/linux/include/net/ip_fib.h linux/include/net/ip_fib.h | |
35 | --- v2.6.28/linux/include/net/ip_fib.h 2008-04-17 09:58:08.000000000 +0300 | |
36 | +++ linux/include/net/ip_fib.h 2009-02-06 09:43:23.000000000 +0200 | |
37 | @@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str | |
38 | extern struct fib_table *fib_new_table(struct net *net, u32 id); | |
39 | extern struct fib_table *fib_get_table(struct net *net, u32 id); | |
40 | ||
41 | +extern int fib_result_table(struct fib_result *res); | |
42 | + | |
43 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | |
44 | ||
45 | /* Exported by fib_frontend.c */ | |
46 | @@ -276,4 +278,6 @@ static inline void fib_proc_exit(struct | |
47 | } | |
48 | #endif | |
49 | ||
50 | +extern rwlock_t fib_nhflags_lock; | |
51 | + | |
52 | #endif /* _NET_FIB_H */ | |
53 | diff -urp v2.6.28/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h | |
54 | --- v2.6.28/linux/include/net/netfilter/nf_nat.h 2008-04-17 09:58:08.000000000 +0300 | |
55 | +++ linux/include/net/netfilter/nf_nat.h 2009-02-06 09:43:23.000000000 +0200 | |
56 | @@ -77,6 +77,13 @@ struct nf_conn_nat | |
57 | #endif | |
58 | }; | |
59 | ||
60 | +/* Call input routing for SNAT-ed traffic */ | |
61 | +extern unsigned int ip_nat_route_input(unsigned int hooknum, | |
62 | + struct sk_buff *skb, | |
63 | + const struct net_device *in, | |
64 | + const struct net_device *out, | |
65 | + int (*okfn)(struct sk_buff *)); | |
66 | + | |
67 | /* Set up the info structure to map into this range. */ | |
68 | extern unsigned int nf_nat_setup_info(struct nf_conn *ct, | |
69 | const struct nf_nat_range *range, | |
70 | diff -urp v2.6.28/linux/include/net/route.h linux/include/net/route.h | |
71 | --- v2.6.28/linux/include/net/route.h 2008-12-25 10:12:24.000000000 +0200 | |
72 | +++ linux/include/net/route.h 2009-02-06 09:43:23.000000000 +0200 | |
73 | @@ -116,6 +116,7 @@ extern int __ip_route_output_key(struct | |
74 | extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); | |
75 | extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); | |
76 | extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); | |
77 | +extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc); | |
78 | extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev); | |
79 | extern void ip_rt_send_redirect(struct sk_buff *skb); | |
80 | ||
81 | diff -urp v2.6.28/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c | |
82 | --- v2.6.28/linux/net/bridge/br_netfilter.c 2008-12-25 10:12:25.000000000 +0200 | |
83 | +++ linux/net/bridge/br_netfilter.c 2009-02-06 09:43:23.000000000 +0200 | |
84 | @@ -341,6 +341,10 @@ static int br_nf_pre_routing_finish(stru | |
85 | struct nf_bridge_info *nf_bridge = skb->nf_bridge; | |
86 | int err; | |
87 | ||
88 | + /* Old skb->dst is not expected, it is lost in all cases */ | |
933f5665 AM |
89 | + skb_dst_drop(skb); |
90 | + | |
2380c486 JR |
91 | + |
92 | if (nf_bridge->mask & BRNF_PKT_TYPE) { | |
93 | skb->pkt_type = PACKET_OTHERHOST; | |
94 | nf_bridge->mask ^= BRNF_PKT_TYPE; | |
95 | diff -urp v2.6.28/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c | |
96 | --- v2.6.28/linux/net/ipv4/fib_frontend.c 2008-10-11 12:46:16.000000000 +0300 | |
97 | +++ linux/net/ipv4/fib_frontend.c 2009-02-06 09:43:23.000000000 +0200 | |
98 | @@ -47,6 +47,8 @@ | |
99 | ||
100 | #ifndef CONFIG_IP_MULTIPLE_TABLES | |
101 | ||
102 | +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN) | |
103 | + | |
104 | static int __net_init fib4_rules_init(struct net *net) | |
105 | { | |
106 | struct fib_table *local_table, *main_table; | |
107 | @@ -71,6 +73,8 @@ fail: | |
108 | } | |
109 | #else | |
110 | ||
111 | +#define FIB_RES_TABLE(r) (fib_result_table(r)) | |
112 | + | |
113 | struct fib_table *fib_new_table(struct net *net, u32 id) | |
114 | { | |
115 | struct fib_table *tb; | |
116 | @@ -125,7 +129,8 @@ void fib_select_default(struct net *net, | |
117 | table = res->r->table; | |
118 | #endif | |
119 | tb = fib_get_table(net, table); | |
120 | - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | |
121 | + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) || | |
122 | + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST) | |
123 | tb->tb_select_default(tb, flp, res); | |
124 | } | |
125 | ||
126 | @@ -239,6 +244,9 @@ int fib_validate_source(__be32 src, __be | |
127 | .tos = tos } }, | |
128 | .iif = oif }; | |
129 | struct fib_result res; | |
130 | + int table; | |
131 | + unsigned char prefixlen; | |
132 | + unsigned char scope; | |
133 | int no_addr, rpf; | |
134 | int ret; | |
135 | struct net *net; | |
136 | @@ -262,31 +270,35 @@ int fib_validate_source(__be32 src, __be | |
137 | goto e_inval_res; | |
138 | *spec_dst = FIB_RES_PREFSRC(res); | |
139 | fib_combine_itag(itag, &res); | |
140 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
141 | - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) | |
142 | -#else | |
143 | if (FIB_RES_DEV(res) == dev) | |
144 | -#endif | |
145 | { | |
146 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | |
147 | fib_res_put(&res); | |
148 | return ret; | |
149 | } | |
150 | + table = FIB_RES_TABLE(&res); | |
151 | + prefixlen = res.prefixlen; | |
152 | + scope = res.scope; | |
153 | fib_res_put(&res); | |
154 | if (no_addr) | |
155 | goto last_resort; | |
9474138d | 156 | - if (rpf == 1) |
2380c486 JR |
157 | - goto e_inval; |
158 | fl.oif = dev->ifindex; | |
159 | ||
160 | ret = 0; | |
161 | if (fib_lookup(net, &fl, &res) == 0) { | |
162 | - if (res.type == RTN_UNICAST) { | |
163 | + if (res.type == RTN_UNICAST && | |
164 | + ((table == FIB_RES_TABLE(&res) && | |
165 | + res.prefixlen >= prefixlen && res.scope >= scope) || | |
166 | + !rpf)) { | |
167 | *spec_dst = FIB_RES_PREFSRC(res); | |
168 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | |
169 | + fib_res_put(&res); | |
170 | + return ret; | |
171 | } | |
172 | fib_res_put(&res); | |
173 | } | |
174 | + if (rpf) | |
175 | + goto e_inval; | |
176 | return ret; | |
177 | ||
178 | last_resort: | |
179 | @@ -909,9 +921,7 @@ static int fib_inetaddr_event(struct not | |
180 | switch (event) { | |
181 | case NETDEV_UP: | |
182 | fib_add_ifaddr(ifa); | |
183 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
184 | fib_sync_up(dev); | |
185 | -#endif | |
186 | rt_cache_flush(dev_net(dev), -1); | |
187 | break; | |
188 | case NETDEV_DOWN: | |
189 | @@ -947,9 +957,7 @@ static int fib_netdev_event(struct notif | |
190 | for_ifa(in_dev) { | |
191 | fib_add_ifaddr(ifa); | |
192 | } endfor_ifa(in_dev); | |
193 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
194 | fib_sync_up(dev); | |
195 | -#endif | |
196 | rt_cache_flush(dev_net(dev), -1); | |
197 | break; | |
198 | case NETDEV_DOWN: | |
199 | diff -urp v2.6.28/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c | |
200 | --- v2.6.28/linux/net/ipv4/fib_hash.c 2008-10-11 12:46:16.000000000 +0300 | |
201 | +++ linux/net/ipv4/fib_hash.c 2009-02-06 09:43:23.000000000 +0200 | |
202 | @@ -278,25 +278,35 @@ out: | |
203 | static void | |
204 | fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) | |
205 | { | |
206 | - int order, last_idx; | |
207 | + int order, last_idx, last_dflt, last_nhsel; | |
208 | + struct fib_alias *first_fa = NULL; | |
209 | + struct hlist_head *head; | |
210 | struct hlist_node *node; | |
211 | struct fib_node *f; | |
212 | struct fib_info *fi = NULL; | |
213 | struct fib_info *last_resort; | |
214 | struct fn_hash *t = (struct fn_hash *)tb->tb_data; | |
215 | - struct fn_zone *fz = t->fn_zones[0]; | |
216 | + struct fn_zone *fz = t->fn_zones[res->prefixlen]; | |
217 | + __be32 k; | |
218 | ||
219 | if (fz == NULL) | |
220 | return; | |
221 | ||
222 | + k = fz_key(flp->fl4_dst, fz); | |
223 | + last_dflt = -2; | |
224 | + last_nhsel = 0; | |
225 | last_idx = -1; | |
226 | last_resort = NULL; | |
227 | order = -1; | |
228 | ||
229 | read_lock(&fib_hash_lock); | |
230 | - hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) { | |
231 | + head = &fz->fz_hash[fn_hash(k, fz)]; | |
232 | + hlist_for_each_entry(f, node, head, fn_hash) { | |
233 | struct fib_alias *fa; | |
234 | ||
235 | + if (f->fn_key != k) | |
236 | + continue; | |
237 | + | |
238 | list_for_each_entry(fa, &f->fn_alias, fa_list) { | |
239 | struct fib_info *next_fi = fa->fa_info; | |
240 | ||
241 | @@ -304,42 +314,56 @@ fn_hash_select_default(struct fib_table | |
242 | fa->fa_type != RTN_UNICAST) | |
243 | continue; | |
244 | ||
245 | + if (fa->fa_tos && | |
246 | + fa->fa_tos != flp->fl4_tos) | |
247 | + continue; | |
248 | if (next_fi->fib_priority > res->fi->fib_priority) | |
249 | break; | |
250 | - if (!next_fi->fib_nh[0].nh_gw || | |
251 | - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | |
252 | - continue; | |
253 | fa->fa_state |= FA_S_ACCESSED; | |
254 | ||
255 | - if (fi == NULL) { | |
256 | - if (next_fi != res->fi) | |
257 | - break; | |
258 | - } else if (!fib_detect_death(fi, order, &last_resort, | |
259 | - &last_idx, tb->tb_default)) { | |
260 | + if (!first_fa) { | |
261 | + last_dflt = fa->fa_last_dflt; | |
262 | + first_fa = fa; | |
263 | + } | |
264 | + if (fi && !fib_detect_death(fi, order, &last_resort, | |
265 | + &last_idx, &last_dflt, &last_nhsel, flp)) { | |
266 | fib_result_assign(res, fi); | |
267 | - tb->tb_default = order; | |
268 | + first_fa->fa_last_dflt = order; | |
269 | goto out; | |
270 | } | |
271 | fi = next_fi; | |
272 | order++; | |
273 | } | |
274 | + break; | |
275 | } | |
276 | ||
277 | if (order <= 0 || fi == NULL) { | |
278 | - tb->tb_default = -1; | |
279 | + if (fi && fi->fib_nhs > 1 && | |
280 | + fib_detect_death(fi, order, &last_resort, &last_idx, | |
281 | + &last_dflt, &last_nhsel, flp) && | |
282 | + last_resort == fi) { | |
283 | + read_lock_bh(&fib_nhflags_lock); | |
284 | + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
285 | + read_unlock_bh(&fib_nhflags_lock); | |
286 | + } | |
287 | + if (first_fa) first_fa->fa_last_dflt = -1; | |
288 | goto out; | |
289 | } | |
290 | ||
291 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | |
292 | - tb->tb_default)) { | |
293 | + &last_dflt, &last_nhsel, flp)) { | |
294 | fib_result_assign(res, fi); | |
295 | - tb->tb_default = order; | |
296 | + first_fa->fa_last_dflt = order; | |
297 | goto out; | |
298 | } | |
299 | ||
300 | - if (last_idx >= 0) | |
301 | + if (last_idx >= 0) { | |
302 | fib_result_assign(res, last_resort); | |
303 | - tb->tb_default = last_idx; | |
304 | + read_lock_bh(&fib_nhflags_lock); | |
305 | + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
306 | + read_unlock_bh(&fib_nhflags_lock); | |
307 | + first_fa->fa_last_dflt = last_idx; | |
308 | + } | |
309 | out: | |
310 | read_unlock(&fib_hash_lock); | |
311 | } | |
312 | @@ -463,6 +487,7 @@ static int fn_hash_insert(struct fib_tab | |
313 | write_lock_bh(&fib_hash_lock); | |
314 | fi_drop = fa->fa_info; | |
315 | fa->fa_info = fi; | |
316 | + fa->fa_last_dflt = -1; | |
317 | fa->fa_type = cfg->fc_type; | |
318 | fa->fa_scope = cfg->fc_scope; | |
319 | state = fa->fa_state; | |
320 | @@ -517,6 +542,7 @@ static int fn_hash_insert(struct fib_tab | |
321 | new_fa->fa_type = cfg->fc_type; | |
322 | new_fa->fa_scope = cfg->fc_scope; | |
323 | new_fa->fa_state = 0; | |
324 | + new_fa->fa_last_dflt = -1; | |
325 | ||
326 | /* | |
327 | * Insert new entry to the list. | |
328 | diff -urp v2.6.28/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h | |
329 | --- v2.6.28/linux/net/ipv4/fib_lookup.h 2008-04-17 09:58:09.000000000 +0300 | |
330 | +++ linux/net/ipv4/fib_lookup.h 2009-02-06 09:43:23.000000000 +0200 | |
331 | @@ -8,6 +8,7 @@ | |
332 | struct fib_alias { | |
333 | struct list_head fa_list; | |
334 | struct fib_info *fa_info; | |
335 | + int fa_last_dflt; | |
336 | u8 fa_tos; | |
337 | u8 fa_type; | |
338 | u8 fa_scope; | |
339 | @@ -38,7 +39,8 @@ extern struct fib_alias *fib_find_alias( | |
340 | u8 tos, u32 prio); | |
341 | extern int fib_detect_death(struct fib_info *fi, int order, | |
342 | struct fib_info **last_resort, | |
343 | - int *last_idx, int dflt); | |
344 | + int *last_idx, int *dflt, int *last_nhsel, | |
345 | + const struct flowi *flp); | |
346 | ||
347 | static inline void fib_result_assign(struct fib_result *res, | |
348 | struct fib_info *fi) | |
349 | diff -urp v2.6.28/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c | |
350 | --- v2.6.28/linux/net/ipv4/fib_rules.c 2008-10-11 12:46:16.000000000 +0300 | |
351 | +++ linux/net/ipv4/fib_rules.c 2009-02-06 09:43:23.000000000 +0200 | |
352 | @@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result * | |
353 | } | |
354 | #endif | |
355 | ||
356 | +int fib_result_table(struct fib_result *res) | |
357 | +{ | |
358 | + return res->r->table; | |
359 | +} | |
360 | + | |
361 | int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | |
362 | { | |
363 | struct fib_lookup_arg arg = { | |
364 | diff -urp v2.6.28/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c | |
365 | --- v2.6.28/linux/net/ipv4/fib_semantics.c 2008-10-11 12:46:16.000000000 +0300 | |
366 | +++ linux/net/ipv4/fib_semantics.c 2009-02-06 09:43:23.000000000 +0200 | |
367 | @@ -50,6 +50,7 @@ static struct hlist_head *fib_info_hash; | |
368 | static struct hlist_head *fib_info_laddrhash; | |
369 | static unsigned int fib_hash_size; | |
370 | static unsigned int fib_info_cnt; | |
371 | +rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED; | |
372 | ||
373 | #define DEVINDEX_HASHBITS 8 | |
374 | #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) | |
375 | @@ -186,7 +187,7 @@ static __inline__ int nh_comp(const stru | |
376 | #ifdef CONFIG_NET_CLS_ROUTE | |
377 | nh->nh_tclassid != onh->nh_tclassid || | |
378 | #endif | |
379 | - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) | |
380 | + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE)) | |
381 | return -1; | |
382 | onh++; | |
383 | } endfor_nexthops(fi); | |
384 | @@ -237,7 +238,7 @@ static struct fib_info *fib_find_info(co | |
385 | nfi->fib_priority == fi->fib_priority && | |
386 | memcmp(nfi->fib_metrics, fi->fib_metrics, | |
387 | sizeof(fi->fib_metrics)) == 0 && | |
388 | - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && | |
389 | + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 && | |
390 | (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) | |
391 | return fi; | |
392 | } | |
393 | @@ -348,26 +349,70 @@ struct fib_alias *fib_find_alias(struct | |
394 | } | |
395 | ||
396 | int fib_detect_death(struct fib_info *fi, int order, | |
397 | - struct fib_info **last_resort, int *last_idx, int dflt) | |
398 | + struct fib_info **last_resort, int *last_idx, int *dflt, | |
399 | + int *last_nhsel, const struct flowi *flp) | |
400 | { | |
401 | struct neighbour *n; | |
402 | - int state = NUD_NONE; | |
403 | + int nhsel; | |
404 | + int state; | |
405 | + struct fib_nh * nh; | |
406 | + __be32 dst; | |
407 | + int flag, dead = 1; | |
408 | + | |
409 | + /* change_nexthops(fi) { */ | |
410 | + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) { | |
411 | + if (flp->oif && flp->oif != nh->nh_oif) | |
412 | + continue; | |
413 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw && | |
414 | + nh->nh_scope == RT_SCOPE_LINK) | |
415 | + continue; | |
416 | + if (nh->nh_flags & RTNH_F_DEAD) | |
417 | + continue; | |
418 | ||
419 | - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); | |
420 | - if (n) { | |
421 | - state = n->nud_state; | |
422 | - neigh_release(n); | |
423 | - } | |
424 | - if (state == NUD_REACHABLE) | |
425 | - return 0; | |
426 | - if ((state&NUD_VALID) && order != dflt) | |
427 | - return 0; | |
428 | - if ((state&NUD_VALID) || | |
429 | - (*last_idx<0 && order > dflt)) { | |
430 | - *last_resort = fi; | |
431 | - *last_idx = order; | |
432 | + flag = 0; | |
433 | + if (nh->nh_dev->flags & IFF_NOARP) { | |
434 | + dead = 0; | |
435 | + goto setfl; | |
436 | + } | |
437 | + | |
438 | + dst = nh->nh_gw; | |
439 | + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK) | |
440 | + dst = flp->fl4_dst; | |
441 | + | |
442 | + state = NUD_NONE; | |
443 | + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev); | |
444 | + if (n) { | |
445 | + state = n->nud_state; | |
446 | + neigh_release(n); | |
447 | + } | |
448 | + if (state == NUD_REACHABLE || | |
449 | + ((state&NUD_VALID) && order != *dflt)) { | |
450 | + dead = 0; | |
451 | + goto setfl; | |
452 | + } | |
453 | + if (!(state&NUD_VALID)) | |
454 | + flag = 1; | |
455 | + if (!dead) | |
456 | + goto setfl; | |
457 | + if ((state&NUD_VALID) || | |
458 | + (*last_idx<0 && order >= *dflt)) { | |
459 | + *last_resort = fi; | |
460 | + *last_idx = order; | |
461 | + *last_nhsel = nhsel; | |
462 | + } | |
463 | + | |
464 | + setfl: | |
465 | + | |
466 | + read_lock_bh(&fib_nhflags_lock); | |
467 | + if (flag) | |
468 | + nh->nh_flags |= RTNH_F_SUSPECT; | |
469 | + else | |
470 | + nh->nh_flags &= ~RTNH_F_SUSPECT; | |
471 | + read_unlock_bh(&fib_nhflags_lock); | |
472 | } | |
473 | - return 1; | |
474 | + /* } endfor_nexthops(fi) */ | |
475 | + | |
476 | + return dead; | |
477 | } | |
478 | ||
479 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
480 | @@ -539,8 +584,11 @@ static int fib_check_nh(struct fib_confi | |
481 | return -EINVAL; | |
482 | if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) | |
483 | return -ENODEV; | |
484 | - if (!(dev->flags&IFF_UP)) | |
485 | - return -ENETDOWN; | |
486 | + if (!(dev->flags&IFF_UP)) { | |
487 | + if (fi->fib_protocol != RTPROT_STATIC) | |
488 | + return -ENETDOWN; | |
489 | + nh->nh_flags |= RTNH_F_DEAD; | |
490 | + } | |
491 | nh->nh_dev = dev; | |
492 | dev_hold(dev); | |
493 | nh->nh_scope = RT_SCOPE_LINK; | |
494 | @@ -560,24 +608,48 @@ static int fib_check_nh(struct fib_confi | |
495 | /* It is not necessary, but requires a bit of thinking */ | |
496 | if (fl.fl4_scope < RT_SCOPE_LINK) | |
497 | fl.fl4_scope = RT_SCOPE_LINK; | |
498 | - if ((err = fib_lookup(net, &fl, &res)) != 0) | |
499 | - return err; | |
500 | + err = fib_lookup(net, &fl, &res); | |
501 | } | |
502 | - err = -EINVAL; | |
503 | - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | |
504 | - goto out; | |
505 | - nh->nh_scope = res.scope; | |
506 | - nh->nh_oif = FIB_RES_OIF(res); | |
507 | - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) | |
508 | - goto out; | |
509 | - dev_hold(nh->nh_dev); | |
510 | - err = -ENETDOWN; | |
511 | - if (!(nh->nh_dev->flags & IFF_UP)) | |
512 | - goto out; | |
513 | - err = 0; | |
514 | + if (err) { | |
515 | + struct in_device *in_dev; | |
516 | + | |
517 | + if (err != -ENETUNREACH || | |
518 | + fi->fib_protocol != RTPROT_STATIC) | |
519 | + return err; | |
520 | + | |
521 | + in_dev = inetdev_by_index(net, nh->nh_oif); | |
522 | + if (in_dev == NULL || | |
523 | + in_dev->dev->flags & IFF_UP) { | |
524 | + if (in_dev) | |
525 | + in_dev_put(in_dev); | |
526 | + return err; | |
527 | + } | |
528 | + nh->nh_flags |= RTNH_F_DEAD; | |
529 | + nh->nh_scope = RT_SCOPE_LINK; | |
530 | + nh->nh_dev = in_dev->dev; | |
531 | + dev_hold(nh->nh_dev); | |
532 | + in_dev_put(in_dev); | |
533 | + } else { | |
534 | + err = -EINVAL; | |
535 | + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | |
536 | + goto out; | |
537 | + nh->nh_scope = res.scope; | |
538 | + nh->nh_oif = FIB_RES_OIF(res); | |
539 | + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) | |
540 | + goto out; | |
541 | + dev_hold(nh->nh_dev); | |
542 | + if (!(nh->nh_dev->flags & IFF_UP)) { | |
543 | + if (fi->fib_protocol != RTPROT_STATIC) { | |
544 | + err = -ENETDOWN; | |
545 | + goto out; | |
546 | + } | |
547 | + nh->nh_flags |= RTNH_F_DEAD; | |
548 | + } | |
549 | + err = 0; | |
550 | out: | |
551 | - fib_res_put(&res); | |
552 | - return err; | |
553 | + fib_res_put(&res); | |
554 | + return err; | |
555 | + } | |
556 | } else { | |
557 | struct in_device *in_dev; | |
558 | ||
559 | @@ -588,8 +660,11 @@ out: | |
560 | if (in_dev == NULL) | |
561 | return -ENODEV; | |
562 | if (!(in_dev->dev->flags&IFF_UP)) { | |
563 | - in_dev_put(in_dev); | |
564 | - return -ENETDOWN; | |
565 | + if (fi->fib_protocol != RTPROT_STATIC) { | |
566 | + in_dev_put(in_dev); | |
567 | + return -ENETDOWN; | |
568 | + } | |
569 | + nh->nh_flags |= RTNH_F_DEAD; | |
570 | } | |
571 | nh->nh_dev = in_dev->dev; | |
572 | dev_hold(nh->nh_dev); | |
573 | @@ -899,8 +974,12 @@ int fib_semantic_match(struct list_head | |
574 | for_nexthops(fi) { | |
575 | if (nh->nh_flags&RTNH_F_DEAD) | |
576 | continue; | |
577 | - if (!flp->oif || flp->oif == nh->nh_oif) | |
578 | - break; | |
579 | + if (flp->oif && flp->oif != nh->nh_oif) | |
580 | + continue; | |
581 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
582 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
583 | + continue; | |
584 | + break; | |
585 | } | |
586 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
587 | if (nhsel < fi->fib_nhs) { | |
588 | @@ -1080,18 +1159,29 @@ int fib_sync_down_dev(struct net_device | |
589 | prev_fi = fi; | |
590 | dead = 0; | |
591 | change_nexthops(fi) { | |
592 | - if (nh->nh_flags&RTNH_F_DEAD) | |
593 | - dead++; | |
594 | - else if (nh->nh_dev == dev && | |
595 | - nh->nh_scope != scope) { | |
596 | - nh->nh_flags |= RTNH_F_DEAD; | |
597 | + if (nh->nh_flags&RTNH_F_DEAD) { | |
598 | + if (fi->fib_protocol!=RTPROT_STATIC || | |
599 | + nh->nh_dev == NULL || | |
600 | + __in_dev_get_rtnl(nh->nh_dev) == NULL || | |
601 | + nh->nh_dev->flags&IFF_UP) | |
602 | + dead++; | |
603 | + } else if (nh->nh_dev == dev && | |
604 | + nh->nh_scope != scope) { | |
605 | + write_lock_bh(&fib_nhflags_lock); | |
606 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
607 | - spin_lock_bh(&fib_multipath_lock); | |
608 | + spin_lock(&fib_multipath_lock); | |
609 | + nh->nh_flags |= RTNH_F_DEAD; | |
610 | fi->fib_power -= nh->nh_power; | |
611 | nh->nh_power = 0; | |
612 | - spin_unlock_bh(&fib_multipath_lock); | |
613 | + spin_unlock(&fib_multipath_lock); | |
614 | +#else | |
615 | + nh->nh_flags |= RTNH_F_DEAD; | |
616 | #endif | |
617 | - dead++; | |
618 | + write_unlock_bh(&fib_nhflags_lock); | |
619 | + if (fi->fib_protocol!=RTPROT_STATIC || | |
620 | + force || | |
621 | + __in_dev_get_rtnl(dev) == NULL) | |
622 | + dead++; | |
623 | } | |
624 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
625 | if (force > 1 && nh->nh_dev == dev) { | |
626 | @@ -1109,11 +1199,8 @@ int fib_sync_down_dev(struct net_device | |
627 | return ret; | |
628 | } | |
629 | ||
630 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
631 | - | |
632 | /* | |
633 | - Dead device goes up. We wake up dead nexthops. | |
634 | - It takes sense only on multipath routes. | |
635 | + Dead device goes up or new address is added. We wake up dead nexthops. | |
636 | */ | |
637 | ||
638 | int fib_sync_up(struct net_device *dev) | |
639 | @@ -1123,8 +1210,10 @@ int fib_sync_up(struct net_device *dev) | |
640 | struct hlist_head *head; | |
641 | struct hlist_node *node; | |
642 | struct fib_nh *nh; | |
643 | - int ret; | |
644 | + struct fib_result res; | |
645 | + int ret, rep; | |
646 | ||
647 | +repeat: | |
648 | if (!(dev->flags&IFF_UP)) | |
649 | return 0; | |
650 | ||
651 | @@ -1132,6 +1221,7 @@ int fib_sync_up(struct net_device *dev) | |
652 | hash = fib_devindex_hashfn(dev->ifindex); | |
653 | head = &fib_info_devhash[hash]; | |
654 | ret = 0; | |
655 | + rep = 0; | |
656 | ||
657 | hlist_for_each_entry(nh, node, head, nh_hash) { | |
658 | struct fib_info *fi = nh->nh_parent; | |
659 | @@ -1144,19 +1234,39 @@ int fib_sync_up(struct net_device *dev) | |
660 | prev_fi = fi; | |
661 | alive = 0; | |
662 | change_nexthops(fi) { | |
663 | - if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
664 | - alive++; | |
665 | + if (!(nh->nh_flags&RTNH_F_DEAD)) | |
666 | continue; | |
667 | - } | |
668 | if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) | |
669 | continue; | |
670 | if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) | |
671 | continue; | |
672 | + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) { | |
673 | + struct flowi fl = { | |
674 | + .nl_u = { .ip4_u = | |
675 | + { .daddr = nh->nh_gw, | |
676 | + .scope = nh->nh_scope } }, | |
677 | + .oif = nh->nh_oif, | |
678 | + }; | |
679 | + if (fib_lookup(dev_net(dev), &fl, &res) != 0) | |
680 | + continue; | |
681 | + if (res.type != RTN_UNICAST && | |
682 | + res.type != RTN_LOCAL) { | |
683 | + fib_res_put(&res); | |
684 | + continue; | |
685 | + } | |
686 | + nh->nh_scope = res.scope; | |
687 | + fib_res_put(&res); | |
688 | + rep = 1; | |
689 | + } | |
690 | alive++; | |
691 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
692 | spin_lock_bh(&fib_multipath_lock); | |
693 | nh->nh_power = 0; | |
694 | +#endif | |
695 | nh->nh_flags &= ~RTNH_F_DEAD; | |
696 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
697 | spin_unlock_bh(&fib_multipath_lock); | |
698 | +#endif | |
699 | } endfor_nexthops(fi) | |
700 | ||
701 | if (alive > 0) { | |
702 | @@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev) | |
703 | ret++; | |
704 | } | |
705 | } | |
706 | + if (rep) | |
707 | + goto repeat; | |
708 | ||
709 | return ret; | |
710 | } | |
711 | ||
712 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
713 | + | |
714 | /* | |
715 | The algorithm is suboptimal, but it provides really | |
716 | fair weighted route distribution. | |
717 | @@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev) | |
718 | void fib_select_multipath(const struct flowi *flp, struct fib_result *res) | |
719 | { | |
720 | struct fib_info *fi = res->fi; | |
721 | - int w; | |
722 | + int w, alive; | |
723 | ||
724 | spin_lock_bh(&fib_multipath_lock); | |
725 | + if (flp->oif) { | |
726 | + int sel = -1; | |
727 | + w = -1; | |
728 | + change_nexthops(fi) { | |
729 | + if (flp->oif != nh->nh_oif) | |
730 | + continue; | |
731 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
732 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
733 | + continue; | |
734 | + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { | |
735 | + if (nh->nh_power > w) { | |
736 | + w = nh->nh_power; | |
737 | + sel = nhsel; | |
738 | + } | |
739 | + } | |
740 | + } endfor_nexthops(fi); | |
741 | + if (sel >= 0) { | |
742 | + spin_unlock_bh(&fib_multipath_lock); | |
743 | + res->nh_sel = sel; | |
744 | + return; | |
745 | + } | |
746 | + goto last_resort; | |
747 | + } | |
748 | + | |
749 | +repeat: | |
750 | if (fi->fib_power <= 0) { | |
751 | int power = 0; | |
752 | change_nexthops(fi) { | |
753 | - if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
754 | + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { | |
755 | power += nh->nh_weight; | |
756 | nh->nh_power = nh->nh_weight; | |
757 | } | |
758 | } endfor_nexthops(fi); | |
759 | fi->fib_power = power; | |
760 | - if (power <= 0) { | |
761 | - spin_unlock_bh(&fib_multipath_lock); | |
762 | - /* Race condition: route has just become dead. */ | |
763 | - res->nh_sel = 0; | |
764 | - return; | |
765 | - } | |
766 | + if (power <= 0) | |
767 | + goto last_resort; | |
768 | } | |
769 | ||
770 | ||
771 | @@ -1203,20 +1338,40 @@ void fib_select_multipath(const struct f | |
772 | ||
773 | w = jiffies % fi->fib_power; | |
774 | ||
775 | + alive = 0; | |
776 | change_nexthops(fi) { | |
777 | - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { | |
778 | + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) { | |
779 | if ((w -= nh->nh_power) <= 0) { | |
780 | nh->nh_power--; | |
781 | fi->fib_power--; | |
782 | - res->nh_sel = nhsel; | |
783 | spin_unlock_bh(&fib_multipath_lock); | |
784 | + res->nh_sel = nhsel; | |
785 | return; | |
786 | } | |
787 | + alive = 1; | |
788 | + } | |
789 | + } endfor_nexthops(fi); | |
790 | + if (alive) { | |
791 | + fi->fib_power = 0; | |
792 | + goto repeat; | |
793 | + } | |
794 | + | |
795 | +last_resort: | |
796 | + | |
797 | + for_nexthops(fi) { | |
798 | + if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
799 | + if (flp->oif && flp->oif != nh->nh_oif) | |
800 | + continue; | |
801 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
802 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
803 | + continue; | |
804 | + spin_unlock_bh(&fib_multipath_lock); | |
805 | + res->nh_sel = nhsel; | |
806 | + return; | |
807 | } | |
808 | } endfor_nexthops(fi); | |
809 | ||
810 | /* Race condition: route has just become dead. */ | |
811 | - res->nh_sel = 0; | |
812 | spin_unlock_bh(&fib_multipath_lock); | |
813 | } | |
814 | #endif | |
815 | diff -urp v2.6.28/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c | |
816 | --- v2.6.28/linux/net/ipv4/fib_trie.c 2008-10-11 12:46:16.000000000 +0300 | |
817 | +++ linux/net/ipv4/fib_trie.c 2009-02-06 09:43:23.000000000 +0200 | |
818 | @@ -1261,6 +1261,7 @@ static int fn_trie_insert(struct fib_tab | |
819 | fi_drop = fa->fa_info; | |
820 | new_fa->fa_tos = fa->fa_tos; | |
821 | new_fa->fa_info = fi; | |
822 | + new_fa->fa_last_dflt = -1; | |
823 | new_fa->fa_type = cfg->fc_type; | |
824 | new_fa->fa_scope = cfg->fc_scope; | |
825 | state = fa->fa_state; | |
826 | @@ -1301,6 +1302,7 @@ static int fn_trie_insert(struct fib_tab | |
827 | new_fa->fa_type = cfg->fc_type; | |
828 | new_fa->fa_scope = cfg->fc_scope; | |
829 | new_fa->fa_state = 0; | |
830 | + new_fa->fa_last_dflt = -1; | |
831 | /* | |
832 | * Insert new entry to the list. | |
833 | */ | |
834 | @@ -1802,24 +1804,31 @@ static void fn_trie_select_default(struc | |
835 | struct fib_result *res) | |
836 | { | |
837 | struct trie *t = (struct trie *) tb->tb_data; | |
838 | - int order, last_idx; | |
839 | + int order, last_idx, last_dflt, last_nhsel; | |
840 | + struct fib_alias *first_fa = NULL; | |
841 | struct fib_info *fi = NULL; | |
842 | struct fib_info *last_resort; | |
843 | struct fib_alias *fa = NULL; | |
844 | struct list_head *fa_head; | |
845 | struct leaf *l; | |
846 | + u32 key, mask; | |
847 | ||
848 | + last_dflt = -2; | |
849 | + last_nhsel = 0; | |
850 | last_idx = -1; | |
851 | last_resort = NULL; | |
852 | order = -1; | |
853 | ||
854 | + mask = inet_make_mask(res->prefixlen); | |
855 | + key = ntohl(flp->fl4_dst & mask); | |
856 | + | |
857 | rcu_read_lock(); | |
858 | ||
859 | - l = fib_find_node(t, 0); | |
860 | + l = fib_find_node(t, key); | |
861 | if (!l) | |
862 | goto out; | |
863 | ||
864 | - fa_head = get_fa_head(l, 0); | |
865 | + fa_head = get_fa_head(l, res->prefixlen); | |
866 | if (!fa_head) | |
867 | goto out; | |
868 | ||
869 | @@ -1833,39 +1842,52 @@ static void fn_trie_select_default(struc | |
870 | fa->fa_type != RTN_UNICAST) | |
871 | continue; | |
872 | ||
873 | + if (fa->fa_tos && | |
874 | + fa->fa_tos != flp->fl4_tos) | |
875 | + continue; | |
876 | if (next_fi->fib_priority > res->fi->fib_priority) | |
877 | break; | |
878 | - if (!next_fi->fib_nh[0].nh_gw || | |
879 | - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | |
880 | - continue; | |
881 | fa->fa_state |= FA_S_ACCESSED; | |
882 | ||
883 | - if (fi == NULL) { | |
884 | - if (next_fi != res->fi) | |
885 | - break; | |
886 | - } else if (!fib_detect_death(fi, order, &last_resort, | |
887 | - &last_idx, tb->tb_default)) { | |
888 | + if (!first_fa) { | |
889 | + last_dflt = fa->fa_last_dflt; | |
890 | + first_fa = fa; | |
891 | + } | |
892 | + if (fi && !fib_detect_death(fi, order, &last_resort, | |
893 | + &last_idx, &last_dflt, &last_nhsel, flp)) { | |
894 | fib_result_assign(res, fi); | |
895 | - tb->tb_default = order; | |
896 | + first_fa->fa_last_dflt = order; | |
897 | goto out; | |
898 | } | |
899 | fi = next_fi; | |
900 | order++; | |
901 | } | |
902 | if (order <= 0 || fi == NULL) { | |
903 | - tb->tb_default = -1; | |
904 | + if (fi && fi->fib_nhs > 1 && | |
905 | + fib_detect_death(fi, order, &last_resort, &last_idx, | |
906 | + &last_dflt, &last_nhsel, flp) && | |
907 | + last_resort == fi) { | |
908 | + read_lock_bh(&fib_nhflags_lock); | |
909 | + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
910 | + read_unlock_bh(&fib_nhflags_lock); | |
911 | + } | |
912 | + if (first_fa) first_fa->fa_last_dflt = -1; | |
913 | goto out; | |
914 | } | |
915 | ||
916 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | |
917 | - tb->tb_default)) { | |
918 | + &last_dflt, &last_nhsel, flp)) { | |
919 | fib_result_assign(res, fi); | |
920 | - tb->tb_default = order; | |
921 | + first_fa->fa_last_dflt = order; | |
922 | goto out; | |
923 | } | |
924 | - if (last_idx >= 0) | |
925 | + if (last_idx >= 0) { | |
926 | fib_result_assign(res, last_resort); | |
927 | - tb->tb_default = last_idx; | |
928 | + read_lock_bh(&fib_nhflags_lock); | |
929 | + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
930 | + read_unlock_bh(&fib_nhflags_lock); | |
931 | + first_fa->fa_last_dflt = last_idx; | |
932 | + } | |
933 | out: | |
934 | rcu_read_unlock(); | |
935 | } | |
936 | diff -urp v2.6.28/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c | |
937 | --- v2.6.28/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-12-25 10:12:25.000000000 +0200 | |
938 | +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2009-02-06 09:43:23.000000000 +0200 | |
939 | @@ -54,7 +54,7 @@ masquerade_tg(struct sk_buff *skb, const | |
940 | enum ip_conntrack_info ctinfo; | |
941 | struct nf_nat_range newrange; | |
942 | const struct nf_nat_multi_range_compat *mr; | |
943 | - const struct rtable *rt; | |
944 | + struct rtable *rt; | |
945 | __be32 newsrc; | |
946 | ||
947 | NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); | |
13e5c3b1 | 948 | @@ -69,13 +69,27 @@ masquerade_tg(struct sk_buff *skb, const |
2380c486 JR |
949 | return NF_ACCEPT; |
950 | ||
951 | mr = par->targinfo; | |
13e5c3b1 | 952 | - rt = skb_rtable(skb); |
2380c486 JR |
953 | - newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE); |
954 | - if (!newsrc) { | |
955 | - printk("MASQUERADE: %s ate my IP address\n", par->out->name); | |
956 | - return NF_DROP; | |
2380c486 JR |
957 | + { |
958 | + struct flowi fl = { .nl_u = { .ip4_u = | |
13e5c3b1 AM |
959 | + { .daddr = ip_hdr(skb)->daddr, |
960 | + .tos = (RT_TOS(ip_hdr(skb)->tos) | | |
961 | + RTO_CONN), | |
84685b46 | 962 | + .gw = skb_rtable(skb)->rt_gateway, |
13e5c3b1 AM |
963 | + } }, |
964 | + .mark = skb->mark, | |
965 | + .oif = par->out->ifindex }; | |
2380c486 JR |
966 | + if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) { |
967 | + /* Funky routing can do this. */ | |
968 | + if (net_ratelimit()) | |
969 | + printk("MASQUERADE:" | |
13e5c3b1 | 970 | + " No route: Rusty's brain broke!\n"); |
2380c486 JR |
971 | + return NF_DROP; |
972 | + } | |
973 | } | |
974 | ||
975 | + newsrc = rt->rt_src; | |
976 | + ip_rt_put(rt); | |
977 | + | |
2380c486 | 978 | nat->masq_index = par->out->ifindex; |
13e5c3b1 AM |
979 | |
980 | /* Transfer from original range. */ | |
2380c486 JR |
981 | diff -urp v2.6.28/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c |
982 | --- v2.6.28/linux/net/ipv4/netfilter/nf_nat_core.c 2008-12-25 10:12:25.000000000 +0200 | |
983 | +++ linux/net/ipv4/netfilter/nf_nat_core.c 2009-02-06 09:43:23.000000000 +0200 | |
984 | @@ -710,6 +710,52 @@ static struct pernet_operations nf_nat_n | |
985 | .exit = nf_nat_net_exit, | |
986 | }; | |
987 | ||
988 | +unsigned int | |
989 | +ip_nat_route_input(unsigned int hooknum, | |
990 | + struct sk_buff *skb, | |
991 | + const struct net_device *in, | |
992 | + const struct net_device *out, | |
993 | + int (*okfn)(struct sk_buff *)) | |
994 | +{ | |
995 | + struct iphdr *iph; | |
996 | + struct nf_conn *conn; | |
997 | + enum ip_conntrack_info ctinfo; | |
998 | + enum ip_conntrack_dir dir; | |
999 | + unsigned long statusbit; | |
1000 | + __be32 saddr; | |
1001 | + /* Hook body: for packets of a NATed connection, do the input route lookup with an lsrc hint. Returns NF_ACCEPT, or NF_DROP when that lookup fails. */ | |
1002 | + if (!(conn = nf_ct_get(skb, &ctinfo))) /* nf_ct_get() gave no connection for this skb */ | |
1003 | + return NF_ACCEPT; | |
1004 | + | |
1005 | + if (!(conn->status & IPS_NAT_DONE_MASK)) /* none of the IPS_NAT_DONE_MASK bits are set */ | |
1006 | + return NF_ACCEPT; | |
1007 | + dir = CTINFO2DIR(ctinfo); | |
1008 | + statusbit = IPS_SRC_NAT; | |
1009 | + if (dir == IP_CT_DIR_REPLY) | |
1010 | + statusbit ^= IPS_NAT_MASK; /* NOTE(review): presumably flips to the opposite NAT bit for replies - confirm against the IPS_NAT_MASK definition */ | |
1011 | + if (!(conn->status & statusbit)) /* the status bit chosen for this direction is not set */ | |
1012 | + return NF_ACCEPT; | |
1013 | + | |
95ff68cd | 1014 | + if (skb_dst(skb)) /* skb_dst() is non-NULL: leave the skb untouched */ |
2380c486 JR |
1015 | + return NF_ACCEPT; |
1016 | + | |
1017 | + if (skb->len < sizeof(struct iphdr)) /* shorter than an IPv4 header */ | |
1018 | + return NF_ACCEPT; | |
1019 | + | |
1020 | + /* Use the destination address of the opposite-direction tuple as the masquerade address (lsrc). */ | |
1021 | + iph = ip_hdr(skb); | |
1022 | + saddr = conn->tuplehash[!dir].tuple.dst.u3.ip; | |
1023 | + if (saddr == iph->saddr) /* lsrc equals the packet's own source address: skip the lsrc lookup */ | |
1024 | + return NF_ACCEPT; | |
1025 | + | |
1026 | + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos, | |
1027 | + skb->dev, saddr)) | |
1028 | + return NF_DROP; /* ip_route_input_lookup() returned non-zero */ | |
1029 | + | |
1030 | + return NF_ACCEPT; | |
1031 | +} | |
1032 | +EXPORT_SYMBOL_GPL(ip_nat_route_input); | |
1033 | + | |
1034 | static int __init nf_nat_init(void) | |
1035 | { | |
1036 | size_t i; | |
1037 | diff -urp v2.6.28/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c | |
1038 | --- v2.6.28/linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-07-14 09:58:50.000000000 +0300 | |
1039 | +++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2009-02-06 09:43:23.000000000 +0200 | |
1040 | @@ -256,6 +256,14 @@ static struct nf_hook_ops nf_nat_ops[] _ | |
1041 | .hooknum = NF_INET_PRE_ROUTING, | |
1042 | .priority = NF_IP_PRI_NAT_DST, | |
1043 | }, | |
1044 | + /* Before routing, route before mangling */ | |
1045 | + { | |
1046 | + .hook = ip_nat_route_input, | |
1047 | + .owner = THIS_MODULE, | |
1048 | + .pf = PF_INET, | |
1049 | + .hooknum = NF_INET_PRE_ROUTING, | |
1050 | + .priority = NF_IP_PRI_LAST-1, | |
1051 | + }, | |
1052 | /* After packet filtering, change source */ | |
1053 | { | |
1054 | .hook = nf_nat_out, | |
1055 | diff -urp v2.6.28/linux/net/ipv4/route.c linux/net/ipv4/route.c | |
1056 | --- v2.6.28/linux/net/ipv4/route.c 2008-12-25 10:12:25.000000000 +0200 | |
1057 | +++ linux/net/ipv4/route.c 2009-02-06 09:43:43.000000000 +0200 | |
1058 | @@ -679,6 +679,7 @@ static inline int compare_keys(struct fl | |
1059 | return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) | | |
1060 | (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) | | |
1061 | (fl1->mark ^ fl2->mark) | | |
1062 | + ((__force u32)(fl1->nl_u.ip4_u.lsrc ^ fl2->nl_u.ip4_u.lsrc)) | | |
1063 | (*(u16 *)&fl1->nl_u.ip4_u.tos ^ | |
1064 | *(u16 *)&fl2->nl_u.ip4_u.tos) | | |
1065 | (fl1->oif ^ fl2->oif) | | |
1066 | @@ -1286,6 +1287,7 @@ void ip_rt_redirect(__be32 old_gw, __be3 | |
1067 | ||
1068 | /* Gateway is different ... */ | |
1069 | rt->rt_gateway = new_gw; | |
1070 | + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw; | |
1071 | ||
1072 | /* Redirect received -> path was valid */ | |
1073 | dst_confirm(&rth->u.dst); | |
1074 | @@ -1735,6 +1737,7 @@ static int ip_route_input_mc(struct sk_b | |
1075 | rth->fl.fl4_tos = tos; | |
1076 | rth->fl.mark = skb->mark; | |
1077 | rth->fl.fl4_src = saddr; | |
1078 | + rth->fl.fl4_lsrc = 0; | |
1079 | rth->rt_src = saddr; | |
1080 | #ifdef CONFIG_NET_CLS_ROUTE | |
1081 | rth->u.dst.tclassid = itag; | |
1082 | @@ -1745,6 +1748,7 @@ static int ip_route_input_mc(struct sk_b | |
1083 | dev_hold(rth->u.dst.dev); | |
1084 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1085 | rth->fl.oif = 0; | |
1086 | + rth->fl.fl4_gw = 0; | |
1087 | rth->rt_gateway = daddr; | |
1088 | rth->rt_spec_dst= spec_dst; | |
1089 | rth->rt_genid = rt_genid(dev_net(dev)); | |
1090 | @@ -1810,7 +1814,7 @@ static int __mkroute_input(struct sk_buf | |
1091 | struct fib_result *res, | |
1092 | struct in_device *in_dev, | |
1093 | __be32 daddr, __be32 saddr, u32 tos, | |
1094 | - struct rtable **result) | |
1095 | + __be32 lsrc, struct rtable **result) | |
1096 | { | |
1097 | ||
1098 | struct rtable *rth; | |
1099 | @@ -1844,6 +1848,7 @@ static int __mkroute_input(struct sk_buf | |
1100 | flags |= RTCF_DIRECTSRC; | |
1101 | ||
1102 | if (out_dev == in_dev && err && | |
1103 | + !lsrc && | |
1104 | (IN_DEV_SHARED_MEDIA(out_dev) || | |
1105 | inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) | |
1106 | flags |= RTCF_DOREDIRECT; | |
1107 | @@ -1877,6 +1882,7 @@ static int __mkroute_input(struct sk_buf | |
1108 | rth->fl.mark = skb->mark; | |
1109 | rth->fl.fl4_src = saddr; | |
1110 | rth->rt_src = saddr; | |
1111 | + rth->fl.fl4_lsrc = lsrc; | |
1112 | rth->rt_gateway = daddr; | |
1113 | rth->rt_iif = | |
1114 | rth->fl.iif = in_dev->dev->ifindex; | |
1115 | @@ -1884,6 +1890,7 @@ static int __mkroute_input(struct sk_buf | |
1116 | dev_hold(rth->u.dst.dev); | |
1117 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1118 | rth->fl.oif = 0; | |
1119 | + rth->fl.fl4_gw = 0; | |
1120 | rth->rt_spec_dst= spec_dst; | |
1121 | ||
1122 | rth->u.dst.input = ip_forward; | |
1123 | @@ -1904,21 +1911,23 @@ static int __mkroute_input(struct sk_buf | |
1124 | ||
1125 | static int ip_mkroute_input(struct sk_buff *skb, | |
1126 | struct fib_result *res, | |
1127 | + struct net *net, | |
1128 | const struct flowi *fl, | |
1129 | struct in_device *in_dev, | |
1130 | - __be32 daddr, __be32 saddr, u32 tos) | |
1131 | + __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc) | |
1132 | { | |
1133 | struct rtable* rth = NULL; | |
1134 | int err; | |
1135 | unsigned hash; | |
1136 | ||
1137 | + fib_select_default(net, fl, res); | |
1138 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
1139 | - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) | |
1140 | + if (res->fi && res->fi->fib_nhs > 1) | |
1141 | fib_select_multipath(fl, res); | |
1142 | #endif | |
1143 | ||
1144 | /* create a routing cache entry */ | |
1145 | - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); | |
1146 | + err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth); | |
1147 | if (err) | |
1148 | return err; | |
1149 | ||
1150 | @@ -1939,18 +1948,19 @@ static int ip_mkroute_input(struct sk_bu | |
1151 | */ | |
1152 | ||
1153 | static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1154 | - u8 tos, struct net_device *dev) | |
1155 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1156 | { | |
1157 | struct fib_result res; | |
1158 | struct in_device *in_dev = in_dev_get(dev); | |
1159 | struct flowi fl = { .nl_u = { .ip4_u = | |
1160 | { .daddr = daddr, | |
1161 | - .saddr = saddr, | |
1162 | + .saddr = lsrc? : saddr, | |
1163 | .tos = tos, | |
1164 | .scope = RT_SCOPE_UNIVERSE, | |
1165 | } }, | |
1166 | .mark = skb->mark, | |
1167 | - .iif = dev->ifindex }; | |
1168 | + .iif = lsrc? | |
1169 | + dev_net(dev)->loopback_dev->ifindex : dev->ifindex }; | |
1170 | unsigned flags = 0; | |
1171 | u32 itag = 0; | |
1172 | struct rtable * rth; | |
1173 | @@ -1986,6 +1996,12 @@ static int ip_route_input_slow(struct sk | |
1174 | ipv4_is_loopback(daddr)) | |
1175 | goto martian_destination; | |
1176 | ||
1177 | + if (lsrc) { | |
1178 | + if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) || | |
1179 | + ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc)) | |
1180 | + goto e_inval; | |
1181 | + } | |
1182 | + | |
1183 | /* | |
1184 | * Now we are ready to route packet. | |
1185 | */ | |
1186 | @@ -1995,6 +2011,8 @@ static int ip_route_input_slow(struct sk | |
1187 | goto no_route; | |
1188 | } | |
1189 | free_res = 1; | |
1190 | + fl.iif = dev->ifindex; | |
1191 | + fl.fl4_src = saddr; | |
1192 | ||
1193 | RT_CACHE_STAT_INC(in_slow_tot); | |
1194 | ||
1195 | @@ -2019,7 +2037,7 @@ static int ip_route_input_slow(struct sk | |
1196 | if (res.type != RTN_UNICAST) | |
1197 | goto martian_destination; | |
1198 | ||
1199 | - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | |
1200 | + err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc); | |
1201 | done: | |
1202 | in_dev_put(in_dev); | |
1203 | if (free_res) | |
1204 | @@ -2029,6 +2047,8 @@ out: return err; | |
1205 | brd_input: | |
1206 | if (skb->protocol != htons(ETH_P_IP)) | |
1207 | goto e_inval; | |
1208 | + if (lsrc) | |
1209 | + goto e_inval; | |
1210 | ||
1211 | if (ipv4_is_zeronet(saddr)) | |
1212 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | |
1213 | @@ -2070,6 +2090,7 @@ local_input: | |
1214 | rth->u.dst.dev = net->loopback_dev; | |
1215 | dev_hold(rth->u.dst.dev); | |
1216 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1217 | + rth->fl.fl4_gw = 0; | |
1218 | rth->rt_gateway = daddr; | |
1219 | rth->rt_spec_dst= spec_dst; | |
1220 | rth->u.dst.input= ip_local_deliver; | |
1221 | @@ -2121,8 +2142,9 @@ martian_source: | |
1222 | goto e_inval; | |
1223 | } | |
1224 | ||
1225 | -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1226 | - u8 tos, struct net_device *dev) | |
1227 | +static inline int | |
1228 | +ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1229 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1230 | { | |
1231 | struct rtable * rth; | |
1232 | unsigned hash; | |
1233 | @@ -2139,6 +2161,7 @@ int ip_route_input(struct sk_buff *skb, | |
1234 | if (((rth->fl.fl4_dst ^ daddr) | | |
1235 | (rth->fl.fl4_src ^ saddr) | | |
1236 | (rth->fl.iif ^ iif) | | |
1237 | + (rth->fl.fl4_lsrc ^ lsrc) | | |
1238 | rth->fl.oif | | |
1239 | (rth->fl.fl4_tos ^ tos)) == 0 && | |
1240 | rth->fl.mark == skb->mark && | |
1241 | @@ -2186,7 +2209,19 @@ int ip_route_input(struct sk_buff *skb, | |
1242 | rcu_read_unlock(); | |
1243 | return -EINVAL; | |
1244 | } | |
1245 | - return ip_route_input_slow(skb, daddr, saddr, tos, dev); | |
1246 | + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc); | |
1247 | +} | |
1248 | + | |
1249 | +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1250 | + u8 tos, struct net_device *dev) | |
1251 | +{ | |
1252 | + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0); /* lsrc = 0: the lookup is keyed on saddr */ | |
1253 | +} | |
1254 | + | |
1255 | +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1256 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1257 | +{ | |
1258 | + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc); /* same path as ip_route_input(), but with the caller's lsrc */ | |
1259 | } | |
1260 | ||
1261 | static int __mkroute_output(struct rtable **result, | |
1262 | @@ -2258,6 +2293,7 @@ static int __mkroute_output(struct rtabl | |
1263 | rth->fl.fl4_tos = tos; | |
1264 | rth->fl.fl4_src = oldflp->fl4_src; | |
1265 | rth->fl.oif = oldflp->oif; | |
1266 | + rth->fl.fl4_gw = oldflp->fl4_gw; | |
1267 | rth->fl.mark = oldflp->mark; | |
1268 | rth->rt_dst = fl->fl4_dst; | |
1269 | rth->rt_src = fl->fl4_src; | |
1270 | @@ -2339,6 +2375,7 @@ static int ip_route_output_slow(struct n | |
1271 | struct flowi fl = { .nl_u = { .ip4_u = | |
1272 | { .daddr = oldflp->fl4_dst, | |
1273 | .saddr = oldflp->fl4_src, | |
1274 | + .gw = oldflp->fl4_gw, | |
1275 | .tos = tos & IPTOS_RT_MASK, | |
1276 | .scope = ((tos & RTO_ONLINK) ? | |
1277 | RT_SCOPE_LINK : | |
1278 | @@ -2450,6 +2487,7 @@ static int ip_route_output_slow(struct n | |
1279 | dev_out = net->loopback_dev; | |
1280 | dev_hold(dev_out); | |
1281 | fl.oif = net->loopback_dev->ifindex; | |
1282 | + fl.fl4_gw = 0; | |
1283 | res.type = RTN_LOCAL; | |
1284 | flags |= RTCF_LOCAL; | |
1285 | goto make_route; | |
1286 | @@ -2457,7 +2495,7 @@ static int ip_route_output_slow(struct n | |
1287 | ||
1288 | if (fib_lookup(net, &fl, &res)) { | |
1289 | res.fi = NULL; | |
1290 | - if (oldflp->oif) { | |
1291 | + if (oldflp->oif && dev_out->flags & IFF_UP) { | |
1292 | /* Apparently, routing tables are wrong. Assume, | |
1293 | that the destination is on link. | |
1294 | ||
1295 | @@ -2497,6 +2535,7 @@ static int ip_route_output_slow(struct n | |
1296 | dev_out = net->loopback_dev; | |
1297 | dev_hold(dev_out); | |
1298 | fl.oif = dev_out->ifindex; | |
1299 | + fl.fl4_gw = 0; | |
1300 | if (res.fi) | |
1301 | fib_info_put(res.fi); | |
1302 | res.fi = NULL; | |
1303 | @@ -2504,13 +2543,12 @@ static int ip_route_output_slow(struct n | |
1304 | goto make_route; | |
1305 | } | |
1306 | ||
1307 | + if (res.type == RTN_UNICAST) | |
1308 | + fib_select_default(net, &fl, &res); | |
1309 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
1310 | - if (res.fi->fib_nhs > 1 && fl.oif == 0) | |
1311 | + if (res.fi->fib_nhs > 1) | |
1312 | fib_select_multipath(&fl, &res); | |
1313 | - else | |
1314 | #endif | |
1315 | - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | |
1316 | - fib_select_default(net, &fl, &res); | |
1317 | ||
1318 | if (!fl.fl4_src) | |
1319 | fl.fl4_src = FIB_RES_PREFSRC(res); | |
1320 | @@ -2548,6 +2586,7 @@ int __ip_route_output_key(struct net *ne | |
1321 | rth->fl.fl4_src == flp->fl4_src && | |
1322 | rth->fl.iif == 0 && | |
1323 | rth->fl.oif == flp->oif && | |
1324 | + rth->fl.fl4_gw == flp->fl4_gw && | |
1325 | rth->fl.mark == flp->mark && | |
1326 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | |
1327 | (IPTOS_RT_MASK | RTO_ONLINK)) && | |
1328 | @@ -3322,3 +3361,4 @@ void __init ip_static_sysctl_init(void) | |
1329 | EXPORT_SYMBOL(__ip_select_ident); | |
1330 | EXPORT_SYMBOL(ip_route_input); | |
1331 | EXPORT_SYMBOL(ip_route_output_key); | |
1332 | +EXPORT_SYMBOL(ip_route_input_lookup); |