]>
Commit | Line | Data |
---|---|---|
83cd86f2 PS |
1 | diff -urp v2.6.27/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h |
2 | --- v2.6.27/linux/include/linux/rtnetlink.h 2008-10-11 12:46:15.000000000 +0300 | |
3 | +++ linux/include/linux/rtnetlink.h 2008-10-11 13:50:41.000000000 +0300 | |
10b0f5dc | 4 | @@ -304,6 +304,8 @@ struct rtnexthop |
d70ce330 | 5 | #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ |
6 | #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ | |
7 | #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ | |
8 | +#define RTNH_F_SUSPECT 8 /* We don't know the real state */ | |
9 | +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT) | |
10 | ||
11 | /* Macros to handle hexthops */ | |
12 | ||
83cd86f2 PS |
13 | diff -urp v2.6.27/linux/include/net/flow.h linux/include/net/flow.h |
14 | --- v2.6.27/linux/include/net/flow.h 2008-10-11 12:46:15.000000000 +0300 | |
15 | +++ linux/include/net/flow.h 2008-10-11 13:51:37.000000000 +0300 | |
d70ce330 | 16 | @@ -19,6 +19,8 @@ struct flowi { |
17 | struct { | |
18 | __be32 daddr; | |
19 | __be32 saddr; | |
20 | + __be32 lsrc; | |
21 | + __be32 gw; | |
22 | __u8 tos; | |
23 | __u8 scope; | |
24 | } ip4_u; | |
25 | @@ -43,6 +45,8 @@ struct flowi { | |
26 | #define fl6_flowlabel nl_u.ip6_u.flowlabel | |
27 | #define fl4_dst nl_u.ip4_u.daddr | |
28 | #define fl4_src nl_u.ip4_u.saddr | |
29 | +#define fl4_lsrc nl_u.ip4_u.lsrc | |
30 | +#define fl4_gw nl_u.ip4_u.gw | |
31 | #define fl4_tos nl_u.ip4_u.tos | |
32 | #define fl4_scope nl_u.ip4_u.scope | |
33 | ||
83cd86f2 PS |
34 | diff -urp v2.6.27/linux/include/net/ip_fib.h linux/include/net/ip_fib.h |
35 | --- v2.6.27/linux/include/net/ip_fib.h 2008-04-17 09:58:08.000000000 +0300 | |
36 | +++ linux/include/net/ip_fib.h 2008-10-11 13:50:41.000000000 +0300 | |
d70ce330 | 37 | @@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str |
38 | extern struct fib_table *fib_new_table(struct net *net, u32 id); | |
39 | extern struct fib_table *fib_get_table(struct net *net, u32 id); | |
40 | ||
41 | +extern int fib_result_table(struct fib_result *res); | |
42 | + | |
43 | #endif /* CONFIG_IP_MULTIPLE_TABLES */ | |
44 | ||
45 | /* Exported by fib_frontend.c */ | |
46 | @@ -276,4 +278,6 @@ static inline void fib_proc_exit(struct | |
47 | } | |
48 | #endif | |
49 | ||
50 | +extern rwlock_t fib_nhflags_lock; | |
51 | + | |
52 | #endif /* _NET_FIB_H */ | |
83cd86f2 PS |
53 | diff -urp v2.6.27/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h |
54 | --- v2.6.27/linux/include/net/netfilter/nf_nat.h 2008-04-17 09:58:08.000000000 +0300 | |
55 | +++ linux/include/net/netfilter/nf_nat.h 2008-10-11 13:51:37.000000000 +0300 | |
d70ce330 | 56 | @@ -77,6 +77,13 @@ struct nf_conn_nat |
57 | #endif | |
58 | }; | |
59 | ||
60 | +/* Call input routing for SNAT-ed traffic */ | |
61 | +extern unsigned int ip_nat_route_input(unsigned int hooknum, | |
62 | + struct sk_buff *skb, | |
63 | + const struct net_device *in, | |
64 | + const struct net_device *out, | |
65 | + int (*okfn)(struct sk_buff *)); | |
66 | + | |
67 | /* Set up the info structure to map into this range. */ | |
68 | extern unsigned int nf_nat_setup_info(struct nf_conn *ct, | |
69 | const struct nf_nat_range *range, | |
83cd86f2 PS |
70 | diff -urp v2.6.27/linux/include/net/route.h linux/include/net/route.h |
71 | --- v2.6.27/linux/include/net/route.h 2008-10-11 12:46:15.000000000 +0300 | |
72 | +++ linux/include/net/route.h 2008-10-11 13:51:37.000000000 +0300 | |
10b0f5dc | 73 | @@ -116,6 +116,7 @@ extern int __ip_route_output_key(struct |
d70ce330 | 74 | extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp); |
75 | extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); | |
76 | extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); | |
77 | +extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc); | |
10b0f5dc | 78 | extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev); |
d70ce330 | 79 | extern void ip_rt_send_redirect(struct sk_buff *skb); |
80 | ||
83cd86f2 PS |
81 | diff -urp v2.6.27/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c |
82 | --- v2.6.27/linux/net/bridge/br_netfilter.c 2008-10-11 12:46:16.000000000 +0300 | |
83 | +++ linux/net/bridge/br_netfilter.c 2008-10-11 13:51:37.000000000 +0300 | |
84 | @@ -328,6 +328,10 @@ static int br_nf_pre_routing_finish(stru | |
d70ce330 | 85 | struct nf_bridge_info *nf_bridge = skb->nf_bridge; |
86 | int err; | |
87 | ||
88 | + /* An old skb->dst is not expected here; it is lost in all cases */ | |
89 | + dst_release(skb->dst); | |
90 | + skb->dst = NULL; | |
91 | + | |
92 | if (nf_bridge->mask & BRNF_PKT_TYPE) { | |
93 | skb->pkt_type = PACKET_OTHERHOST; | |
94 | nf_bridge->mask ^= BRNF_PKT_TYPE; | |
83cd86f2 PS |
95 | diff -urp v2.6.27/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c |
96 | --- v2.6.27/linux/net/ipv4/fib_frontend.c 2008-10-11 12:46:16.000000000 +0300 | |
97 | +++ linux/net/ipv4/fib_frontend.c 2008-10-11 13:50:41.000000000 +0300 | |
98 | @@ -47,6 +47,8 @@ | |
d70ce330 | 99 | |
100 | #ifndef CONFIG_IP_MULTIPLE_TABLES | |
101 | ||
102 | +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN) | |
103 | + | |
104 | static int __net_init fib4_rules_init(struct net *net) | |
105 | { | |
106 | struct fib_table *local_table, *main_table; | |
83cd86f2 | 107 | @@ -71,6 +73,8 @@ fail: |
d70ce330 | 108 | } |
109 | #else | |
110 | ||
111 | +#define FIB_RES_TABLE(r) (fib_result_table(r)) | |
112 | + | |
113 | struct fib_table *fib_new_table(struct net *net, u32 id) | |
114 | { | |
115 | struct fib_table *tb; | |
83cd86f2 | 116 | @@ -125,7 +129,8 @@ void fib_select_default(struct net *net, |
d70ce330 | 117 | table = res->r->table; |
118 | #endif | |
119 | tb = fib_get_table(net, table); | |
120 | - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) | |
121 | + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) || | |
122 | + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST) | |
123 | tb->tb_select_default(tb, flp, res); | |
124 | } | |
125 | ||
83cd86f2 | 126 | @@ -239,6 +244,9 @@ int fib_validate_source(__be32 src, __be |
d70ce330 | 127 | .tos = tos } }, |
128 | .iif = oif }; | |
129 | struct fib_result res; | |
130 | + int table; | |
131 | + unsigned char prefixlen; | |
132 | + unsigned char scope; | |
133 | int no_addr, rpf; | |
134 | int ret; | |
135 | struct net *net; | |
83cd86f2 | 136 | @@ -262,31 +270,35 @@ int fib_validate_source(__be32 src, __be |
d70ce330 | 137 | goto e_inval_res; |
138 | *spec_dst = FIB_RES_PREFSRC(res); | |
139 | fib_combine_itag(itag, &res); | |
140 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
141 | - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) | |
142 | -#else | |
143 | if (FIB_RES_DEV(res) == dev) | |
144 | -#endif | |
145 | { | |
146 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | |
147 | fib_res_put(&res); | |
148 | return ret; | |
149 | } | |
150 | + table = FIB_RES_TABLE(&res); | |
151 | + prefixlen = res.prefixlen; | |
152 | + scope = res.scope; | |
153 | fib_res_put(&res); | |
154 | if (no_addr) | |
155 | goto last_resort; | |
156 | - if (rpf) | |
157 | - goto e_inval; | |
158 | fl.oif = dev->ifindex; | |
159 | ||
160 | ret = 0; | |
161 | if (fib_lookup(net, &fl, &res) == 0) { | |
162 | - if (res.type == RTN_UNICAST) { | |
163 | + if (res.type == RTN_UNICAST && | |
164 | + ((table == FIB_RES_TABLE(&res) && | |
165 | + res.prefixlen >= prefixlen && res.scope >= scope) || | |
166 | + !rpf)) { | |
167 | *spec_dst = FIB_RES_PREFSRC(res); | |
168 | ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; | |
169 | + fib_res_put(&res); | |
170 | + return ret; | |
171 | } | |
172 | fib_res_put(&res); | |
173 | } | |
174 | + if (rpf) | |
175 | + goto e_inval; | |
176 | return ret; | |
177 | ||
178 | last_resort: | |
83cd86f2 | 179 | @@ -909,9 +921,7 @@ static int fib_inetaddr_event(struct not |
d70ce330 | 180 | switch (event) { |
181 | case NETDEV_UP: | |
182 | fib_add_ifaddr(ifa); | |
183 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
83cd86f2 | 184 | fib_sync_up(dev); |
d70ce330 | 185 | -#endif |
83cd86f2 | 186 | rt_cache_flush(dev_net(dev), -1); |
d70ce330 | 187 | break; |
188 | case NETDEV_DOWN: | |
83cd86f2 | 189 | @@ -947,9 +957,7 @@ static int fib_netdev_event(struct notif |
d70ce330 | 190 | for_ifa(in_dev) { |
191 | fib_add_ifaddr(ifa); | |
192 | } endfor_ifa(in_dev); | |
193 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
194 | fib_sync_up(dev); | |
195 | -#endif | |
83cd86f2 | 196 | rt_cache_flush(dev_net(dev), -1); |
d70ce330 | 197 | break; |
198 | case NETDEV_DOWN: | |
83cd86f2 PS |
199 | diff -urp v2.6.27/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c |
200 | --- v2.6.27/linux/net/ipv4/fib_hash.c 2008-10-11 12:46:16.000000000 +0300 | |
201 | +++ linux/net/ipv4/fib_hash.c 2008-10-11 13:50:41.000000000 +0300 | |
202 | @@ -278,25 +278,35 @@ out: | |
d70ce330 | 203 | static void |
204 | fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res) | |
205 | { | |
206 | - int order, last_idx; | |
207 | + int order, last_idx, last_dflt, last_nhsel; | |
208 | + struct fib_alias *first_fa = NULL; | |
209 | + struct hlist_head *head; | |
210 | struct hlist_node *node; | |
211 | struct fib_node *f; | |
212 | struct fib_info *fi = NULL; | |
213 | struct fib_info *last_resort; | |
214 | struct fn_hash *t = (struct fn_hash*)tb->tb_data; | |
215 | - struct fn_zone *fz = t->fn_zones[0]; | |
216 | + struct fn_zone *fz = t->fn_zones[res->prefixlen]; | |
217 | + __be32 k; | |
218 | ||
219 | if (fz == NULL) | |
220 | return; | |
221 | ||
222 | + k = fz_key(flp->fl4_dst, fz); | |
223 | + last_dflt = -2; | |
224 | + last_nhsel = 0; | |
225 | last_idx = -1; | |
226 | last_resort = NULL; | |
227 | order = -1; | |
228 | ||
229 | read_lock(&fib_hash_lock); | |
230 | - hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) { | |
231 | + head = &fz->fz_hash[fn_hash(k, fz)]; | |
232 | + hlist_for_each_entry(f, node, head, fn_hash) { | |
233 | struct fib_alias *fa; | |
234 | ||
235 | + if (f->fn_key != k) | |
236 | + continue; | |
237 | + | |
238 | list_for_each_entry(fa, &f->fn_alias, fa_list) { | |
239 | struct fib_info *next_fi = fa->fa_info; | |
240 | ||
83cd86f2 | 241 | @@ -304,42 +314,56 @@ fn_hash_select_default(struct fib_table |
d70ce330 | 242 | fa->fa_type != RTN_UNICAST) |
243 | continue; | |
244 | ||
245 | + if (fa->fa_tos && | |
246 | + fa->fa_tos != flp->fl4_tos) | |
247 | + continue; | |
248 | if (next_fi->fib_priority > res->fi->fib_priority) | |
249 | break; | |
250 | - if (!next_fi->fib_nh[0].nh_gw || | |
251 | - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | |
252 | - continue; | |
253 | fa->fa_state |= FA_S_ACCESSED; | |
254 | ||
255 | - if (fi == NULL) { | |
256 | - if (next_fi != res->fi) | |
257 | - break; | |
258 | - } else if (!fib_detect_death(fi, order, &last_resort, | |
259 | - &last_idx, tb->tb_default)) { | |
260 | + if (!first_fa) { | |
261 | + last_dflt = fa->fa_last_dflt; | |
262 | + first_fa = fa; | |
263 | + } | |
264 | + if (fi && !fib_detect_death(fi, order, &last_resort, | |
265 | + &last_idx, &last_dflt, &last_nhsel, flp)) { | |
266 | fib_result_assign(res, fi); | |
267 | - tb->tb_default = order; | |
268 | + first_fa->fa_last_dflt = order; | |
269 | goto out; | |
270 | } | |
271 | fi = next_fi; | |
272 | order++; | |
273 | } | |
274 | + break; | |
275 | } | |
276 | ||
277 | if (order <= 0 || fi == NULL) { | |
278 | - tb->tb_default = -1; | |
279 | + if (fi && fi->fib_nhs > 1 && | |
280 | + fib_detect_death(fi, order, &last_resort, &last_idx, | |
281 | + &last_dflt, &last_nhsel, flp) && | |
282 | + last_resort == fi) { | |
283 | + read_lock_bh(&fib_nhflags_lock); | |
284 | + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
285 | + read_unlock_bh(&fib_nhflags_lock); | |
286 | + } | |
287 | + if (first_fa) first_fa->fa_last_dflt = -1; | |
288 | goto out; | |
289 | } | |
290 | ||
291 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | |
292 | - tb->tb_default)) { | |
293 | + &last_dflt, &last_nhsel, flp)) { | |
294 | fib_result_assign(res, fi); | |
295 | - tb->tb_default = order; | |
296 | + first_fa->fa_last_dflt = order; | |
297 | goto out; | |
298 | } | |
299 | ||
300 | - if (last_idx >= 0) | |
301 | + if (last_idx >= 0) { | |
302 | fib_result_assign(res, last_resort); | |
303 | - tb->tb_default = last_idx; | |
304 | + read_lock_bh(&fib_nhflags_lock); | |
305 | + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
306 | + read_unlock_bh(&fib_nhflags_lock); | |
307 | + first_fa->fa_last_dflt = last_idx; | |
308 | + } | |
309 | out: | |
310 | read_unlock(&fib_hash_lock); | |
311 | } | |
83cd86f2 | 312 | @@ -463,6 +487,7 @@ static int fn_hash_insert(struct fib_tab |
d70ce330 | 313 | write_lock_bh(&fib_hash_lock); |
314 | fi_drop = fa->fa_info; | |
315 | fa->fa_info = fi; | |
316 | + fa->fa_last_dflt = -1; | |
317 | fa->fa_type = cfg->fc_type; | |
318 | fa->fa_scope = cfg->fc_scope; | |
319 | state = fa->fa_state; | |
83cd86f2 | 320 | @@ -517,6 +542,7 @@ static int fn_hash_insert(struct fib_tab |
d70ce330 | 321 | new_fa->fa_type = cfg->fc_type; |
322 | new_fa->fa_scope = cfg->fc_scope; | |
323 | new_fa->fa_state = 0; | |
324 | + new_fa->fa_last_dflt = -1; | |
325 | ||
326 | /* | |
327 | * Insert new entry to the list. | |
83cd86f2 PS |
328 | diff -urp v2.6.27/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h |
329 | --- v2.6.27/linux/net/ipv4/fib_lookup.h 2008-04-17 09:58:09.000000000 +0300 | |
330 | +++ linux/net/ipv4/fib_lookup.h 2008-10-11 13:50:41.000000000 +0300 | |
d70ce330 | 331 | @@ -8,6 +8,7 @@ |
332 | struct fib_alias { | |
333 | struct list_head fa_list; | |
334 | struct fib_info *fa_info; | |
335 | + int fa_last_dflt; | |
336 | u8 fa_tos; | |
337 | u8 fa_type; | |
338 | u8 fa_scope; | |
339 | @@ -38,7 +39,8 @@ extern struct fib_alias *fib_find_alias( | |
340 | u8 tos, u32 prio); | |
341 | extern int fib_detect_death(struct fib_info *fi, int order, | |
342 | struct fib_info **last_resort, | |
343 | - int *last_idx, int dflt); | |
344 | + int *last_idx, int *dflt, int *last_nhsel, | |
345 | + const struct flowi *flp); | |
346 | ||
347 | static inline void fib_result_assign(struct fib_result *res, | |
348 | struct fib_info *fi) | |
83cd86f2 PS |
349 | diff -urp v2.6.27/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c |
350 | --- v2.6.27/linux/net/ipv4/fib_rules.c 2008-10-11 12:46:16.000000000 +0300 | |
351 | +++ linux/net/ipv4/fib_rules.c 2008-10-11 13:50:41.000000000 +0300 | |
d70ce330 | 352 | @@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result * |
353 | } | |
354 | #endif | |
355 | ||
356 | +int fib_result_table(struct fib_result *res) | |
357 | +{ | |
358 | + return res->r->table; | |
359 | +} | |
360 | + | |
361 | int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) | |
362 | { | |
363 | struct fib_lookup_arg arg = { | |
83cd86f2 PS |
364 | diff -urp v2.6.27/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c |
365 | --- v2.6.27/linux/net/ipv4/fib_semantics.c 2008-10-11 12:46:16.000000000 +0300 | |
366 | +++ linux/net/ipv4/fib_semantics.c 2008-10-11 13:51:37.000000000 +0300 | |
367 | @@ -50,6 +50,7 @@ static struct hlist_head *fib_info_hash; | |
d70ce330 | 368 | static struct hlist_head *fib_info_laddrhash; |
369 | static unsigned int fib_hash_size; | |
370 | static unsigned int fib_info_cnt; | |
371 | +rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED; | |
372 | ||
373 | #define DEVINDEX_HASHBITS 8 | |
374 | #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS) | |
83cd86f2 | 375 | @@ -186,7 +187,7 @@ static __inline__ int nh_comp(const stru |
d70ce330 | 376 | #ifdef CONFIG_NET_CLS_ROUTE |
377 | nh->nh_tclassid != onh->nh_tclassid || | |
378 | #endif | |
379 | - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) | |
380 | + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE)) | |
381 | return -1; | |
382 | onh++; | |
383 | } endfor_nexthops(fi); | |
83cd86f2 | 384 | @@ -237,7 +238,7 @@ static struct fib_info *fib_find_info(co |
d70ce330 | 385 | nfi->fib_priority == fi->fib_priority && |
386 | memcmp(nfi->fib_metrics, fi->fib_metrics, | |
387 | sizeof(fi->fib_metrics)) == 0 && | |
388 | - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && | |
389 | + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 && | |
390 | (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) | |
391 | return fi; | |
392 | } | |
83cd86f2 | 393 | @@ -348,26 +349,70 @@ struct fib_alias *fib_find_alias(struct |
d70ce330 | 394 | } |
395 | ||
396 | int fib_detect_death(struct fib_info *fi, int order, | |
397 | - struct fib_info **last_resort, int *last_idx, int dflt) | |
398 | + struct fib_info **last_resort, int *last_idx, int *dflt, | |
399 | + int *last_nhsel, const struct flowi *flp) | |
400 | { | |
401 | struct neighbour *n; | |
402 | - int state = NUD_NONE; | |
403 | + int nhsel; | |
404 | + int state; | |
405 | + struct fib_nh * nh; | |
406 | + __be32 dst; | |
407 | + int flag, dead = 1; | |
408 | + | |
409 | + /* change_nexthops(fi) { */ | |
410 | + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) { | |
411 | + if (flp->oif && flp->oif != nh->nh_oif) | |
412 | + continue; | |
413 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw && | |
414 | + nh->nh_scope == RT_SCOPE_LINK) | |
415 | + continue; | |
416 | + if (nh->nh_flags & RTNH_F_DEAD) | |
417 | + continue; | |
418 | ||
419 | - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); | |
420 | - if (n) { | |
421 | - state = n->nud_state; | |
422 | - neigh_release(n); | |
423 | - } | |
424 | - if (state==NUD_REACHABLE) | |
425 | - return 0; | |
426 | - if ((state&NUD_VALID) && order != dflt) | |
427 | - return 0; | |
428 | - if ((state&NUD_VALID) || | |
429 | - (*last_idx<0 && order > dflt)) { | |
430 | - *last_resort = fi; | |
431 | - *last_idx = order; | |
432 | + flag = 0; | |
433 | + if (nh->nh_dev->flags & IFF_NOARP) { | |
434 | + dead = 0; | |
435 | + goto setfl; | |
436 | + } | |
437 | + | |
438 | + dst = nh->nh_gw; | |
439 | + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK) | |
440 | + dst = flp->fl4_dst; | |
441 | + | |
442 | + state = NUD_NONE; | |
443 | + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev); | |
444 | + if (n) { | |
445 | + state = n->nud_state; | |
446 | + neigh_release(n); | |
447 | + } | |
448 | + if (state==NUD_REACHABLE || | |
449 | + ((state&NUD_VALID) && order != *dflt)) { | |
450 | + dead = 0; | |
451 | + goto setfl; | |
452 | + } | |
453 | + if (!(state&NUD_VALID)) | |
454 | + flag = 1; | |
455 | + if (!dead) | |
456 | + goto setfl; | |
457 | + if ((state&NUD_VALID) || | |
458 | + (*last_idx<0 && order >= *dflt)) { | |
459 | + *last_resort = fi; | |
460 | + *last_idx = order; | |
461 | + *last_nhsel = nhsel; | |
462 | + } | |
463 | + | |
464 | + setfl: | |
465 | + | |
466 | + read_lock_bh(&fib_nhflags_lock); | |
467 | + if (flag) | |
468 | + nh->nh_flags |= RTNH_F_SUSPECT; | |
469 | + else | |
470 | + nh->nh_flags &= ~RTNH_F_SUSPECT; | |
471 | + read_unlock_bh(&fib_nhflags_lock); | |
472 | } | |
473 | - return 1; | |
474 | + /* } endfor_nexthops(fi) */ | |
475 | + | |
476 | + return dead; | |
477 | } | |
478 | ||
479 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
83cd86f2 | 480 | @@ -539,8 +584,11 @@ static int fib_check_nh(struct fib_confi |
d70ce330 | 481 | return -EINVAL; |
482 | if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL) | |
483 | return -ENODEV; | |
484 | - if (!(dev->flags&IFF_UP)) | |
485 | - return -ENETDOWN; | |
486 | + if (!(dev->flags&IFF_UP)) { | |
487 | + if (fi->fib_protocol != RTPROT_STATIC) | |
488 | + return -ENETDOWN; | |
489 | + nh->nh_flags |= RTNH_F_DEAD; | |
490 | + } | |
491 | nh->nh_dev = dev; | |
492 | dev_hold(dev); | |
493 | nh->nh_scope = RT_SCOPE_LINK; | |
83cd86f2 | 494 | @@ -560,24 +608,48 @@ static int fib_check_nh(struct fib_confi |
d70ce330 | 495 | /* It is not necessary, but requires a bit of thinking */ |
496 | if (fl.fl4_scope < RT_SCOPE_LINK) | |
497 | fl.fl4_scope = RT_SCOPE_LINK; | |
498 | - if ((err = fib_lookup(net, &fl, &res)) != 0) | |
499 | - return err; | |
500 | + err = fib_lookup(net, &fl, &res); | |
501 | } | |
502 | - err = -EINVAL; | |
503 | - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | |
504 | - goto out; | |
505 | - nh->nh_scope = res.scope; | |
506 | - nh->nh_oif = FIB_RES_OIF(res); | |
507 | - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) | |
508 | - goto out; | |
509 | - dev_hold(nh->nh_dev); | |
510 | - err = -ENETDOWN; | |
511 | - if (!(nh->nh_dev->flags & IFF_UP)) | |
512 | - goto out; | |
513 | - err = 0; | |
514 | + if (err) { | |
515 | + struct in_device *in_dev; | |
516 | + | |
517 | + if (err != -ENETUNREACH || | |
518 | + fi->fib_protocol != RTPROT_STATIC) | |
519 | + return err; | |
520 | + | |
521 | + in_dev = inetdev_by_index(net, nh->nh_oif); | |
522 | + if (in_dev == NULL || | |
523 | + in_dev->dev->flags & IFF_UP) { | |
524 | + if (in_dev) | |
525 | + in_dev_put(in_dev); | |
526 | + return err; | |
527 | + } | |
528 | + nh->nh_flags |= RTNH_F_DEAD; | |
529 | + nh->nh_scope = RT_SCOPE_LINK; | |
530 | + nh->nh_dev = in_dev->dev; | |
531 | + dev_hold(nh->nh_dev); | |
532 | + in_dev_put(in_dev); | |
533 | + } else { | |
534 | + err = -EINVAL; | |
535 | + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) | |
536 | + goto out; | |
537 | + nh->nh_scope = res.scope; | |
538 | + nh->nh_oif = FIB_RES_OIF(res); | |
539 | + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL) | |
540 | + goto out; | |
541 | + dev_hold(nh->nh_dev); | |
542 | + if (!(nh->nh_dev->flags & IFF_UP)) { | |
543 | + if (fi->fib_protocol != RTPROT_STATIC) { | |
544 | + err = -ENETDOWN; | |
545 | + goto out; | |
546 | + } | |
547 | + nh->nh_flags |= RTNH_F_DEAD; | |
548 | + } | |
549 | + err = 0; | |
550 | out: | |
551 | - fib_res_put(&res); | |
552 | - return err; | |
553 | + fib_res_put(&res); | |
554 | + return err; | |
555 | + } | |
556 | } else { | |
557 | struct in_device *in_dev; | |
558 | ||
83cd86f2 | 559 | @@ -588,8 +660,11 @@ out: |
d70ce330 | 560 | if (in_dev == NULL) |
561 | return -ENODEV; | |
562 | if (!(in_dev->dev->flags&IFF_UP)) { | |
563 | - in_dev_put(in_dev); | |
564 | - return -ENETDOWN; | |
565 | + if (fi->fib_protocol != RTPROT_STATIC) { | |
566 | + in_dev_put(in_dev); | |
567 | + return -ENETDOWN; | |
568 | + } | |
569 | + nh->nh_flags |= RTNH_F_DEAD; | |
570 | } | |
571 | nh->nh_dev = in_dev->dev; | |
572 | dev_hold(nh->nh_dev); | |
83cd86f2 | 573 | @@ -899,8 +974,12 @@ int fib_semantic_match(struct list_head |
d70ce330 | 574 | for_nexthops(fi) { |
575 | if (nh->nh_flags&RTNH_F_DEAD) | |
576 | continue; | |
577 | - if (!flp->oif || flp->oif == nh->nh_oif) | |
578 | - break; | |
579 | + if (flp->oif && flp->oif != nh->nh_oif) | |
580 | + continue; | |
581 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
582 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
583 | + continue; | |
584 | + break; | |
585 | } | |
586 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
587 | if (nhsel < fi->fib_nhs) { | |
83cd86f2 | 588 | @@ -1080,18 +1159,29 @@ int fib_sync_down_dev(struct net_device |
d70ce330 | 589 | prev_fi = fi; |
590 | dead = 0; | |
591 | change_nexthops(fi) { | |
592 | - if (nh->nh_flags&RTNH_F_DEAD) | |
593 | - dead++; | |
594 | - else if (nh->nh_dev == dev && | |
595 | - nh->nh_scope != scope) { | |
596 | - nh->nh_flags |= RTNH_F_DEAD; | |
597 | + if (nh->nh_flags&RTNH_F_DEAD) { | |
598 | + if (fi->fib_protocol!=RTPROT_STATIC || | |
599 | + nh->nh_dev == NULL || | |
600 | + __in_dev_get_rtnl(nh->nh_dev) == NULL || | |
601 | + nh->nh_dev->flags&IFF_UP) | |
602 | + dead++; | |
603 | + } else if (nh->nh_dev == dev && | |
604 | + nh->nh_scope != scope) { | |
605 | + write_lock_bh(&fib_nhflags_lock); | |
606 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
607 | - spin_lock_bh(&fib_multipath_lock); | |
608 | + spin_lock(&fib_multipath_lock); | |
609 | + nh->nh_flags |= RTNH_F_DEAD; | |
610 | fi->fib_power -= nh->nh_power; | |
611 | nh->nh_power = 0; | |
612 | - spin_unlock_bh(&fib_multipath_lock); | |
613 | + spin_unlock(&fib_multipath_lock); | |
614 | +#else | |
615 | + nh->nh_flags |= RTNH_F_DEAD; | |
616 | #endif | |
617 | - dead++; | |
618 | + write_unlock_bh(&fib_nhflags_lock); | |
619 | + if (fi->fib_protocol!=RTPROT_STATIC || | |
620 | + force || | |
621 | + __in_dev_get_rtnl(dev) == NULL) | |
622 | + dead++; | |
623 | } | |
624 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
625 | if (force > 1 && nh->nh_dev == dev) { | |
83cd86f2 | 626 | @@ -1109,11 +1199,8 @@ int fib_sync_down_dev(struct net_device |
d70ce330 | 627 | return ret; |
628 | } | |
629 | ||
630 | -#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
631 | - | |
632 | /* | |
633 | - Dead device goes up. We wake up dead nexthops. | |
634 | - It takes sense only on multipath routes. | |
635 | + A dead device goes up or a new address is added. We wake up dead nexthops. | |
636 | */ | |
637 | ||
638 | int fib_sync_up(struct net_device *dev) | |
83cd86f2 | 639 | @@ -1123,8 +1210,10 @@ int fib_sync_up(struct net_device *dev) |
d70ce330 | 640 | struct hlist_head *head; |
641 | struct hlist_node *node; | |
642 | struct fib_nh *nh; | |
643 | - int ret; | |
644 | + struct fib_result res; | |
645 | + int ret, rep; | |
646 | ||
647 | +repeat: | |
648 | if (!(dev->flags&IFF_UP)) | |
649 | return 0; | |
650 | ||
83cd86f2 | 651 | @@ -1132,6 +1221,7 @@ int fib_sync_up(struct net_device *dev) |
d70ce330 | 652 | hash = fib_devindex_hashfn(dev->ifindex); |
653 | head = &fib_info_devhash[hash]; | |
654 | ret = 0; | |
655 | + rep = 0; | |
656 | ||
657 | hlist_for_each_entry(nh, node, head, nh_hash) { | |
658 | struct fib_info *fi = nh->nh_parent; | |
83cd86f2 | 659 | @@ -1144,19 +1234,39 @@ int fib_sync_up(struct net_device *dev) |
d70ce330 | 660 | prev_fi = fi; |
661 | alive = 0; | |
662 | change_nexthops(fi) { | |
663 | - if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
664 | - alive++; | |
665 | + if (!(nh->nh_flags&RTNH_F_DEAD)) | |
666 | continue; | |
667 | - } | |
668 | if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) | |
669 | continue; | |
670 | if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev)) | |
671 | continue; | |
672 | + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) { | |
673 | + struct flowi fl = { | |
674 | + .nl_u = { .ip4_u = | |
675 | + { .daddr = nh->nh_gw, | |
676 | + .scope = nh->nh_scope } }, | |
677 | + .oif = nh->nh_oif, | |
678 | + }; | |
10b0f5dc | 679 | + if (fib_lookup(dev_net(dev), &fl, &res) != 0) |
d70ce330 | 680 | + continue; |
681 | + if (res.type != RTN_UNICAST && | |
682 | + res.type != RTN_LOCAL) { | |
683 | + fib_res_put(&res); | |
684 | + continue; | |
685 | + } | |
686 | + nh->nh_scope = res.scope; | |
687 | + fib_res_put(&res); | |
688 | + rep = 1; | |
689 | + } | |
690 | alive++; | |
691 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
692 | spin_lock_bh(&fib_multipath_lock); | |
693 | nh->nh_power = 0; | |
694 | +#endif | |
695 | nh->nh_flags &= ~RTNH_F_DEAD; | |
696 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
697 | spin_unlock_bh(&fib_multipath_lock); | |
698 | +#endif | |
699 | } endfor_nexthops(fi) | |
700 | ||
701 | if (alive > 0) { | |
83cd86f2 | 702 | @@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev) |
d70ce330 | 703 | ret++; |
704 | } | |
705 | } | |
706 | + if (rep) | |
707 | + goto repeat; | |
708 | ||
709 | return ret; | |
710 | } | |
711 | ||
712 | +#ifdef CONFIG_IP_ROUTE_MULTIPATH | |
713 | + | |
714 | /* | |
715 | The algorithm is suboptimal, but it provides really | |
716 | fair weighted route distribution. | |
83cd86f2 | 717 | @@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev) |
d70ce330 | 718 | void fib_select_multipath(const struct flowi *flp, struct fib_result *res) |
719 | { | |
720 | struct fib_info *fi = res->fi; | |
721 | - int w; | |
722 | + int w, alive; | |
723 | ||
724 | spin_lock_bh(&fib_multipath_lock); | |
725 | + if (flp->oif) { | |
726 | + int sel = -1; | |
727 | + w = -1; | |
728 | + change_nexthops(fi) { | |
729 | + if (flp->oif != nh->nh_oif) | |
730 | + continue; | |
731 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
732 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
733 | + continue; | |
734 | + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { | |
735 | + if (nh->nh_power > w) { | |
736 | + w = nh->nh_power; | |
737 | + sel = nhsel; | |
738 | + } | |
739 | + } | |
740 | + } endfor_nexthops(fi); | |
741 | + if (sel >= 0) { | |
742 | + spin_unlock_bh(&fib_multipath_lock); | |
743 | + res->nh_sel = sel; | |
744 | + return; | |
745 | + } | |
746 | + goto last_resort; | |
747 | + } | |
748 | + | |
749 | +repeat: | |
750 | if (fi->fib_power <= 0) { | |
751 | int power = 0; | |
752 | change_nexthops(fi) { | |
753 | - if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
754 | + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { | |
755 | power += nh->nh_weight; | |
756 | nh->nh_power = nh->nh_weight; | |
757 | } | |
758 | } endfor_nexthops(fi); | |
759 | fi->fib_power = power; | |
760 | - if (power <= 0) { | |
761 | - spin_unlock_bh(&fib_multipath_lock); | |
762 | - /* Race condition: route has just become dead. */ | |
763 | - res->nh_sel = 0; | |
764 | - return; | |
765 | - } | |
766 | + if (power <= 0) | |
767 | + goto last_resort; | |
768 | } | |
769 | ||
770 | ||
83cd86f2 | 771 | @@ -1203,20 +1338,40 @@ void fib_select_multipath(const struct f |
d70ce330 | 772 | |
773 | w = jiffies % fi->fib_power; | |
774 | ||
775 | + alive = 0; | |
776 | change_nexthops(fi) { | |
777 | - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { | |
778 | + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) { | |
779 | if ((w -= nh->nh_power) <= 0) { | |
780 | nh->nh_power--; | |
781 | fi->fib_power--; | |
782 | - res->nh_sel = nhsel; | |
783 | spin_unlock_bh(&fib_multipath_lock); | |
784 | + res->nh_sel = nhsel; | |
785 | return; | |
786 | } | |
787 | + alive = 1; | |
788 | + } | |
789 | + } endfor_nexthops(fi); | |
790 | + if (alive) { | |
791 | + fi->fib_power = 0; | |
792 | + goto repeat; | |
793 | + } | |
794 | + | |
795 | +last_resort: | |
796 | + | |
797 | + for_nexthops(fi) { | |
798 | + if (!(nh->nh_flags&RTNH_F_DEAD)) { | |
799 | + if (flp->oif && flp->oif != nh->nh_oif) | |
800 | + continue; | |
801 | + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && | |
802 | + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) | |
803 | + continue; | |
804 | + spin_unlock_bh(&fib_multipath_lock); | |
805 | + res->nh_sel = nhsel; | |
806 | + return; | |
807 | } | |
808 | } endfor_nexthops(fi); | |
809 | ||
810 | /* Race condition: route has just become dead. */ | |
811 | - res->nh_sel = 0; | |
812 | spin_unlock_bh(&fib_multipath_lock); | |
813 | } | |
814 | #endif | |
83cd86f2 PS |
815 | diff -urp v2.6.27/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c |
816 | --- v2.6.27/linux/net/ipv4/fib_trie.c 2008-10-11 12:46:16.000000000 +0300 | |
817 | +++ linux/net/ipv4/fib_trie.c 2008-10-11 13:50:41.000000000 +0300 | |
818 | @@ -1261,6 +1261,7 @@ static int fn_trie_insert(struct fib_tab | |
10b0f5dc PS |
819 | fi_drop = fa->fa_info; |
820 | new_fa->fa_tos = fa->fa_tos; | |
821 | new_fa->fa_info = fi; | |
822 | + new_fa->fa_last_dflt = -1; | |
823 | new_fa->fa_type = cfg->fc_type; | |
824 | new_fa->fa_scope = cfg->fc_scope; | |
825 | state = fa->fa_state; | |
83cd86f2 | 826 | @@ -1301,6 +1302,7 @@ static int fn_trie_insert(struct fib_tab |
10b0f5dc PS |
827 | new_fa->fa_type = cfg->fc_type; |
828 | new_fa->fa_scope = cfg->fc_scope; | |
829 | new_fa->fa_state = 0; | |
830 | + new_fa->fa_last_dflt = -1; | |
831 | /* | |
832 | * Insert new entry to the list. | |
833 | */ | |
83cd86f2 | 834 | @@ -1802,24 +1804,31 @@ static void fn_trie_select_default(struc |
10b0f5dc PS |
835 | struct fib_result *res) |
836 | { | |
837 | struct trie *t = (struct trie *) tb->tb_data; | |
838 | - int order, last_idx; | |
839 | + int order, last_idx, last_dflt, last_nhsel; | |
840 | + struct fib_alias *first_fa = NULL; | |
841 | struct fib_info *fi = NULL; | |
842 | struct fib_info *last_resort; | |
843 | struct fib_alias *fa = NULL; | |
844 | struct list_head *fa_head; | |
845 | struct leaf *l; | |
846 | + u32 key, mask; | |
847 | ||
848 | + last_dflt = -2; | |
849 | + last_nhsel = 0; | |
850 | last_idx = -1; | |
851 | last_resort = NULL; | |
852 | order = -1; | |
853 | ||
854 | + mask = inet_make_mask(res->prefixlen); | |
855 | + key = ntohl(flp->fl4_dst & mask); | |
856 | + | |
857 | rcu_read_lock(); | |
858 | ||
859 | - l = fib_find_node(t, 0); | |
860 | + l = fib_find_node(t, key); | |
861 | if (!l) | |
862 | goto out; | |
863 | ||
864 | - fa_head = get_fa_head(l, 0); | |
865 | + fa_head = get_fa_head(l, res->prefixlen); | |
866 | if (!fa_head) | |
867 | goto out; | |
868 | ||
83cd86f2 | 869 | @@ -1833,39 +1842,52 @@ static void fn_trie_select_default(struc |
10b0f5dc PS |
870 | fa->fa_type != RTN_UNICAST) |
871 | continue; | |
872 | ||
873 | + if (fa->fa_tos && | |
874 | + fa->fa_tos != flp->fl4_tos) | |
875 | + continue; | |
876 | if (next_fi->fib_priority > res->fi->fib_priority) | |
877 | break; | |
878 | - if (!next_fi->fib_nh[0].nh_gw || | |
879 | - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) | |
880 | - continue; | |
881 | fa->fa_state |= FA_S_ACCESSED; | |
882 | ||
883 | - if (fi == NULL) { | |
884 | - if (next_fi != res->fi) | |
885 | - break; | |
886 | - } else if (!fib_detect_death(fi, order, &last_resort, | |
887 | - &last_idx, tb->tb_default)) { | |
888 | + if (!first_fa) { | |
889 | + last_dflt = fa->fa_last_dflt; | |
890 | + first_fa = fa; | |
891 | + } | |
892 | + if (fi && !fib_detect_death(fi, order, &last_resort, | |
893 | + &last_idx, &last_dflt, &last_nhsel, flp)) { | |
894 | fib_result_assign(res, fi); | |
895 | - tb->tb_default = order; | |
896 | + first_fa->fa_last_dflt = order; | |
897 | goto out; | |
898 | } | |
899 | fi = next_fi; | |
900 | order++; | |
901 | } | |
902 | if (order <= 0 || fi == NULL) { | |
903 | - tb->tb_default = -1; | |
904 | + if (fi && fi->fib_nhs > 1 && | |
905 | + fib_detect_death(fi, order, &last_resort, &last_idx, | |
906 | + &last_dflt, &last_nhsel, flp) && | |
907 | + last_resort == fi) { | |
908 | + read_lock_bh(&fib_nhflags_lock); | |
909 | + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
910 | + read_unlock_bh(&fib_nhflags_lock); | |
911 | + } | |
912 | + if (first_fa) first_fa->fa_last_dflt = -1; | |
913 | goto out; | |
914 | } | |
915 | ||
916 | if (!fib_detect_death(fi, order, &last_resort, &last_idx, | |
917 | - tb->tb_default)) { | |
918 | + &last_dflt, &last_nhsel, flp)) { | |
919 | fib_result_assign(res, fi); | |
920 | - tb->tb_default = order; | |
921 | + first_fa->fa_last_dflt = order; | |
922 | goto out; | |
923 | } | |
924 | - if (last_idx >= 0) | |
925 | + if (last_idx >= 0) { | |
926 | fib_result_assign(res, last_resort); | |
927 | - tb->tb_default = last_idx; | |
928 | + read_lock_bh(&fib_nhflags_lock); | |
929 | + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; | |
930 | + read_unlock_bh(&fib_nhflags_lock); | |
931 | + first_fa->fa_last_dflt = last_idx; | |
932 | + } | |
933 | out: | |
934 | rcu_read_unlock(); | |
935 | } | |
83cd86f2 PS |
936 | diff -urp v2.6.27/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c |
937 | --- v2.6.27/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-10-11 12:46:16.000000000 +0300 | |
938 | +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-10-11 13:51:37.000000000 +0300 | |
d70ce330 | 939 | @@ -59,7 +59,7 @@ masquerade_tg(struct sk_buff *skb, const |
940 | enum ip_conntrack_info ctinfo; | |
941 | struct nf_nat_range newrange; | |
942 | const struct nf_nat_multi_range_compat *mr; | |
943 | - const struct rtable *rt; | |
944 | + struct rtable *rt; | |
945 | __be32 newsrc; | |
946 | ||
947 | NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); | |
948 | @@ -77,13 +77,28 @@ masquerade_tg(struct sk_buff *skb, const | |
949 | return NF_ACCEPT; | |
950 | ||
951 | mr = targinfo; | |
10b0f5dc | 952 | - rt = skb->rtable; |
d70ce330 | 953 | - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); |
954 | - if (!newsrc) { | |
955 | - printk("MASQUERADE: %s ate my IP address\n", out->name); | |
956 | - return NF_DROP; | |
957 | + | |
958 | + { | |
959 | + struct flowi fl = { .nl_u = { .ip4_u = | |
960 | + { .daddr = ip_hdr(skb)->daddr, | |
961 | + .tos = (RT_TOS(ip_hdr(skb)->tos) | | |
962 | + RTO_CONN), | |
10b0f5dc | 963 | + .gw = skb->rtable->rt_gateway, |
d70ce330 | 964 | + } }, |
965 | + .mark = skb->mark, | |
966 | + .oif = out->ifindex }; | |
10b0f5dc | 967 | + if (ip_route_output_key(dev_net(out), &rt, &fl) != 0) { |
d70ce330 | 968 | + /* Funky routing can do this. */ |
969 | + if (net_ratelimit()) | |
970 | + printk("MASQUERADE:" | |
971 | + " No route: Rusty's brain broke!\n"); | |
972 | + return NF_DROP; | |
973 | + } | |
974 | } | |
975 | ||
976 | + newsrc = rt->rt_src; | |
977 | + ip_rt_put(rt); | |
978 | + | |
979 | write_lock_bh(&masq_lock); | |
980 | nat->masq_index = out->ifindex; | |
981 | write_unlock_bh(&masq_lock); | |
83cd86f2 PS |
982 | diff -urp v2.6.27/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c |
983 | --- v2.6.27/linux/net/ipv4/netfilter/nf_nat_core.c 2008-10-11 12:46:16.000000000 +0300 | |
984 | +++ linux/net/ipv4/netfilter/nf_nat_core.c 2008-10-11 13:51:37.000000000 +0300 | |
10b0f5dc | 985 | @@ -583,6 +583,52 @@ static struct nf_ct_ext_type nat_extend |
d70ce330 | 986 | .flags = NF_CT_EXT_F_PREALLOC, |
987 | }; | |
988 | ||
989 | +unsigned int | |
990 | +ip_nat_route_input(unsigned int hooknum, | |
991 | + struct sk_buff *skb, | |
992 | + const struct net_device *in, | |
993 | + const struct net_device *out, | |
994 | + int (*okfn)(struct sk_buff *)) | |
995 | +{ | |
996 | + struct iphdr *iph; | |
997 | + struct nf_conn *conn; | |
998 | + enum ip_conntrack_info ctinfo; | |
999 | + enum ip_conntrack_dir dir; | |
1000 | + unsigned long statusbit; | |
1001 | + __be32 saddr; | |
1002 | + | |
1003 | + if (!(conn = nf_ct_get(skb, &ctinfo))) | |
1004 | + return NF_ACCEPT; | |
1005 | + | |
1006 | + if (!(conn->status & IPS_NAT_DONE_MASK)) | |
1007 | + return NF_ACCEPT; | |
1008 | + dir = CTINFO2DIR(ctinfo); | |
1009 | + statusbit = IPS_SRC_NAT; | |
1010 | + if (dir == IP_CT_DIR_REPLY) | |
1011 | + statusbit ^= IPS_NAT_MASK; | |
1012 | + if (!(conn->status & statusbit)) | |
1013 | + return NF_ACCEPT; | |
1014 | + | |
1015 | + if (skb->dst) | |
1016 | + return NF_ACCEPT; | |
1017 | + | |
1018 | + if (skb->len < sizeof(struct iphdr)) | |
1019 | + return NF_ACCEPT; | |
1020 | + | |
1021 | + /* use daddr in other direction as masquerade address (lsrc) */ | |
1022 | + iph = ip_hdr(skb); | |
1023 | + saddr = conn->tuplehash[!dir].tuple.dst.u3.ip; | |
1024 | + if (saddr == iph->saddr) | |
1025 | + return NF_ACCEPT; | |
1026 | + | |
1027 | + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos, | |
1028 | + skb->dev, saddr)) | |
1029 | + return NF_DROP; | |
1030 | + | |
1031 | + return NF_ACCEPT; | |
1032 | +} | |
1033 | +EXPORT_SYMBOL_GPL(ip_nat_route_input); | |
1034 | + | |
1035 | static int __init nf_nat_init(void) | |
1036 | { | |
1037 | size_t i; | |
83cd86f2 PS |
1038 | diff -urp v2.6.27/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c |
1039 | --- v2.6.27/linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-07-14 09:58:50.000000000 +0300 | |
1040 | +++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-10-11 13:51:37.000000000 +0300 | |
10b0f5dc | 1041 | @@ -256,6 +256,14 @@ static struct nf_hook_ops nf_nat_ops[] _ |
d70ce330 | 1042 | .hooknum = NF_INET_PRE_ROUTING, |
1043 | .priority = NF_IP_PRI_NAT_DST, | |
1044 | }, | |
1045 | + /* Before routing, route before mangling */ | |
1046 | + { | |
1047 | + .hook = ip_nat_route_input, | |
1048 | + .owner = THIS_MODULE, | |
1049 | + .pf = PF_INET, | |
1050 | + .hooknum = NF_INET_PRE_ROUTING, | |
1051 | + .priority = NF_IP_PRI_LAST-1, | |
1052 | + }, | |
1053 | /* After packet filtering, change source */ | |
1054 | { | |
1055 | .hook = nf_nat_out, | |
83cd86f2 PS |
1056 | diff -urp v2.6.27/linux/net/ipv4/route.c linux/net/ipv4/route.c |
1057 | --- v2.6.27/linux/net/ipv4/route.c 2008-10-11 12:46:16.000000000 +0300 | |
1058 | +++ linux/net/ipv4/route.c 2008-10-11 13:51:37.000000000 +0300 | |
1059 | @@ -1276,6 +1276,7 @@ void ip_rt_redirect(__be32 old_gw, __be3 | |
d70ce330 | 1060 | |
1061 | /* Gateway is different ... */ | |
1062 | rt->rt_gateway = new_gw; | |
1063 | + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw; | |
1064 | ||
1065 | /* Redirect received -> path was valid */ | |
1066 | dst_confirm(&rth->u.dst); | |
83cd86f2 | 1067 | @@ -1725,6 +1726,7 @@ static int ip_route_input_mc(struct sk_b |
d70ce330 | 1068 | rth->fl.fl4_tos = tos; |
1069 | rth->fl.mark = skb->mark; | |
1070 | rth->fl.fl4_src = saddr; | |
1071 | + rth->fl.fl4_lsrc = 0; | |
1072 | rth->rt_src = saddr; | |
1073 | #ifdef CONFIG_NET_CLS_ROUTE | |
1074 | rth->u.dst.tclassid = itag; | |
83cd86f2 | 1075 | @@ -1735,6 +1737,7 @@ static int ip_route_input_mc(struct sk_b |
d70ce330 | 1076 | dev_hold(rth->u.dst.dev); |
1077 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1078 | rth->fl.oif = 0; | |
1079 | + rth->fl.fl4_gw = 0; | |
1080 | rth->rt_gateway = daddr; | |
1081 | rth->rt_spec_dst= spec_dst; | |
83cd86f2 PS |
1082 | rth->rt_genid = rt_genid(dev_net(dev)); |
1083 | @@ -1800,7 +1803,7 @@ static int __mkroute_input(struct sk_buf | |
10b0f5dc PS |
1084 | struct fib_result *res, |
1085 | struct in_device *in_dev, | |
1086 | __be32 daddr, __be32 saddr, u32 tos, | |
1087 | - struct rtable **result) | |
1088 | + __be32 lsrc, struct rtable **result) | |
d70ce330 | 1089 | { |
1090 | ||
1091 | struct rtable *rth; | |
83cd86f2 | 1092 | @@ -1834,6 +1837,7 @@ static int __mkroute_input(struct sk_buf |
d70ce330 | 1093 | flags |= RTCF_DIRECTSRC; |
1094 | ||
10b0f5dc | 1095 | if (out_dev == in_dev && err && |
d70ce330 | 1096 | + !lsrc && |
1097 | (IN_DEV_SHARED_MEDIA(out_dev) || | |
1098 | inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) | |
1099 | flags |= RTCF_DOREDIRECT; | |
83cd86f2 | 1100 | @@ -1867,6 +1871,7 @@ static int __mkroute_input(struct sk_buf |
d70ce330 | 1101 | rth->fl.mark = skb->mark; |
1102 | rth->fl.fl4_src = saddr; | |
1103 | rth->rt_src = saddr; | |
1104 | + rth->fl.fl4_lsrc = lsrc; | |
1105 | rth->rt_gateway = daddr; | |
1106 | rth->rt_iif = | |
1107 | rth->fl.iif = in_dev->dev->ifindex; | |
83cd86f2 | 1108 | @@ -1874,6 +1879,7 @@ static int __mkroute_input(struct sk_buf |
d70ce330 | 1109 | dev_hold(rth->u.dst.dev); |
1110 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1111 | rth->fl.oif = 0; | |
1112 | + rth->fl.fl4_gw = 0; | |
1113 | rth->rt_spec_dst= spec_dst; | |
1114 | ||
1115 | rth->u.dst.input = ip_forward; | |
83cd86f2 | 1116 | @@ -1894,21 +1900,23 @@ static int __mkroute_input(struct sk_buf |
10b0f5dc PS |
1117 | |
1118 | static int ip_mkroute_input(struct sk_buff *skb, | |
1119 | struct fib_result *res, | |
1120 | + struct net *net, | |
1121 | const struct flowi *fl, | |
1122 | struct in_device *in_dev, | |
1123 | - __be32 daddr, __be32 saddr, u32 tos) | |
1124 | + __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc) | |
d70ce330 | 1125 | { |
1126 | struct rtable* rth = NULL; | |
1127 | int err; | |
1128 | unsigned hash; | |
1129 | ||
1130 | + fib_select_default(net, fl, res); | |
1131 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
1132 | - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0) | |
1133 | + if (res->fi && res->fi->fib_nhs > 1) | |
1134 | fib_select_multipath(fl, res); | |
1135 | #endif | |
1136 | ||
1137 | /* create a routing cache entry */ | |
1138 | - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth); | |
1139 | + err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth); | |
1140 | if (err) | |
1141 | return err; | |
1142 | ||
83cd86f2 | 1143 | @@ -1929,18 +1937,19 @@ static int ip_mkroute_input(struct sk_bu |
d70ce330 | 1144 | */ |
1145 | ||
1146 | static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1147 | - u8 tos, struct net_device *dev) | |
1148 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1149 | { | |
1150 | struct fib_result res; | |
1151 | struct in_device *in_dev = in_dev_get(dev); | |
1152 | struct flowi fl = { .nl_u = { .ip4_u = | |
1153 | { .daddr = daddr, | |
1154 | - .saddr = saddr, | |
1155 | + .saddr = lsrc? : saddr, | |
1156 | .tos = tos, | |
1157 | .scope = RT_SCOPE_UNIVERSE, | |
1158 | } }, | |
1159 | .mark = skb->mark, | |
1160 | - .iif = dev->ifindex }; | |
1161 | + .iif = lsrc? | |
10b0f5dc | 1162 | + dev_net(dev)->loopback_dev->ifindex : dev->ifindex }; |
d70ce330 | 1163 | unsigned flags = 0; |
1164 | u32 itag = 0; | |
1165 | struct rtable * rth; | |
83cd86f2 | 1166 | @@ -1976,6 +1985,12 @@ static int ip_route_input_slow(struct sk |
d70ce330 | 1167 | ipv4_is_loopback(daddr)) |
1168 | goto martian_destination; | |
1169 | ||
1170 | + if (lsrc) { | |
1171 | + if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) || | |
1172 | + ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc)) | |
1173 | + goto e_inval; | |
1174 | + } | |
1175 | + | |
1176 | /* | |
1177 | * Now we are ready to route packet. | |
1178 | */ | |
83cd86f2 | 1179 | @@ -1985,6 +2000,8 @@ static int ip_route_input_slow(struct sk |
d70ce330 | 1180 | goto no_route; |
1181 | } | |
1182 | free_res = 1; | |
1183 | + fl.iif = dev->ifindex; | |
1184 | + fl.fl4_src = saddr; | |
1185 | ||
1186 | RT_CACHE_STAT_INC(in_slow_tot); | |
1187 | ||
83cd86f2 | 1188 | @@ -2009,7 +2026,7 @@ static int ip_route_input_slow(struct sk |
d70ce330 | 1189 | if (res.type != RTN_UNICAST) |
1190 | goto martian_destination; | |
1191 | ||
1192 | - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); | |
1193 | + err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc); | |
1194 | done: | |
1195 | in_dev_put(in_dev); | |
1196 | if (free_res) | |
83cd86f2 | 1197 | @@ -2019,6 +2036,8 @@ out: return err; |
d70ce330 | 1198 | brd_input: |
1199 | if (skb->protocol != htons(ETH_P_IP)) | |
1200 | goto e_inval; | |
1201 | + if (lsrc) | |
1202 | + goto e_inval; | |
1203 | ||
1204 | if (ipv4_is_zeronet(saddr)) | |
1205 | spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); | |
83cd86f2 | 1206 | @@ -2060,6 +2079,7 @@ local_input: |
d70ce330 | 1207 | rth->u.dst.dev = net->loopback_dev; |
1208 | dev_hold(rth->u.dst.dev); | |
1209 | rth->idev = in_dev_get(rth->u.dst.dev); | |
1210 | + rth->fl.fl4_gw = 0; | |
1211 | rth->rt_gateway = daddr; | |
1212 | rth->rt_spec_dst= spec_dst; | |
1213 | rth->u.dst.input= ip_local_deliver; | |
83cd86f2 | 1214 | @@ -2111,8 +2131,9 @@ martian_source: |
d70ce330 | 1215 | goto e_inval; |
1216 | } | |
1217 | ||
1218 | -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1219 | - u8 tos, struct net_device *dev) | |
1220 | +static inline int | |
1221 | +ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1222 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1223 | { | |
1224 | struct rtable * rth; | |
1225 | unsigned hash; | |
83cd86f2 | 1226 | @@ -2129,6 +2150,7 @@ int ip_route_input(struct sk_buff *skb, |
10b0f5dc PS |
1227 | if (((rth->fl.fl4_dst ^ daddr) | |
1228 | (rth->fl.fl4_src ^ saddr) | | |
1229 | (rth->fl.iif ^ iif) | | |
1230 | + (rth->fl.fl4_lsrc ^ lsrc) | | |
1231 | rth->fl.oif | | |
1232 | (rth->fl.fl4_tos ^ tos)) == 0 && | |
d70ce330 | 1233 | rth->fl.mark == skb->mark && |
83cd86f2 | 1234 | @@ -2176,7 +2198,19 @@ int ip_route_input(struct sk_buff *skb, |
d70ce330 | 1235 | rcu_read_unlock(); |
1236 | return -EINVAL; | |
1237 | } | |
1238 | - return ip_route_input_slow(skb, daddr, saddr, tos, dev); | |
1239 | + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc); | |
1240 | +} | |
1241 | + | |
1242 | +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1243 | + u8 tos, struct net_device *dev) | |
1244 | +{ | |
1245 | + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0); | |
1246 | +} | |
1247 | + | |
1248 | +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr, | |
1249 | + u8 tos, struct net_device *dev, __be32 lsrc) | |
1250 | +{ | |
1251 | + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc); | |
1252 | } | |
1253 | ||
10b0f5dc | 1254 | static int __mkroute_output(struct rtable **result, |
83cd86f2 | 1255 | @@ -2248,6 +2282,7 @@ static int __mkroute_output(struct rtabl |
d70ce330 | 1256 | rth->fl.fl4_tos = tos; |
1257 | rth->fl.fl4_src = oldflp->fl4_src; | |
1258 | rth->fl.oif = oldflp->oif; | |
1259 | + rth->fl.fl4_gw = oldflp->fl4_gw; | |
1260 | rth->fl.mark = oldflp->mark; | |
1261 | rth->rt_dst = fl->fl4_dst; | |
1262 | rth->rt_src = fl->fl4_src; | |
83cd86f2 | 1263 | @@ -2329,6 +2364,7 @@ static int ip_route_output_slow(struct n |
d70ce330 | 1264 | struct flowi fl = { .nl_u = { .ip4_u = |
1265 | { .daddr = oldflp->fl4_dst, | |
1266 | .saddr = oldflp->fl4_src, | |
1267 | + .gw = oldflp->fl4_gw, | |
1268 | .tos = tos & IPTOS_RT_MASK, | |
1269 | .scope = ((tos & RTO_ONLINK) ? | |
1270 | RT_SCOPE_LINK : | |
83cd86f2 | 1271 | @@ -2434,6 +2470,7 @@ static int ip_route_output_slow(struct n |
d70ce330 | 1272 | dev_out = net->loopback_dev; |
1273 | dev_hold(dev_out); | |
1274 | fl.oif = net->loopback_dev->ifindex; | |
1275 | + fl.fl4_gw = 0; | |
1276 | res.type = RTN_LOCAL; | |
1277 | flags |= RTCF_LOCAL; | |
1278 | goto make_route; | |
83cd86f2 | 1279 | @@ -2441,7 +2478,7 @@ static int ip_route_output_slow(struct n |
d70ce330 | 1280 | |
1281 | if (fib_lookup(net, &fl, &res)) { | |
1282 | res.fi = NULL; | |
1283 | - if (oldflp->oif) { | |
1284 | + if (oldflp->oif && dev_out->flags & IFF_UP) { | |
1285 | /* Apparently, routing tables are wrong. Assume, | |
1286 | that the destination is on link. | |
1287 | ||
83cd86f2 | 1288 | @@ -2481,6 +2518,7 @@ static int ip_route_output_slow(struct n |
d70ce330 | 1289 | dev_out = net->loopback_dev; |
1290 | dev_hold(dev_out); | |
1291 | fl.oif = dev_out->ifindex; | |
1292 | + fl.fl4_gw = 0; | |
1293 | if (res.fi) | |
1294 | fib_info_put(res.fi); | |
1295 | res.fi = NULL; | |
83cd86f2 | 1296 | @@ -2488,13 +2526,12 @@ static int ip_route_output_slow(struct n |
d70ce330 | 1297 | goto make_route; |
1298 | } | |
1299 | ||
1300 | + if (res.type == RTN_UNICAST) | |
1301 | + fib_select_default(net, &fl, &res); | |
1302 | #ifdef CONFIG_IP_ROUTE_MULTIPATH | |
1303 | - if (res.fi->fib_nhs > 1 && fl.oif == 0) | |
1304 | + if (res.fi->fib_nhs > 1) | |
1305 | fib_select_multipath(&fl, &res); | |
1306 | - else | |
1307 | #endif | |
1308 | - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif) | |
1309 | - fib_select_default(net, &fl, &res); | |
1310 | ||
1311 | if (!fl.fl4_src) | |
1312 | fl.fl4_src = FIB_RES_PREFSRC(res); | |
83cd86f2 | 1313 | @@ -2532,6 +2569,7 @@ int __ip_route_output_key(struct net *ne |
d70ce330 | 1314 | rth->fl.fl4_src == flp->fl4_src && |
1315 | rth->fl.iif == 0 && | |
1316 | rth->fl.oif == flp->oif && | |
1317 | + rth->fl.fl4_gw == flp->fl4_gw && | |
1318 | rth->fl.mark == flp->mark && | |
1319 | !((rth->fl.fl4_tos ^ flp->fl4_tos) & | |
1320 | (IPTOS_RT_MASK | RTO_ONLINK)) && | |
83cd86f2 | 1321 | @@ -3310,3 +3348,4 @@ void __init ip_static_sysctl_init(void) |
d70ce330 | 1322 | EXPORT_SYMBOL(__ip_select_ident); |
1323 | EXPORT_SYMBOL(ip_route_input); | |
1324 | EXPORT_SYMBOL(ip_route_output_key); | |
1325 | +EXPORT_SYMBOL(ip_route_input_lookup); |