git.pld-linux.org Git - packages/kernel.git/blame - kernel-routes.patch
- updated for 2.6.32.56
[packages/kernel.git] / kernel-routes.patch
CommitLineData
2380c486
JR
1diff -urp v2.6.28/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2--- v2.6.28/linux/include/linux/rtnetlink.h 2008-12-25 10:12:24.000000000 +0200
3+++ linux/include/linux/rtnetlink.h 2009-02-06 09:43:23.000000000 +0200
4@@ -304,6 +304,8 @@ struct rtnexthop
5 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
6 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
7 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
8+#define RTNH_F_SUSPECT 8 /* We don't know the real state */
9+#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
10
11 /* Macros to handle hexthops */
12
13diff -urp v2.6.28/linux/include/net/flow.h linux/include/net/flow.h
14--- v2.6.28/linux/include/net/flow.h 2008-12-25 10:12:24.000000000 +0200
15+++ linux/include/net/flow.h 2009-02-06 09:43:23.000000000 +0200
16@@ -19,6 +19,8 @@ struct flowi {
17 struct {
18 __be32 daddr;
19 __be32 saddr;
20+ __be32 lsrc;
21+ __be32 gw;
22 __u8 tos;
23 __u8 scope;
24 } ip4_u;
25@@ -43,6 +45,8 @@ struct flowi {
26 #define fl6_flowlabel nl_u.ip6_u.flowlabel
27 #define fl4_dst nl_u.ip4_u.daddr
28 #define fl4_src nl_u.ip4_u.saddr
29+#define fl4_lsrc nl_u.ip4_u.lsrc
30+#define fl4_gw nl_u.ip4_u.gw
31 #define fl4_tos nl_u.ip4_u.tos
32 #define fl4_scope nl_u.ip4_u.scope
33
34diff -urp v2.6.28/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35--- v2.6.28/linux/include/net/ip_fib.h 2008-04-17 09:58:08.000000000 +0300
36+++ linux/include/net/ip_fib.h 2009-02-06 09:43:23.000000000 +0200
37@@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str
38 extern struct fib_table *fib_new_table(struct net *net, u32 id);
39 extern struct fib_table *fib_get_table(struct net *net, u32 id);
40
41+extern int fib_result_table(struct fib_result *res);
42+
43 #endif /* CONFIG_IP_MULTIPLE_TABLES */
44
45 /* Exported by fib_frontend.c */
46@@ -276,4 +278,6 @@ static inline void fib_proc_exit(struct
47 }
48 #endif
49
50+extern rwlock_t fib_nhflags_lock;
51+
52 #endif /* _NET_FIB_H */
53diff -urp v2.6.28/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54--- v2.6.28/linux/include/net/netfilter/nf_nat.h 2008-04-17 09:58:08.000000000 +0300
55+++ linux/include/net/netfilter/nf_nat.h 2009-02-06 09:43:23.000000000 +0200
56@@ -77,6 +77,13 @@ struct nf_conn_nat
57 #endif
58 };
59
60+/* Call input routing for SNAT-ed traffic */
61+extern unsigned int ip_nat_route_input(unsigned int hooknum,
62+ struct sk_buff *skb,
63+ const struct net_device *in,
64+ const struct net_device *out,
65+ int (*okfn)(struct sk_buff *));
66+
67 /* Set up the info structure to map into this range. */
68 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69 const struct nf_nat_range *range,
70diff -urp v2.6.28/linux/include/net/route.h linux/include/net/route.h
71--- v2.6.28/linux/include/net/route.h 2008-12-25 10:12:24.000000000 +0200
72+++ linux/include/net/route.h 2009-02-06 09:43:23.000000000 +0200
73@@ -116,6 +116,7 @@ extern int __ip_route_output_key(struct
74 extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
75 extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
76 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
77+extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
78 extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
79 extern void ip_rt_send_redirect(struct sk_buff *skb);
80
81diff -urp v2.6.28/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82--- v2.6.28/linux/net/bridge/br_netfilter.c 2008-12-25 10:12:25.000000000 +0200
83+++ linux/net/bridge/br_netfilter.c 2009-02-06 09:43:23.000000000 +0200
84@@ -341,6 +341,10 @@ static int br_nf_pre_routing_finish(stru
85 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
86 int err;
87
88+ /* An old skb->dst is not expected here; drop it, it is lost in all cases anyway */
933f5665
AM
89+ skb_dst_drop(skb);
90+
2380c486
JR
91+
92 if (nf_bridge->mask & BRNF_PKT_TYPE) {
93 skb->pkt_type = PACKET_OTHERHOST;
94 nf_bridge->mask ^= BRNF_PKT_TYPE;
95diff -urp v2.6.28/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
96--- v2.6.28/linux/net/ipv4/fib_frontend.c 2008-10-11 12:46:16.000000000 +0300
97+++ linux/net/ipv4/fib_frontend.c 2009-02-06 09:43:23.000000000 +0200
98@@ -47,6 +47,8 @@
99
100 #ifndef CONFIG_IP_MULTIPLE_TABLES
101
102+#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
103+
104 static int __net_init fib4_rules_init(struct net *net)
105 {
106 struct fib_table *local_table, *main_table;
107@@ -71,6 +73,8 @@ fail:
108 }
109 #else
110
111+#define FIB_RES_TABLE(r) (fib_result_table(r))
112+
113 struct fib_table *fib_new_table(struct net *net, u32 id)
114 {
115 struct fib_table *tb;
116@@ -125,7 +129,8 @@ void fib_select_default(struct net *net,
117 table = res->r->table;
118 #endif
119 tb = fib_get_table(net, table);
120- if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
121+ if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
122+ FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
123 tb->tb_select_default(tb, flp, res);
124 }
125
126@@ -239,6 +244,9 @@ int fib_validate_source(__be32 src, __be
127 .tos = tos } },
128 .iif = oif };
129 struct fib_result res;
130+ int table;
131+ unsigned char prefixlen;
132+ unsigned char scope;
133 int no_addr, rpf;
134 int ret;
135 struct net *net;
136@@ -262,31 +270,35 @@ int fib_validate_source(__be32 src, __be
137 goto e_inval_res;
138 *spec_dst = FIB_RES_PREFSRC(res);
139 fib_combine_itag(itag, &res);
140-#ifdef CONFIG_IP_ROUTE_MULTIPATH
141- if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
142-#else
143 if (FIB_RES_DEV(res) == dev)
144-#endif
145 {
146 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
147 fib_res_put(&res);
148 return ret;
149 }
150+ table = FIB_RES_TABLE(&res);
151+ prefixlen = res.prefixlen;
152+ scope = res.scope;
153 fib_res_put(&res);
154 if (no_addr)
155 goto last_resort;
9474138d 156- if (rpf == 1)
2380c486
JR
157- goto e_inval;
158 fl.oif = dev->ifindex;
159
160 ret = 0;
161 if (fib_lookup(net, &fl, &res) == 0) {
162- if (res.type == RTN_UNICAST) {
163+ if (res.type == RTN_UNICAST &&
164+ ((table == FIB_RES_TABLE(&res) &&
165+ res.prefixlen >= prefixlen && res.scope >= scope) ||
166+ !rpf)) {
167 *spec_dst = FIB_RES_PREFSRC(res);
168 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
169+ fib_res_put(&res);
170+ return ret;
171 }
172 fib_res_put(&res);
173 }
174+ if (rpf)
175+ goto e_inval;
176 return ret;
177
178 last_resort:
179@@ -909,9 +921,7 @@ static int fib_inetaddr_event(struct not
180 switch (event) {
181 case NETDEV_UP:
182 fib_add_ifaddr(ifa);
183-#ifdef CONFIG_IP_ROUTE_MULTIPATH
184 fib_sync_up(dev);
185-#endif
186 rt_cache_flush(dev_net(dev), -1);
187 break;
188 case NETDEV_DOWN:
189@@ -947,9 +957,7 @@ static int fib_netdev_event(struct notif
190 for_ifa(in_dev) {
191 fib_add_ifaddr(ifa);
192 } endfor_ifa(in_dev);
193-#ifdef CONFIG_IP_ROUTE_MULTIPATH
194 fib_sync_up(dev);
195-#endif
196 rt_cache_flush(dev_net(dev), -1);
197 break;
198 case NETDEV_DOWN:
199diff -urp v2.6.28/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
200--- v2.6.28/linux/net/ipv4/fib_hash.c 2008-10-11 12:46:16.000000000 +0300
201+++ linux/net/ipv4/fib_hash.c 2009-02-06 09:43:23.000000000 +0200
202@@ -278,25 +278,35 @@ out:
203 static void
204 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
205 {
206- int order, last_idx;
207+ int order, last_idx, last_dflt, last_nhsel;
208+ struct fib_alias *first_fa = NULL;
209+ struct hlist_head *head;
210 struct hlist_node *node;
211 struct fib_node *f;
212 struct fib_info *fi = NULL;
213 struct fib_info *last_resort;
214 struct fn_hash *t = (struct fn_hash *)tb->tb_data;
215- struct fn_zone *fz = t->fn_zones[0];
216+ struct fn_zone *fz = t->fn_zones[res->prefixlen];
217+ __be32 k;
218
219 if (fz == NULL)
220 return;
221
222+ k = fz_key(flp->fl4_dst, fz);
223+ last_dflt = -2;
224+ last_nhsel = 0;
225 last_idx = -1;
226 last_resort = NULL;
227 order = -1;
228
229 read_lock(&fib_hash_lock);
230- hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
231+ head = &fz->fz_hash[fn_hash(k, fz)];
232+ hlist_for_each_entry(f, node, head, fn_hash) {
233 struct fib_alias *fa;
234
235+ if (f->fn_key != k)
236+ continue;
237+
238 list_for_each_entry(fa, &f->fn_alias, fa_list) {
239 struct fib_info *next_fi = fa->fa_info;
240
241@@ -304,42 +314,56 @@ fn_hash_select_default(struct fib_table
242 fa->fa_type != RTN_UNICAST)
243 continue;
244
245+ if (fa->fa_tos &&
246+ fa->fa_tos != flp->fl4_tos)
247+ continue;
248 if (next_fi->fib_priority > res->fi->fib_priority)
249 break;
250- if (!next_fi->fib_nh[0].nh_gw ||
251- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
252- continue;
253 fa->fa_state |= FA_S_ACCESSED;
254
255- if (fi == NULL) {
256- if (next_fi != res->fi)
257- break;
258- } else if (!fib_detect_death(fi, order, &last_resort,
259- &last_idx, tb->tb_default)) {
260+ if (!first_fa) {
261+ last_dflt = fa->fa_last_dflt;
262+ first_fa = fa;
263+ }
264+ if (fi && !fib_detect_death(fi, order, &last_resort,
265+ &last_idx, &last_dflt, &last_nhsel, flp)) {
266 fib_result_assign(res, fi);
267- tb->tb_default = order;
268+ first_fa->fa_last_dflt = order;
269 goto out;
270 }
271 fi = next_fi;
272 order++;
273 }
274+ break;
275 }
276
277 if (order <= 0 || fi == NULL) {
278- tb->tb_default = -1;
279+ if (fi && fi->fib_nhs > 1 &&
280+ fib_detect_death(fi, order, &last_resort, &last_idx,
281+ &last_dflt, &last_nhsel, flp) &&
282+ last_resort == fi) {
283+ read_lock_bh(&fib_nhflags_lock);
284+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
285+ read_unlock_bh(&fib_nhflags_lock);
286+ }
287+ if (first_fa) first_fa->fa_last_dflt = -1;
288 goto out;
289 }
290
291 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
292- tb->tb_default)) {
293+ &last_dflt, &last_nhsel, flp)) {
294 fib_result_assign(res, fi);
295- tb->tb_default = order;
296+ first_fa->fa_last_dflt = order;
297 goto out;
298 }
299
300- if (last_idx >= 0)
301+ if (last_idx >= 0) {
302 fib_result_assign(res, last_resort);
303- tb->tb_default = last_idx;
304+ read_lock_bh(&fib_nhflags_lock);
305+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
306+ read_unlock_bh(&fib_nhflags_lock);
307+ first_fa->fa_last_dflt = last_idx;
308+ }
309 out:
310 read_unlock(&fib_hash_lock);
311 }
312@@ -463,6 +487,7 @@ static int fn_hash_insert(struct fib_tab
313 write_lock_bh(&fib_hash_lock);
314 fi_drop = fa->fa_info;
315 fa->fa_info = fi;
316+ fa->fa_last_dflt = -1;
317 fa->fa_type = cfg->fc_type;
318 fa->fa_scope = cfg->fc_scope;
319 state = fa->fa_state;
320@@ -517,6 +542,7 @@ static int fn_hash_insert(struct fib_tab
321 new_fa->fa_type = cfg->fc_type;
322 new_fa->fa_scope = cfg->fc_scope;
323 new_fa->fa_state = 0;
324+ new_fa->fa_last_dflt = -1;
325
326 /*
327 * Insert new entry to the list.
328diff -urp v2.6.28/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
329--- v2.6.28/linux/net/ipv4/fib_lookup.h 2008-04-17 09:58:09.000000000 +0300
330+++ linux/net/ipv4/fib_lookup.h 2009-02-06 09:43:23.000000000 +0200
331@@ -8,6 +8,7 @@
332 struct fib_alias {
333 struct list_head fa_list;
334 struct fib_info *fa_info;
335+ int fa_last_dflt;
336 u8 fa_tos;
337 u8 fa_type;
338 u8 fa_scope;
339@@ -38,7 +39,8 @@ extern struct fib_alias *fib_find_alias(
340 u8 tos, u32 prio);
341 extern int fib_detect_death(struct fib_info *fi, int order,
342 struct fib_info **last_resort,
343- int *last_idx, int dflt);
344+ int *last_idx, int *dflt, int *last_nhsel,
345+ const struct flowi *flp);
346
347 static inline void fib_result_assign(struct fib_result *res,
348 struct fib_info *fi)
349diff -urp v2.6.28/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
350--- v2.6.28/linux/net/ipv4/fib_rules.c 2008-10-11 12:46:16.000000000 +0300
351+++ linux/net/ipv4/fib_rules.c 2009-02-06 09:43:23.000000000 +0200
352@@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result *
353 }
354 #endif
355
356+int fib_result_table(struct fib_result *res)
357+{
358+ return res->r->table;
359+}
360+
361 int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
362 {
363 struct fib_lookup_arg arg = {
364diff -urp v2.6.28/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
365--- v2.6.28/linux/net/ipv4/fib_semantics.c 2008-10-11 12:46:16.000000000 +0300
366+++ linux/net/ipv4/fib_semantics.c 2009-02-06 09:43:23.000000000 +0200
367@@ -50,6 +50,7 @@ static struct hlist_head *fib_info_hash;
368 static struct hlist_head *fib_info_laddrhash;
369 static unsigned int fib_hash_size;
370 static unsigned int fib_info_cnt;
371+rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
372
373 #define DEVINDEX_HASHBITS 8
374 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
375@@ -186,7 +187,7 @@ static __inline__ int nh_comp(const stru
376 #ifdef CONFIG_NET_CLS_ROUTE
377 nh->nh_tclassid != onh->nh_tclassid ||
378 #endif
379- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
380+ ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
381 return -1;
382 onh++;
383 } endfor_nexthops(fi);
384@@ -237,7 +238,7 @@ static struct fib_info *fib_find_info(co
385 nfi->fib_priority == fi->fib_priority &&
386 memcmp(nfi->fib_metrics, fi->fib_metrics,
387 sizeof(fi->fib_metrics)) == 0 &&
388- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
389+ ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
390 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
391 return fi;
392 }
393@@ -348,26 +349,70 @@ struct fib_alias *fib_find_alias(struct
394 }
395
396 int fib_detect_death(struct fib_info *fi, int order,
397- struct fib_info **last_resort, int *last_idx, int dflt)
398+ struct fib_info **last_resort, int *last_idx, int *dflt,
399+ int *last_nhsel, const struct flowi *flp)
400 {
401 struct neighbour *n;
402- int state = NUD_NONE;
403+ int nhsel;
404+ int state;
405+ struct fib_nh * nh;
406+ __be32 dst;
407+ int flag, dead = 1;
408+
409+ /* change_nexthops(fi) { */
410+ for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
411+ if (flp->oif && flp->oif != nh->nh_oif)
412+ continue;
413+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
414+ nh->nh_scope == RT_SCOPE_LINK)
415+ continue;
416+ if (nh->nh_flags & RTNH_F_DEAD)
417+ continue;
418
419- n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
420- if (n) {
421- state = n->nud_state;
422- neigh_release(n);
423- }
424- if (state == NUD_REACHABLE)
425- return 0;
426- if ((state&NUD_VALID) && order != dflt)
427- return 0;
428- if ((state&NUD_VALID) ||
429- (*last_idx<0 && order > dflt)) {
430- *last_resort = fi;
431- *last_idx = order;
432+ flag = 0;
433+ if (nh->nh_dev->flags & IFF_NOARP) {
434+ dead = 0;
435+ goto setfl;
436+ }
437+
438+ dst = nh->nh_gw;
439+ if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
440+ dst = flp->fl4_dst;
441+
442+ state = NUD_NONE;
443+ n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
444+ if (n) {
445+ state = n->nud_state;
446+ neigh_release(n);
447+ }
448+ if (state == NUD_REACHABLE ||
449+ ((state&NUD_VALID) && order != *dflt)) {
450+ dead = 0;
451+ goto setfl;
452+ }
453+ if (!(state&NUD_VALID))
454+ flag = 1;
455+ if (!dead)
456+ goto setfl;
457+ if ((state&NUD_VALID) ||
458+ (*last_idx<0 && order >= *dflt)) {
459+ *last_resort = fi;
460+ *last_idx = order;
461+ *last_nhsel = nhsel;
462+ }
463+
464+ setfl:
465+
466+ read_lock_bh(&fib_nhflags_lock);
467+ if (flag)
468+ nh->nh_flags |= RTNH_F_SUSPECT;
469+ else
470+ nh->nh_flags &= ~RTNH_F_SUSPECT;
471+ read_unlock_bh(&fib_nhflags_lock);
472 }
473- return 1;
474+ /* } endfor_nexthops(fi) */
475+
476+ return dead;
477 }
478
479 #ifdef CONFIG_IP_ROUTE_MULTIPATH
480@@ -539,8 +584,11 @@ static int fib_check_nh(struct fib_confi
481 return -EINVAL;
482 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
483 return -ENODEV;
484- if (!(dev->flags&IFF_UP))
485- return -ENETDOWN;
486+ if (!(dev->flags&IFF_UP)) {
487+ if (fi->fib_protocol != RTPROT_STATIC)
488+ return -ENETDOWN;
489+ nh->nh_flags |= RTNH_F_DEAD;
490+ }
491 nh->nh_dev = dev;
492 dev_hold(dev);
493 nh->nh_scope = RT_SCOPE_LINK;
494@@ -560,24 +608,48 @@ static int fib_check_nh(struct fib_confi
495 /* It is not necessary, but requires a bit of thinking */
496 if (fl.fl4_scope < RT_SCOPE_LINK)
497 fl.fl4_scope = RT_SCOPE_LINK;
498- if ((err = fib_lookup(net, &fl, &res)) != 0)
499- return err;
500+ err = fib_lookup(net, &fl, &res);
501 }
502- err = -EINVAL;
503- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
504- goto out;
505- nh->nh_scope = res.scope;
506- nh->nh_oif = FIB_RES_OIF(res);
507- if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
508- goto out;
509- dev_hold(nh->nh_dev);
510- err = -ENETDOWN;
511- if (!(nh->nh_dev->flags & IFF_UP))
512- goto out;
513- err = 0;
514+ if (err) {
515+ struct in_device *in_dev;
516+
517+ if (err != -ENETUNREACH ||
518+ fi->fib_protocol != RTPROT_STATIC)
519+ return err;
520+
521+ in_dev = inetdev_by_index(net, nh->nh_oif);
522+ if (in_dev == NULL ||
523+ in_dev->dev->flags & IFF_UP) {
524+ if (in_dev)
525+ in_dev_put(in_dev);
526+ return err;
527+ }
528+ nh->nh_flags |= RTNH_F_DEAD;
529+ nh->nh_scope = RT_SCOPE_LINK;
530+ nh->nh_dev = in_dev->dev;
531+ dev_hold(nh->nh_dev);
532+ in_dev_put(in_dev);
533+ } else {
534+ err = -EINVAL;
535+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
536+ goto out;
537+ nh->nh_scope = res.scope;
538+ nh->nh_oif = FIB_RES_OIF(res);
539+ if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
540+ goto out;
541+ dev_hold(nh->nh_dev);
542+ if (!(nh->nh_dev->flags & IFF_UP)) {
543+ if (fi->fib_protocol != RTPROT_STATIC) {
544+ err = -ENETDOWN;
545+ goto out;
546+ }
547+ nh->nh_flags |= RTNH_F_DEAD;
548+ }
549+ err = 0;
550 out:
551- fib_res_put(&res);
552- return err;
553+ fib_res_put(&res);
554+ return err;
555+ }
556 } else {
557 struct in_device *in_dev;
558
559@@ -588,8 +660,11 @@ out:
560 if (in_dev == NULL)
561 return -ENODEV;
562 if (!(in_dev->dev->flags&IFF_UP)) {
563- in_dev_put(in_dev);
564- return -ENETDOWN;
565+ if (fi->fib_protocol != RTPROT_STATIC) {
566+ in_dev_put(in_dev);
567+ return -ENETDOWN;
568+ }
569+ nh->nh_flags |= RTNH_F_DEAD;
570 }
571 nh->nh_dev = in_dev->dev;
572 dev_hold(nh->nh_dev);
573@@ -899,8 +974,12 @@ int fib_semantic_match(struct list_head
574 for_nexthops(fi) {
575 if (nh->nh_flags&RTNH_F_DEAD)
576 continue;
577- if (!flp->oif || flp->oif == nh->nh_oif)
578- break;
579+ if (flp->oif && flp->oif != nh->nh_oif)
580+ continue;
581+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
582+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
583+ continue;
584+ break;
585 }
586 #ifdef CONFIG_IP_ROUTE_MULTIPATH
587 if (nhsel < fi->fib_nhs) {
588@@ -1080,18 +1159,29 @@ int fib_sync_down_dev(struct net_device
589 prev_fi = fi;
590 dead = 0;
591 change_nexthops(fi) {
592- if (nh->nh_flags&RTNH_F_DEAD)
593- dead++;
594- else if (nh->nh_dev == dev &&
595- nh->nh_scope != scope) {
596- nh->nh_flags |= RTNH_F_DEAD;
597+ if (nh->nh_flags&RTNH_F_DEAD) {
598+ if (fi->fib_protocol!=RTPROT_STATIC ||
599+ nh->nh_dev == NULL ||
600+ __in_dev_get_rtnl(nh->nh_dev) == NULL ||
601+ nh->nh_dev->flags&IFF_UP)
602+ dead++;
603+ } else if (nh->nh_dev == dev &&
604+ nh->nh_scope != scope) {
605+ write_lock_bh(&fib_nhflags_lock);
606 #ifdef CONFIG_IP_ROUTE_MULTIPATH
607- spin_lock_bh(&fib_multipath_lock);
608+ spin_lock(&fib_multipath_lock);
609+ nh->nh_flags |= RTNH_F_DEAD;
610 fi->fib_power -= nh->nh_power;
611 nh->nh_power = 0;
612- spin_unlock_bh(&fib_multipath_lock);
613+ spin_unlock(&fib_multipath_lock);
614+#else
615+ nh->nh_flags |= RTNH_F_DEAD;
616 #endif
617- dead++;
618+ write_unlock_bh(&fib_nhflags_lock);
619+ if (fi->fib_protocol!=RTPROT_STATIC ||
620+ force ||
621+ __in_dev_get_rtnl(dev) == NULL)
622+ dead++;
623 }
624 #ifdef CONFIG_IP_ROUTE_MULTIPATH
625 if (force > 1 && nh->nh_dev == dev) {
626@@ -1109,11 +1199,8 @@ int fib_sync_down_dev(struct net_device
627 return ret;
628 }
629
630-#ifdef CONFIG_IP_ROUTE_MULTIPATH
631-
632 /*
633- Dead device goes up. We wake up dead nexthops.
634- It takes sense only on multipath routes.
635+ A dead device goes up or a new address is added: wake up the dead nexthops.
636 */
637
638 int fib_sync_up(struct net_device *dev)
639@@ -1123,8 +1210,10 @@ int fib_sync_up(struct net_device *dev)
640 struct hlist_head *head;
641 struct hlist_node *node;
642 struct fib_nh *nh;
643- int ret;
644+ struct fib_result res;
645+ int ret, rep;
646
647+repeat:
648 if (!(dev->flags&IFF_UP))
649 return 0;
650
651@@ -1132,6 +1221,7 @@ int fib_sync_up(struct net_device *dev)
652 hash = fib_devindex_hashfn(dev->ifindex);
653 head = &fib_info_devhash[hash];
654 ret = 0;
655+ rep = 0;
656
657 hlist_for_each_entry(nh, node, head, nh_hash) {
658 struct fib_info *fi = nh->nh_parent;
659@@ -1144,19 +1234,39 @@ int fib_sync_up(struct net_device *dev)
660 prev_fi = fi;
661 alive = 0;
662 change_nexthops(fi) {
663- if (!(nh->nh_flags&RTNH_F_DEAD)) {
664- alive++;
665+ if (!(nh->nh_flags&RTNH_F_DEAD))
666 continue;
667- }
668 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
669 continue;
670 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
671 continue;
672+ if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
673+ struct flowi fl = {
674+ .nl_u = { .ip4_u =
675+ { .daddr = nh->nh_gw,
676+ .scope = nh->nh_scope } },
677+ .oif = nh->nh_oif,
678+ };
679+ if (fib_lookup(dev_net(dev), &fl, &res) != 0)
680+ continue;
681+ if (res.type != RTN_UNICAST &&
682+ res.type != RTN_LOCAL) {
683+ fib_res_put(&res);
684+ continue;
685+ }
686+ nh->nh_scope = res.scope;
687+ fib_res_put(&res);
688+ rep = 1;
689+ }
690 alive++;
691+#ifdef CONFIG_IP_ROUTE_MULTIPATH
692 spin_lock_bh(&fib_multipath_lock);
693 nh->nh_power = 0;
694+#endif
695 nh->nh_flags &= ~RTNH_F_DEAD;
696+#ifdef CONFIG_IP_ROUTE_MULTIPATH
697 spin_unlock_bh(&fib_multipath_lock);
698+#endif
699 } endfor_nexthops(fi)
700
701 if (alive > 0) {
702@@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev)
703 ret++;
704 }
705 }
706+ if (rep)
707+ goto repeat;
708
709 return ret;
710 }
711
712+#ifdef CONFIG_IP_ROUTE_MULTIPATH
713+
714 /*
715 The algorithm is suboptimal, but it provides really
716 fair weighted route distribution.
717@@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev)
718 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
719 {
720 struct fib_info *fi = res->fi;
721- int w;
722+ int w, alive;
723
724 spin_lock_bh(&fib_multipath_lock);
725+ if (flp->oif) {
726+ int sel = -1;
727+ w = -1;
728+ change_nexthops(fi) {
729+ if (flp->oif != nh->nh_oif)
730+ continue;
731+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
732+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
733+ continue;
734+ if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
735+ if (nh->nh_power > w) {
736+ w = nh->nh_power;
737+ sel = nhsel;
738+ }
739+ }
740+ } endfor_nexthops(fi);
741+ if (sel >= 0) {
742+ spin_unlock_bh(&fib_multipath_lock);
743+ res->nh_sel = sel;
744+ return;
745+ }
746+ goto last_resort;
747+ }
748+
749+repeat:
750 if (fi->fib_power <= 0) {
751 int power = 0;
752 change_nexthops(fi) {
753- if (!(nh->nh_flags&RTNH_F_DEAD)) {
754+ if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
755 power += nh->nh_weight;
756 nh->nh_power = nh->nh_weight;
757 }
758 } endfor_nexthops(fi);
759 fi->fib_power = power;
760- if (power <= 0) {
761- spin_unlock_bh(&fib_multipath_lock);
762- /* Race condition: route has just become dead. */
763- res->nh_sel = 0;
764- return;
765- }
766+ if (power <= 0)
767+ goto last_resort;
768 }
769
770
771@@ -1203,20 +1338,40 @@ void fib_select_multipath(const struct f
772
773 w = jiffies % fi->fib_power;
774
775+ alive = 0;
776 change_nexthops(fi) {
777- if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
778+ if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
779 if ((w -= nh->nh_power) <= 0) {
780 nh->nh_power--;
781 fi->fib_power--;
782- res->nh_sel = nhsel;
783 spin_unlock_bh(&fib_multipath_lock);
784+ res->nh_sel = nhsel;
785 return;
786 }
787+ alive = 1;
788+ }
789+ } endfor_nexthops(fi);
790+ if (alive) {
791+ fi->fib_power = 0;
792+ goto repeat;
793+ }
794+
795+last_resort:
796+
797+ for_nexthops(fi) {
798+ if (!(nh->nh_flags&RTNH_F_DEAD)) {
799+ if (flp->oif && flp->oif != nh->nh_oif)
800+ continue;
801+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
802+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
803+ continue;
804+ spin_unlock_bh(&fib_multipath_lock);
805+ res->nh_sel = nhsel;
806+ return;
807 }
808 } endfor_nexthops(fi);
809
810 /* Race condition: route has just become dead. */
811- res->nh_sel = 0;
812 spin_unlock_bh(&fib_multipath_lock);
813 }
814 #endif
815diff -urp v2.6.28/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c
816--- v2.6.28/linux/net/ipv4/fib_trie.c 2008-10-11 12:46:16.000000000 +0300
817+++ linux/net/ipv4/fib_trie.c 2009-02-06 09:43:23.000000000 +0200
818@@ -1261,6 +1261,7 @@ static int fn_trie_insert(struct fib_tab
819 fi_drop = fa->fa_info;
820 new_fa->fa_tos = fa->fa_tos;
821 new_fa->fa_info = fi;
822+ new_fa->fa_last_dflt = -1;
823 new_fa->fa_type = cfg->fc_type;
824 new_fa->fa_scope = cfg->fc_scope;
825 state = fa->fa_state;
826@@ -1301,6 +1302,7 @@ static int fn_trie_insert(struct fib_tab
827 new_fa->fa_type = cfg->fc_type;
828 new_fa->fa_scope = cfg->fc_scope;
829 new_fa->fa_state = 0;
830+ new_fa->fa_last_dflt = -1;
831 /*
832 * Insert new entry to the list.
833 */
834@@ -1802,24 +1804,31 @@ static void fn_trie_select_default(struc
835 struct fib_result *res)
836 {
837 struct trie *t = (struct trie *) tb->tb_data;
838- int order, last_idx;
839+ int order, last_idx, last_dflt, last_nhsel;
840+ struct fib_alias *first_fa = NULL;
841 struct fib_info *fi = NULL;
842 struct fib_info *last_resort;
843 struct fib_alias *fa = NULL;
844 struct list_head *fa_head;
845 struct leaf *l;
846+ u32 key, mask;
847
848+ last_dflt = -2;
849+ last_nhsel = 0;
850 last_idx = -1;
851 last_resort = NULL;
852 order = -1;
853
854+ mask = inet_make_mask(res->prefixlen);
855+ key = ntohl(flp->fl4_dst & mask);
856+
857 rcu_read_lock();
858
859- l = fib_find_node(t, 0);
860+ l = fib_find_node(t, key);
861 if (!l)
862 goto out;
863
864- fa_head = get_fa_head(l, 0);
865+ fa_head = get_fa_head(l, res->prefixlen);
866 if (!fa_head)
867 goto out;
868
869@@ -1833,39 +1842,52 @@ static void fn_trie_select_default(struc
870 fa->fa_type != RTN_UNICAST)
871 continue;
872
873+ if (fa->fa_tos &&
874+ fa->fa_tos != flp->fl4_tos)
875+ continue;
876 if (next_fi->fib_priority > res->fi->fib_priority)
877 break;
878- if (!next_fi->fib_nh[0].nh_gw ||
879- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
880- continue;
881 fa->fa_state |= FA_S_ACCESSED;
882
883- if (fi == NULL) {
884- if (next_fi != res->fi)
885- break;
886- } else if (!fib_detect_death(fi, order, &last_resort,
887- &last_idx, tb->tb_default)) {
888+ if (!first_fa) {
889+ last_dflt = fa->fa_last_dflt;
890+ first_fa = fa;
891+ }
892+ if (fi && !fib_detect_death(fi, order, &last_resort,
893+ &last_idx, &last_dflt, &last_nhsel, flp)) {
894 fib_result_assign(res, fi);
895- tb->tb_default = order;
896+ first_fa->fa_last_dflt = order;
897 goto out;
898 }
899 fi = next_fi;
900 order++;
901 }
902 if (order <= 0 || fi == NULL) {
903- tb->tb_default = -1;
904+ if (fi && fi->fib_nhs > 1 &&
905+ fib_detect_death(fi, order, &last_resort, &last_idx,
906+ &last_dflt, &last_nhsel, flp) &&
907+ last_resort == fi) {
908+ read_lock_bh(&fib_nhflags_lock);
909+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
910+ read_unlock_bh(&fib_nhflags_lock);
911+ }
912+ if (first_fa) first_fa->fa_last_dflt = -1;
913 goto out;
914 }
915
916 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
917- tb->tb_default)) {
918+ &last_dflt, &last_nhsel, flp)) {
919 fib_result_assign(res, fi);
920- tb->tb_default = order;
921+ first_fa->fa_last_dflt = order;
922 goto out;
923 }
924- if (last_idx >= 0)
925+ if (last_idx >= 0) {
926 fib_result_assign(res, last_resort);
927- tb->tb_default = last_idx;
928+ read_lock_bh(&fib_nhflags_lock);
929+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
930+ read_unlock_bh(&fib_nhflags_lock);
931+ first_fa->fa_last_dflt = last_idx;
932+ }
933 out:
934 rcu_read_unlock();
935 }
936diff -urp v2.6.28/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
937--- v2.6.28/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-12-25 10:12:25.000000000 +0200
938+++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2009-02-06 09:43:23.000000000 +0200
939@@ -54,7 +54,7 @@ masquerade_tg(struct sk_buff *skb, const
940 enum ip_conntrack_info ctinfo;
941 struct nf_nat_range newrange;
942 const struct nf_nat_multi_range_compat *mr;
943- const struct rtable *rt;
944+ struct rtable *rt;
945 __be32 newsrc;
946
947 NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
13e5c3b1 948@@ -69,13 +69,27 @@ masquerade_tg(struct sk_buff *skb, const
2380c486
JR
949 return NF_ACCEPT;
950
951 mr = par->targinfo;
13e5c3b1 952- rt = skb_rtable(skb);
2380c486
JR
953- newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
954- if (!newsrc) {
955- printk("MASQUERADE: %s ate my IP address\n", par->out->name);
956- return NF_DROP;
2380c486
JR
957+ {
958+ struct flowi fl = { .nl_u = { .ip4_u =
13e5c3b1
AM
959+ { .daddr = ip_hdr(skb)->daddr,
960+ .tos = (RT_TOS(ip_hdr(skb)->tos) |
961+ RTO_CONN),
84685b46 962+ .gw = skb_rtable(skb)->rt_gateway,
13e5c3b1
AM
963+ } },
964+ .mark = skb->mark,
965+ .oif = par->out->ifindex };
2380c486
JR
966+ if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) {
967+ /* Funky routing can do this. */
968+ if (net_ratelimit())
969+ printk("MASQUERADE:"
13e5c3b1 970+ " No route: Rusty's brain broke!\n");
2380c486
JR
971+ return NF_DROP;
972+ }
973 }
974
975+ newsrc = rt->rt_src;
976+ ip_rt_put(rt);
977+
2380c486 978 nat->masq_index = par->out->ifindex;
13e5c3b1
AM
979
980 /* Transfer from original range. */
2380c486
JR
981diff -urp v2.6.28/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
982--- v2.6.28/linux/net/ipv4/netfilter/nf_nat_core.c 2008-12-25 10:12:25.000000000 +0200
983+++ linux/net/ipv4/netfilter/nf_nat_core.c 2009-02-06 09:43:23.000000000 +0200
984@@ -710,6 +710,52 @@ static struct pernet_operations nf_nat_n
985 .exit = nf_nat_net_exit,
986 };
987
988+unsigned int
989+ip_nat_route_input(unsigned int hooknum,
990+ struct sk_buff *skb,
991+ const struct net_device *in,
992+ const struct net_device *out,
993+ int (*okfn)(struct sk_buff *))
994+{
995+ struct iphdr *iph;
996+ struct nf_conn *conn;
997+ enum ip_conntrack_info ctinfo;
998+ enum ip_conntrack_dir dir;
999+ unsigned long statusbit;
1000+ __be32 saddr;
1001+
1002+ if (!(conn = nf_ct_get(skb, &ctinfo)))
1003+ return NF_ACCEPT;
1004+
1005+ if (!(conn->status & IPS_NAT_DONE_MASK))
1006+ return NF_ACCEPT;
1007+ dir = CTINFO2DIR(ctinfo);
1008+ statusbit = IPS_SRC_NAT;
1009+ if (dir == IP_CT_DIR_REPLY)
1010+ statusbit ^= IPS_NAT_MASK;
1011+ if (!(conn->status & statusbit))
1012+ return NF_ACCEPT;
1013+
95ff68cd 1014+ if (skb_dst(skb))
2380c486
JR
1015+ return NF_ACCEPT;
1016+
1017+ if (skb->len < sizeof(struct iphdr))
1018+ return NF_ACCEPT;
1019+
1020+ /* Use the daddr of the opposite direction's tuple as the masquerade address (lsrc) */
1021+ iph = ip_hdr(skb);
1022+ saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
1023+ if (saddr == iph->saddr)
1024+ return NF_ACCEPT;
1025+
1026+ if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
1027+ skb->dev, saddr))
1028+ return NF_DROP;
1029+
1030+ return NF_ACCEPT;
1031+}
1032+EXPORT_SYMBOL_GPL(ip_nat_route_input);
1033+
1034 static int __init nf_nat_init(void)
1035 {
1036 size_t i;
1037diff -urp v2.6.28/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
1038--- v2.6.28/linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-07-14 09:58:50.000000000 +0300
1039+++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2009-02-06 09:43:23.000000000 +0200
1040@@ -256,6 +256,14 @@ static struct nf_hook_ops nf_nat_ops[] _
1041 .hooknum = NF_INET_PRE_ROUTING,
1042 .priority = NF_IP_PRI_NAT_DST,
1043 },
1044+ /* Before routing: do the route lookup before source mangling */
1045+ {
1046+ .hook = ip_nat_route_input,
1047+ .owner = THIS_MODULE,
1048+ .pf = PF_INET,
1049+ .hooknum = NF_INET_PRE_ROUTING,
1050+ .priority = NF_IP_PRI_LAST-1,
1051+ },
1052 /* After packet filtering, change source */
1053 {
1054 .hook = nf_nat_out,
1055diff -urp v2.6.28/linux/net/ipv4/route.c linux/net/ipv4/route.c
1056--- v2.6.28/linux/net/ipv4/route.c 2008-12-25 10:12:25.000000000 +0200
1057+++ linux/net/ipv4/route.c 2009-02-06 09:43:43.000000000 +0200
1058@@ -679,6 +679,7 @@ static inline int compare_keys(struct fl
1059 return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
1060 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
1061 (fl1->mark ^ fl2->mark) |
1062+ ((__force u32)(fl1->nl_u.ip4_u.lsrc ^ fl2->nl_u.ip4_u.lsrc)) |
1063 (*(u16 *)&fl1->nl_u.ip4_u.tos ^
1064 *(u16 *)&fl2->nl_u.ip4_u.tos) |
1065 (fl1->oif ^ fl2->oif) |
1066@@ -1286,6 +1287,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
1067
1068 /* Gateway is different ... */
1069 rt->rt_gateway = new_gw;
1070+ if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
1071
1072 /* Redirect received -> path was valid */
1073 dst_confirm(&rth->u.dst);
1074@@ -1735,6 +1737,7 @@ static int ip_route_input_mc(struct sk_b
1075 rth->fl.fl4_tos = tos;
1076 rth->fl.mark = skb->mark;
1077 rth->fl.fl4_src = saddr;
1078+ rth->fl.fl4_lsrc = 0;
1079 rth->rt_src = saddr;
1080 #ifdef CONFIG_NET_CLS_ROUTE
1081 rth->u.dst.tclassid = itag;
1082@@ -1745,6 +1748,7 @@ static int ip_route_input_mc(struct sk_b
1083 dev_hold(rth->u.dst.dev);
1084 rth->idev = in_dev_get(rth->u.dst.dev);
1085 rth->fl.oif = 0;
1086+ rth->fl.fl4_gw = 0;
1087 rth->rt_gateway = daddr;
1088 rth->rt_spec_dst= spec_dst;
1089 rth->rt_genid = rt_genid(dev_net(dev));
1090@@ -1810,7 +1814,7 @@ static int __mkroute_input(struct sk_buf
1091 struct fib_result *res,
1092 struct in_device *in_dev,
1093 __be32 daddr, __be32 saddr, u32 tos,
1094- struct rtable **result)
1095+ __be32 lsrc, struct rtable **result)
1096 {
1097
1098 struct rtable *rth;
1099@@ -1844,6 +1848,7 @@ static int __mkroute_input(struct sk_buf
1100 flags |= RTCF_DIRECTSRC;
1101
1102 if (out_dev == in_dev && err &&
1103+ !lsrc &&
1104 (IN_DEV_SHARED_MEDIA(out_dev) ||
1105 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1106 flags |= RTCF_DOREDIRECT;
1107@@ -1877,6 +1882,7 @@ static int __mkroute_input(struct sk_buf
1108 rth->fl.mark = skb->mark;
1109 rth->fl.fl4_src = saddr;
1110 rth->rt_src = saddr;
1111+ rth->fl.fl4_lsrc = lsrc;
1112 rth->rt_gateway = daddr;
1113 rth->rt_iif =
1114 rth->fl.iif = in_dev->dev->ifindex;
1115@@ -1884,6 +1890,7 @@ static int __mkroute_input(struct sk_buf
1116 dev_hold(rth->u.dst.dev);
1117 rth->idev = in_dev_get(rth->u.dst.dev);
1118 rth->fl.oif = 0;
1119+ rth->fl.fl4_gw = 0;
1120 rth->rt_spec_dst= spec_dst;
1121
1122 rth->u.dst.input = ip_forward;
1123@@ -1904,21 +1911,23 @@ static int __mkroute_input(struct sk_buf
1124
1125 static int ip_mkroute_input(struct sk_buff *skb,
1126 struct fib_result *res,
1127+ struct net *net,
1128 const struct flowi *fl,
1129 struct in_device *in_dev,
1130- __be32 daddr, __be32 saddr, u32 tos)
1131+ __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1132 {
1133 struct rtable* rth = NULL;
1134 int err;
1135 unsigned hash;
1136
1137+ fib_select_default(net, fl, res);
1138 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1139- if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1140+ if (res->fi && res->fi->fib_nhs > 1)
1141 fib_select_multipath(fl, res);
1142 #endif
1143
1144 /* create a routing cache entry */
1145- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1146+ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1147 if (err)
1148 return err;
1149
1150@@ -1939,18 +1948,19 @@ static int ip_mkroute_input(struct sk_bu
1151 */
1152
1153 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1154- u8 tos, struct net_device *dev)
1155+ u8 tos, struct net_device *dev, __be32 lsrc)
1156 {
1157 struct fib_result res;
1158 struct in_device *in_dev = in_dev_get(dev);
1159 struct flowi fl = { .nl_u = { .ip4_u =
1160 { .daddr = daddr,
1161- .saddr = saddr,
1162+ .saddr = lsrc? : saddr,
1163 .tos = tos,
1164 .scope = RT_SCOPE_UNIVERSE,
1165 } },
1166 .mark = skb->mark,
1167- .iif = dev->ifindex };
1168+ .iif = lsrc?
1169+ dev_net(dev)->loopback_dev->ifindex : dev->ifindex };
1170 unsigned flags = 0;
1171 u32 itag = 0;
1172 struct rtable * rth;
1173@@ -1986,6 +1996,12 @@ static int ip_route_input_slow(struct sk
1174 ipv4_is_loopback(daddr))
1175 goto martian_destination;
1176
1177+ if (lsrc) {
1178+ if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1179+ ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1180+ goto e_inval;
1181+ }
1182+
1183 /*
1184 * Now we are ready to route packet.
1185 */
1186@@ -1995,6 +2011,8 @@ static int ip_route_input_slow(struct sk
1187 goto no_route;
1188 }
1189 free_res = 1;
1190+ fl.iif = dev->ifindex;
1191+ fl.fl4_src = saddr;
1192
1193 RT_CACHE_STAT_INC(in_slow_tot);
1194
1195@@ -2019,7 +2037,7 @@ static int ip_route_input_slow(struct sk
1196 if (res.type != RTN_UNICAST)
1197 goto martian_destination;
1198
1199- err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1200+ err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
1201 done:
1202 in_dev_put(in_dev);
1203 if (free_res)
1204@@ -2029,6 +2047,8 @@ out: return err;
1205 brd_input:
1206 if (skb->protocol != htons(ETH_P_IP))
1207 goto e_inval;
1208+ if (lsrc)
1209+ goto e_inval;
1210
1211 if (ipv4_is_zeronet(saddr))
1212 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1213@@ -2070,6 +2090,7 @@ local_input:
1214 rth->u.dst.dev = net->loopback_dev;
1215 dev_hold(rth->u.dst.dev);
1216 rth->idev = in_dev_get(rth->u.dst.dev);
1217+ rth->fl.fl4_gw = 0;
1218 rth->rt_gateway = daddr;
1219 rth->rt_spec_dst= spec_dst;
1220 rth->u.dst.input= ip_local_deliver;
1221@@ -2121,8 +2142,9 @@ martian_source:
1222 goto e_inval;
1223 }
1224
1225-int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1226- u8 tos, struct net_device *dev)
1227+static inline int
1228+ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1229+ u8 tos, struct net_device *dev, __be32 lsrc)
1230 {
1231 struct rtable * rth;
1232 unsigned hash;
1233@@ -2139,6 +2161,7 @@ int ip_route_input(struct sk_buff *skb,
1234 if (((rth->fl.fl4_dst ^ daddr) |
1235 (rth->fl.fl4_src ^ saddr) |
1236 (rth->fl.iif ^ iif) |
1237+ (rth->fl.fl4_lsrc ^ lsrc) |
1238 rth->fl.oif |
1239 (rth->fl.fl4_tos ^ tos)) == 0 &&
1240 rth->fl.mark == skb->mark &&
1241@@ -2186,7 +2209,19 @@ int ip_route_input(struct sk_buff *skb,
1242 rcu_read_unlock();
1243 return -EINVAL;
1244 }
1245- return ip_route_input_slow(skb, daddr, saddr, tos, dev);
1246+ return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1247+}
1248+
1249+int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1250+ u8 tos, struct net_device *dev)
1251+{
1252+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0);
1253+}
1254+
1255+int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1256+ u8 tos, struct net_device *dev, __be32 lsrc)
1257+{
1258+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc);
1259 }
1260
1261 static int __mkroute_output(struct rtable **result,
1262@@ -2258,6 +2293,7 @@ static int __mkroute_output(struct rtabl
1263 rth->fl.fl4_tos = tos;
1264 rth->fl.fl4_src = oldflp->fl4_src;
1265 rth->fl.oif = oldflp->oif;
1266+ rth->fl.fl4_gw = oldflp->fl4_gw;
1267 rth->fl.mark = oldflp->mark;
1268 rth->rt_dst = fl->fl4_dst;
1269 rth->rt_src = fl->fl4_src;
1270@@ -2339,6 +2375,7 @@ static int ip_route_output_slow(struct n
1271 struct flowi fl = { .nl_u = { .ip4_u =
1272 { .daddr = oldflp->fl4_dst,
1273 .saddr = oldflp->fl4_src,
1274+ .gw = oldflp->fl4_gw,
1275 .tos = tos & IPTOS_RT_MASK,
1276 .scope = ((tos & RTO_ONLINK) ?
1277 RT_SCOPE_LINK :
1278@@ -2450,6 +2487,7 @@ static int ip_route_output_slow(struct n
1279 dev_out = net->loopback_dev;
1280 dev_hold(dev_out);
1281 fl.oif = net->loopback_dev->ifindex;
1282+ fl.fl4_gw = 0;
1283 res.type = RTN_LOCAL;
1284 flags |= RTCF_LOCAL;
1285 goto make_route;
1286@@ -2457,7 +2495,7 @@ static int ip_route_output_slow(struct n
1287
1288 if (fib_lookup(net, &fl, &res)) {
1289 res.fi = NULL;
1290- if (oldflp->oif) {
1291+ if (oldflp->oif && dev_out->flags & IFF_UP) {
1292 /* Apparently, routing tables are wrong. Assume,
1293 that the destination is on link.
1294
1295@@ -2497,6 +2535,7 @@ static int ip_route_output_slow(struct n
1296 dev_out = net->loopback_dev;
1297 dev_hold(dev_out);
1298 fl.oif = dev_out->ifindex;
1299+ fl.fl4_gw = 0;
1300 if (res.fi)
1301 fib_info_put(res.fi);
1302 res.fi = NULL;
1303@@ -2504,13 +2543,12 @@ static int ip_route_output_slow(struct n
1304 goto make_route;
1305 }
1306
1307+ if (res.type == RTN_UNICAST)
1308+ fib_select_default(net, &fl, &res);
1309 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1310- if (res.fi->fib_nhs > 1 && fl.oif == 0)
1311+ if (res.fi->fib_nhs > 1)
1312 fib_select_multipath(&fl, &res);
1313- else
1314 #endif
1315- if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1316- fib_select_default(net, &fl, &res);
1317
1318 if (!fl.fl4_src)
1319 fl.fl4_src = FIB_RES_PREFSRC(res);
1320@@ -2548,6 +2586,7 @@ int __ip_route_output_key(struct net *ne
1321 rth->fl.fl4_src == flp->fl4_src &&
1322 rth->fl.iif == 0 &&
1323 rth->fl.oif == flp->oif &&
1324+ rth->fl.fl4_gw == flp->fl4_gw &&
1325 rth->fl.mark == flp->mark &&
1326 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1327 (IPTOS_RT_MASK | RTO_ONLINK)) &&
1328@@ -3322,3 +3361,4 @@ void __init ip_static_sysctl_init(void)
1329 EXPORT_SYMBOL(__ip_select_ident);
1330 EXPORT_SYMBOL(ip_route_input);
1331 EXPORT_SYMBOL(ip_route_output_key);
1332+EXPORT_SYMBOL(ip_route_input_lookup);
This page took 0.181709 seconds and 4 git commands to generate.