]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-routes.patch
- updated to 2.6.27.21
[packages/kernel.git] / kernel-routes.patch
CommitLineData
83cd86f2
PS
1diff -urp v2.6.27/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2--- v2.6.27/linux/include/linux/rtnetlink.h 2008-10-11 12:46:15.000000000 +0300
3+++ linux/include/linux/rtnetlink.h 2008-10-11 13:50:41.000000000 +0300
10b0f5dc 4@@ -304,6 +304,8 @@ struct rtnexthop
d70ce330 5 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
6 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
7 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
8+#define RTNH_F_SUSPECT 8 /* We don't know the real state */
9+#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
10
11 /* Macros to handle hexthops */
12
83cd86f2
PS
13diff -urp v2.6.27/linux/include/net/flow.h linux/include/net/flow.h
14--- v2.6.27/linux/include/net/flow.h 2008-10-11 12:46:15.000000000 +0300
15+++ linux/include/net/flow.h 2008-10-11 13:51:37.000000000 +0300
d70ce330 16@@ -19,6 +19,8 @@ struct flowi {
17 struct {
18 __be32 daddr;
19 __be32 saddr;
20+ __be32 lsrc;
21+ __be32 gw;
22 __u8 tos;
23 __u8 scope;
24 } ip4_u;
25@@ -43,6 +45,8 @@ struct flowi {
26 #define fl6_flowlabel nl_u.ip6_u.flowlabel
27 #define fl4_dst nl_u.ip4_u.daddr
28 #define fl4_src nl_u.ip4_u.saddr
29+#define fl4_lsrc nl_u.ip4_u.lsrc
30+#define fl4_gw nl_u.ip4_u.gw
31 #define fl4_tos nl_u.ip4_u.tos
32 #define fl4_scope nl_u.ip4_u.scope
33
83cd86f2
PS
34diff -urp v2.6.27/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35--- v2.6.27/linux/include/net/ip_fib.h 2008-04-17 09:58:08.000000000 +0300
36+++ linux/include/net/ip_fib.h 2008-10-11 13:50:41.000000000 +0300
d70ce330 37@@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str
38 extern struct fib_table *fib_new_table(struct net *net, u32 id);
39 extern struct fib_table *fib_get_table(struct net *net, u32 id);
40
41+extern int fib_result_table(struct fib_result *res);
42+
43 #endif /* CONFIG_IP_MULTIPLE_TABLES */
44
45 /* Exported by fib_frontend.c */
46@@ -276,4 +278,6 @@ static inline void fib_proc_exit(struct
47 }
48 #endif
49
50+extern rwlock_t fib_nhflags_lock;
51+
52 #endif /* _NET_FIB_H */
83cd86f2
PS
53diff -urp v2.6.27/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54--- v2.6.27/linux/include/net/netfilter/nf_nat.h 2008-04-17 09:58:08.000000000 +0300
55+++ linux/include/net/netfilter/nf_nat.h 2008-10-11 13:51:37.000000000 +0300
d70ce330 56@@ -77,6 +77,13 @@ struct nf_conn_nat
57 #endif
58 };
59
60+/* Call input routing for SNAT-ed traffic */
61+extern unsigned int ip_nat_route_input(unsigned int hooknum,
62+ struct sk_buff *skb,
63+ const struct net_device *in,
64+ const struct net_device *out,
65+ int (*okfn)(struct sk_buff *));
66+
67 /* Set up the info structure to map into this range. */
68 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69 const struct nf_nat_range *range,
83cd86f2
PS
70diff -urp v2.6.27/linux/include/net/route.h linux/include/net/route.h
71--- v2.6.27/linux/include/net/route.h 2008-10-11 12:46:15.000000000 +0300
72+++ linux/include/net/route.h 2008-10-11 13:51:37.000000000 +0300
10b0f5dc 73@@ -116,6 +116,7 @@ extern int __ip_route_output_key(struct
d70ce330 74 extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
75 extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
76 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
77+extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
10b0f5dc 78 extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
d70ce330 79 extern void ip_rt_send_redirect(struct sk_buff *skb);
80
83cd86f2
PS
81diff -urp v2.6.27/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82--- v2.6.27/linux/net/bridge/br_netfilter.c 2008-10-11 12:46:16.000000000 +0300
83+++ linux/net/bridge/br_netfilter.c 2008-10-11 13:51:37.000000000 +0300
84@@ -328,6 +328,10 @@ static int br_nf_pre_routing_finish(stru
d70ce330 85 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
86 int err;
87
88+ /* Old skb->dst is not expected, it is lost in all cases */
89+ dst_release(skb->dst);
90+ skb->dst = NULL;
91+
92 if (nf_bridge->mask & BRNF_PKT_TYPE) {
93 skb->pkt_type = PACKET_OTHERHOST;
94 nf_bridge->mask ^= BRNF_PKT_TYPE;
83cd86f2
PS
95diff -urp v2.6.27/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
96--- v2.6.27/linux/net/ipv4/fib_frontend.c 2008-10-11 12:46:16.000000000 +0300
97+++ linux/net/ipv4/fib_frontend.c 2008-10-11 13:50:41.000000000 +0300
98@@ -47,6 +47,8 @@
d70ce330 99
100 #ifndef CONFIG_IP_MULTIPLE_TABLES
101
102+#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
103+
104 static int __net_init fib4_rules_init(struct net *net)
105 {
106 struct fib_table *local_table, *main_table;
83cd86f2 107@@ -71,6 +73,8 @@ fail:
d70ce330 108 }
109 #else
110
111+#define FIB_RES_TABLE(r) (fib_result_table(r))
112+
113 struct fib_table *fib_new_table(struct net *net, u32 id)
114 {
115 struct fib_table *tb;
83cd86f2 116@@ -125,7 +129,8 @@ void fib_select_default(struct net *net,
d70ce330 117 table = res->r->table;
118 #endif
119 tb = fib_get_table(net, table);
120- if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
121+ if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
122+ FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
123 tb->tb_select_default(tb, flp, res);
124 }
125
83cd86f2 126@@ -239,6 +244,9 @@ int fib_validate_source(__be32 src, __be
d70ce330 127 .tos = tos } },
128 .iif = oif };
129 struct fib_result res;
130+ int table;
131+ unsigned char prefixlen;
132+ unsigned char scope;
133 int no_addr, rpf;
134 int ret;
135 struct net *net;
83cd86f2 136@@ -262,31 +270,35 @@ int fib_validate_source(__be32 src, __be
d70ce330 137 goto e_inval_res;
138 *spec_dst = FIB_RES_PREFSRC(res);
139 fib_combine_itag(itag, &res);
140-#ifdef CONFIG_IP_ROUTE_MULTIPATH
141- if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
142-#else
143 if (FIB_RES_DEV(res) == dev)
144-#endif
145 {
146 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
147 fib_res_put(&res);
148 return ret;
149 }
150+ table = FIB_RES_TABLE(&res);
151+ prefixlen = res.prefixlen;
152+ scope = res.scope;
153 fib_res_put(&res);
154 if (no_addr)
155 goto last_resort;
156- if (rpf)
157- goto e_inval;
158 fl.oif = dev->ifindex;
159
160 ret = 0;
161 if (fib_lookup(net, &fl, &res) == 0) {
162- if (res.type == RTN_UNICAST) {
163+ if (res.type == RTN_UNICAST &&
164+ ((table == FIB_RES_TABLE(&res) &&
165+ res.prefixlen >= prefixlen && res.scope >= scope) ||
166+ !rpf)) {
167 *spec_dst = FIB_RES_PREFSRC(res);
168 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
169+ fib_res_put(&res);
170+ return ret;
171 }
172 fib_res_put(&res);
173 }
174+ if (rpf)
175+ goto e_inval;
176 return ret;
177
178 last_resort:
83cd86f2 179@@ -909,9 +921,7 @@ static int fib_inetaddr_event(struct not
d70ce330 180 switch (event) {
181 case NETDEV_UP:
182 fib_add_ifaddr(ifa);
183-#ifdef CONFIG_IP_ROUTE_MULTIPATH
83cd86f2 184 fib_sync_up(dev);
d70ce330 185-#endif
83cd86f2 186 rt_cache_flush(dev_net(dev), -1);
d70ce330 187 break;
188 case NETDEV_DOWN:
83cd86f2 189@@ -947,9 +957,7 @@ static int fib_netdev_event(struct notif
d70ce330 190 for_ifa(in_dev) {
191 fib_add_ifaddr(ifa);
192 } endfor_ifa(in_dev);
193-#ifdef CONFIG_IP_ROUTE_MULTIPATH
194 fib_sync_up(dev);
195-#endif
83cd86f2 196 rt_cache_flush(dev_net(dev), -1);
d70ce330 197 break;
198 case NETDEV_DOWN:
83cd86f2
PS
199diff -urp v2.6.27/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
200--- v2.6.27/linux/net/ipv4/fib_hash.c 2008-10-11 12:46:16.000000000 +0300
201+++ linux/net/ipv4/fib_hash.c 2008-10-11 13:50:41.000000000 +0300
202@@ -278,25 +278,35 @@ out:
d70ce330 203 static void
204 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
205 {
206- int order, last_idx;
207+ int order, last_idx, last_dflt, last_nhsel;
208+ struct fib_alias *first_fa = NULL;
209+ struct hlist_head *head;
210 struct hlist_node *node;
211 struct fib_node *f;
212 struct fib_info *fi = NULL;
213 struct fib_info *last_resort;
214 struct fn_hash *t = (struct fn_hash*)tb->tb_data;
215- struct fn_zone *fz = t->fn_zones[0];
216+ struct fn_zone *fz = t->fn_zones[res->prefixlen];
217+ __be32 k;
218
219 if (fz == NULL)
220 return;
221
222+ k = fz_key(flp->fl4_dst, fz);
223+ last_dflt = -2;
224+ last_nhsel = 0;
225 last_idx = -1;
226 last_resort = NULL;
227 order = -1;
228
229 read_lock(&fib_hash_lock);
230- hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
231+ head = &fz->fz_hash[fn_hash(k, fz)];
232+ hlist_for_each_entry(f, node, head, fn_hash) {
233 struct fib_alias *fa;
234
235+ if (f->fn_key != k)
236+ continue;
237+
238 list_for_each_entry(fa, &f->fn_alias, fa_list) {
239 struct fib_info *next_fi = fa->fa_info;
240
83cd86f2 241@@ -304,42 +314,56 @@ fn_hash_select_default(struct fib_table
d70ce330 242 fa->fa_type != RTN_UNICAST)
243 continue;
244
245+ if (fa->fa_tos &&
246+ fa->fa_tos != flp->fl4_tos)
247+ continue;
248 if (next_fi->fib_priority > res->fi->fib_priority)
249 break;
250- if (!next_fi->fib_nh[0].nh_gw ||
251- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
252- continue;
253 fa->fa_state |= FA_S_ACCESSED;
254
255- if (fi == NULL) {
256- if (next_fi != res->fi)
257- break;
258- } else if (!fib_detect_death(fi, order, &last_resort,
259- &last_idx, tb->tb_default)) {
260+ if (!first_fa) {
261+ last_dflt = fa->fa_last_dflt;
262+ first_fa = fa;
263+ }
264+ if (fi && !fib_detect_death(fi, order, &last_resort,
265+ &last_idx, &last_dflt, &last_nhsel, flp)) {
266 fib_result_assign(res, fi);
267- tb->tb_default = order;
268+ first_fa->fa_last_dflt = order;
269 goto out;
270 }
271 fi = next_fi;
272 order++;
273 }
274+ break;
275 }
276
277 if (order <= 0 || fi == NULL) {
278- tb->tb_default = -1;
279+ if (fi && fi->fib_nhs > 1 &&
280+ fib_detect_death(fi, order, &last_resort, &last_idx,
281+ &last_dflt, &last_nhsel, flp) &&
282+ last_resort == fi) {
283+ read_lock_bh(&fib_nhflags_lock);
284+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
285+ read_unlock_bh(&fib_nhflags_lock);
286+ }
287+ if (first_fa) first_fa->fa_last_dflt = -1;
288 goto out;
289 }
290
291 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
292- tb->tb_default)) {
293+ &last_dflt, &last_nhsel, flp)) {
294 fib_result_assign(res, fi);
295- tb->tb_default = order;
296+ first_fa->fa_last_dflt = order;
297 goto out;
298 }
299
300- if (last_idx >= 0)
301+ if (last_idx >= 0) {
302 fib_result_assign(res, last_resort);
303- tb->tb_default = last_idx;
304+ read_lock_bh(&fib_nhflags_lock);
305+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
306+ read_unlock_bh(&fib_nhflags_lock);
307+ first_fa->fa_last_dflt = last_idx;
308+ }
309 out:
310 read_unlock(&fib_hash_lock);
311 }
83cd86f2 312@@ -463,6 +487,7 @@ static int fn_hash_insert(struct fib_tab
d70ce330 313 write_lock_bh(&fib_hash_lock);
314 fi_drop = fa->fa_info;
315 fa->fa_info = fi;
316+ fa->fa_last_dflt = -1;
317 fa->fa_type = cfg->fc_type;
318 fa->fa_scope = cfg->fc_scope;
319 state = fa->fa_state;
83cd86f2 320@@ -517,6 +542,7 @@ static int fn_hash_insert(struct fib_tab
d70ce330 321 new_fa->fa_type = cfg->fc_type;
322 new_fa->fa_scope = cfg->fc_scope;
323 new_fa->fa_state = 0;
324+ new_fa->fa_last_dflt = -1;
325
326 /*
327 * Insert new entry to the list.
83cd86f2
PS
328diff -urp v2.6.27/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
329--- v2.6.27/linux/net/ipv4/fib_lookup.h 2008-04-17 09:58:09.000000000 +0300
330+++ linux/net/ipv4/fib_lookup.h 2008-10-11 13:50:41.000000000 +0300
d70ce330 331@@ -8,6 +8,7 @@
332 struct fib_alias {
333 struct list_head fa_list;
334 struct fib_info *fa_info;
335+ int fa_last_dflt;
336 u8 fa_tos;
337 u8 fa_type;
338 u8 fa_scope;
339@@ -38,7 +39,8 @@ extern struct fib_alias *fib_find_alias(
340 u8 tos, u32 prio);
341 extern int fib_detect_death(struct fib_info *fi, int order,
342 struct fib_info **last_resort,
343- int *last_idx, int dflt);
344+ int *last_idx, int *dflt, int *last_nhsel,
345+ const struct flowi *flp);
346
347 static inline void fib_result_assign(struct fib_result *res,
348 struct fib_info *fi)
83cd86f2
PS
349diff -urp v2.6.27/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
350--- v2.6.27/linux/net/ipv4/fib_rules.c 2008-10-11 12:46:16.000000000 +0300
351+++ linux/net/ipv4/fib_rules.c 2008-10-11 13:50:41.000000000 +0300
d70ce330 352@@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result *
353 }
354 #endif
355
356+int fib_result_table(struct fib_result *res)
357+{
358+ return res->r->table;
359+}
360+
361 int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
362 {
363 struct fib_lookup_arg arg = {
83cd86f2
PS
364diff -urp v2.6.27/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
365--- v2.6.27/linux/net/ipv4/fib_semantics.c 2008-10-11 12:46:16.000000000 +0300
366+++ linux/net/ipv4/fib_semantics.c 2008-10-11 13:51:37.000000000 +0300
367@@ -50,6 +50,7 @@ static struct hlist_head *fib_info_hash;
d70ce330 368 static struct hlist_head *fib_info_laddrhash;
369 static unsigned int fib_hash_size;
370 static unsigned int fib_info_cnt;
371+rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
372
373 #define DEVINDEX_HASHBITS 8
374 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
83cd86f2 375@@ -186,7 +187,7 @@ static __inline__ int nh_comp(const stru
d70ce330 376 #ifdef CONFIG_NET_CLS_ROUTE
377 nh->nh_tclassid != onh->nh_tclassid ||
378 #endif
379- ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
380+ ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
381 return -1;
382 onh++;
383 } endfor_nexthops(fi);
83cd86f2 384@@ -237,7 +238,7 @@ static struct fib_info *fib_find_info(co
d70ce330 385 nfi->fib_priority == fi->fib_priority &&
386 memcmp(nfi->fib_metrics, fi->fib_metrics,
387 sizeof(fi->fib_metrics)) == 0 &&
388- ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
389+ ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
390 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
391 return fi;
392 }
83cd86f2 393@@ -348,26 +349,70 @@ struct fib_alias *fib_find_alias(struct
d70ce330 394 }
395
396 int fib_detect_death(struct fib_info *fi, int order,
397- struct fib_info **last_resort, int *last_idx, int dflt)
398+ struct fib_info **last_resort, int *last_idx, int *dflt,
399+ int *last_nhsel, const struct flowi *flp)
400 {
401 struct neighbour *n;
402- int state = NUD_NONE;
403+ int nhsel;
404+ int state;
405+ struct fib_nh * nh;
406+ __be32 dst;
407+ int flag, dead = 1;
408+
409+ /* change_nexthops(fi) { */
410+ for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
411+ if (flp->oif && flp->oif != nh->nh_oif)
412+ continue;
413+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
414+ nh->nh_scope == RT_SCOPE_LINK)
415+ continue;
416+ if (nh->nh_flags & RTNH_F_DEAD)
417+ continue;
418
419- n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
420- if (n) {
421- state = n->nud_state;
422- neigh_release(n);
423- }
424- if (state==NUD_REACHABLE)
425- return 0;
426- if ((state&NUD_VALID) && order != dflt)
427- return 0;
428- if ((state&NUD_VALID) ||
429- (*last_idx<0 && order > dflt)) {
430- *last_resort = fi;
431- *last_idx = order;
432+ flag = 0;
433+ if (nh->nh_dev->flags & IFF_NOARP) {
434+ dead = 0;
435+ goto setfl;
436+ }
437+
438+ dst = nh->nh_gw;
439+ if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
440+ dst = flp->fl4_dst;
441+
442+ state = NUD_NONE;
443+ n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
444+ if (n) {
445+ state = n->nud_state;
446+ neigh_release(n);
447+ }
448+ if (state==NUD_REACHABLE ||
449+ ((state&NUD_VALID) && order != *dflt)) {
450+ dead = 0;
451+ goto setfl;
452+ }
453+ if (!(state&NUD_VALID))
454+ flag = 1;
455+ if (!dead)
456+ goto setfl;
457+ if ((state&NUD_VALID) ||
458+ (*last_idx<0 && order >= *dflt)) {
459+ *last_resort = fi;
460+ *last_idx = order;
461+ *last_nhsel = nhsel;
462+ }
463+
464+ setfl:
465+
466+ read_lock_bh(&fib_nhflags_lock);
467+ if (flag)
468+ nh->nh_flags |= RTNH_F_SUSPECT;
469+ else
470+ nh->nh_flags &= ~RTNH_F_SUSPECT;
471+ read_unlock_bh(&fib_nhflags_lock);
472 }
473- return 1;
474+ /* } endfor_nexthops(fi) */
475+
476+ return dead;
477 }
478
479 #ifdef CONFIG_IP_ROUTE_MULTIPATH
83cd86f2 480@@ -539,8 +584,11 @@ static int fib_check_nh(struct fib_confi
d70ce330 481 return -EINVAL;
482 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
483 return -ENODEV;
484- if (!(dev->flags&IFF_UP))
485- return -ENETDOWN;
486+ if (!(dev->flags&IFF_UP)) {
487+ if (fi->fib_protocol != RTPROT_STATIC)
488+ return -ENETDOWN;
489+ nh->nh_flags |= RTNH_F_DEAD;
490+ }
491 nh->nh_dev = dev;
492 dev_hold(dev);
493 nh->nh_scope = RT_SCOPE_LINK;
83cd86f2 494@@ -560,24 +608,48 @@ static int fib_check_nh(struct fib_confi
d70ce330 495 /* It is not necessary, but requires a bit of thinking */
496 if (fl.fl4_scope < RT_SCOPE_LINK)
497 fl.fl4_scope = RT_SCOPE_LINK;
498- if ((err = fib_lookup(net, &fl, &res)) != 0)
499- return err;
500+ err = fib_lookup(net, &fl, &res);
501 }
502- err = -EINVAL;
503- if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
504- goto out;
505- nh->nh_scope = res.scope;
506- nh->nh_oif = FIB_RES_OIF(res);
507- if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
508- goto out;
509- dev_hold(nh->nh_dev);
510- err = -ENETDOWN;
511- if (!(nh->nh_dev->flags & IFF_UP))
512- goto out;
513- err = 0;
514+ if (err) {
515+ struct in_device *in_dev;
516+
517+ if (err != -ENETUNREACH ||
518+ fi->fib_protocol != RTPROT_STATIC)
519+ return err;
520+
521+ in_dev = inetdev_by_index(net, nh->nh_oif);
522+ if (in_dev == NULL ||
523+ in_dev->dev->flags & IFF_UP) {
524+ if (in_dev)
525+ in_dev_put(in_dev);
526+ return err;
527+ }
528+ nh->nh_flags |= RTNH_F_DEAD;
529+ nh->nh_scope = RT_SCOPE_LINK;
530+ nh->nh_dev = in_dev->dev;
531+ dev_hold(nh->nh_dev);
532+ in_dev_put(in_dev);
533+ } else {
534+ err = -EINVAL;
535+ if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
536+ goto out;
537+ nh->nh_scope = res.scope;
538+ nh->nh_oif = FIB_RES_OIF(res);
539+ if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
540+ goto out;
541+ dev_hold(nh->nh_dev);
542+ if (!(nh->nh_dev->flags & IFF_UP)) {
543+ if (fi->fib_protocol != RTPROT_STATIC) {
544+ err = -ENETDOWN;
545+ goto out;
546+ }
547+ nh->nh_flags |= RTNH_F_DEAD;
548+ }
549+ err = 0;
550 out:
551- fib_res_put(&res);
552- return err;
553+ fib_res_put(&res);
554+ return err;
555+ }
556 } else {
557 struct in_device *in_dev;
558
83cd86f2 559@@ -588,8 +660,11 @@ out:
d70ce330 560 if (in_dev == NULL)
561 return -ENODEV;
562 if (!(in_dev->dev->flags&IFF_UP)) {
563- in_dev_put(in_dev);
564- return -ENETDOWN;
565+ if (fi->fib_protocol != RTPROT_STATIC) {
566+ in_dev_put(in_dev);
567+ return -ENETDOWN;
568+ }
569+ nh->nh_flags |= RTNH_F_DEAD;
570 }
571 nh->nh_dev = in_dev->dev;
572 dev_hold(nh->nh_dev);
83cd86f2 573@@ -899,8 +974,12 @@ int fib_semantic_match(struct list_head
d70ce330 574 for_nexthops(fi) {
575 if (nh->nh_flags&RTNH_F_DEAD)
576 continue;
577- if (!flp->oif || flp->oif == nh->nh_oif)
578- break;
579+ if (flp->oif && flp->oif != nh->nh_oif)
580+ continue;
581+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
582+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
583+ continue;
584+ break;
585 }
586 #ifdef CONFIG_IP_ROUTE_MULTIPATH
587 if (nhsel < fi->fib_nhs) {
83cd86f2 588@@ -1080,18 +1159,29 @@ int fib_sync_down_dev(struct net_device
d70ce330 589 prev_fi = fi;
590 dead = 0;
591 change_nexthops(fi) {
592- if (nh->nh_flags&RTNH_F_DEAD)
593- dead++;
594- else if (nh->nh_dev == dev &&
595- nh->nh_scope != scope) {
596- nh->nh_flags |= RTNH_F_DEAD;
597+ if (nh->nh_flags&RTNH_F_DEAD) {
598+ if (fi->fib_protocol!=RTPROT_STATIC ||
599+ nh->nh_dev == NULL ||
600+ __in_dev_get_rtnl(nh->nh_dev) == NULL ||
601+ nh->nh_dev->flags&IFF_UP)
602+ dead++;
603+ } else if (nh->nh_dev == dev &&
604+ nh->nh_scope != scope) {
605+ write_lock_bh(&fib_nhflags_lock);
606 #ifdef CONFIG_IP_ROUTE_MULTIPATH
607- spin_lock_bh(&fib_multipath_lock);
608+ spin_lock(&fib_multipath_lock);
609+ nh->nh_flags |= RTNH_F_DEAD;
610 fi->fib_power -= nh->nh_power;
611 nh->nh_power = 0;
612- spin_unlock_bh(&fib_multipath_lock);
613+ spin_unlock(&fib_multipath_lock);
614+#else
615+ nh->nh_flags |= RTNH_F_DEAD;
616 #endif
617- dead++;
618+ write_unlock_bh(&fib_nhflags_lock);
619+ if (fi->fib_protocol!=RTPROT_STATIC ||
620+ force ||
621+ __in_dev_get_rtnl(dev) == NULL)
622+ dead++;
623 }
624 #ifdef CONFIG_IP_ROUTE_MULTIPATH
625 if (force > 1 && nh->nh_dev == dev) {
83cd86f2 626@@ -1109,11 +1199,8 @@ int fib_sync_down_dev(struct net_device
d70ce330 627 return ret;
628 }
629
630-#ifdef CONFIG_IP_ROUTE_MULTIPATH
631-
632 /*
633- Dead device goes up. We wake up dead nexthops.
634- It takes sense only on multipath routes.
635+ Dead device goes up or new address is added. We wake up dead nexthops.
636 */
637
638 int fib_sync_up(struct net_device *dev)
83cd86f2 639@@ -1123,8 +1210,10 @@ int fib_sync_up(struct net_device *dev)
d70ce330 640 struct hlist_head *head;
641 struct hlist_node *node;
642 struct fib_nh *nh;
643- int ret;
644+ struct fib_result res;
645+ int ret, rep;
646
647+repeat:
648 if (!(dev->flags&IFF_UP))
649 return 0;
650
83cd86f2 651@@ -1132,6 +1221,7 @@ int fib_sync_up(struct net_device *dev)
d70ce330 652 hash = fib_devindex_hashfn(dev->ifindex);
653 head = &fib_info_devhash[hash];
654 ret = 0;
655+ rep = 0;
656
657 hlist_for_each_entry(nh, node, head, nh_hash) {
658 struct fib_info *fi = nh->nh_parent;
83cd86f2 659@@ -1144,19 +1234,39 @@ int fib_sync_up(struct net_device *dev)
d70ce330 660 prev_fi = fi;
661 alive = 0;
662 change_nexthops(fi) {
663- if (!(nh->nh_flags&RTNH_F_DEAD)) {
664- alive++;
665+ if (!(nh->nh_flags&RTNH_F_DEAD))
666 continue;
667- }
668 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
669 continue;
670 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
671 continue;
672+ if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
673+ struct flowi fl = {
674+ .nl_u = { .ip4_u =
675+ { .daddr = nh->nh_gw,
676+ .scope = nh->nh_scope } },
677+ .oif = nh->nh_oif,
678+ };
10b0f5dc 679+ if (fib_lookup(dev_net(dev), &fl, &res) != 0)
d70ce330 680+ continue;
681+ if (res.type != RTN_UNICAST &&
682+ res.type != RTN_LOCAL) {
683+ fib_res_put(&res);
684+ continue;
685+ }
686+ nh->nh_scope = res.scope;
687+ fib_res_put(&res);
688+ rep = 1;
689+ }
690 alive++;
691+#ifdef CONFIG_IP_ROUTE_MULTIPATH
692 spin_lock_bh(&fib_multipath_lock);
693 nh->nh_power = 0;
694+#endif
695 nh->nh_flags &= ~RTNH_F_DEAD;
696+#ifdef CONFIG_IP_ROUTE_MULTIPATH
697 spin_unlock_bh(&fib_multipath_lock);
698+#endif
699 } endfor_nexthops(fi)
700
701 if (alive > 0) {
83cd86f2 702@@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev)
d70ce330 703 ret++;
704 }
705 }
706+ if (rep)
707+ goto repeat;
708
709 return ret;
710 }
711
712+#ifdef CONFIG_IP_ROUTE_MULTIPATH
713+
714 /*
715 The algorithm is suboptimal, but it provides really
716 fair weighted route distribution.
83cd86f2 717@@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev)
d70ce330 718 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
719 {
720 struct fib_info *fi = res->fi;
721- int w;
722+ int w, alive;
723
724 spin_lock_bh(&fib_multipath_lock);
725+ if (flp->oif) {
726+ int sel = -1;
727+ w = -1;
728+ change_nexthops(fi) {
729+ if (flp->oif != nh->nh_oif)
730+ continue;
731+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
732+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
733+ continue;
734+ if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
735+ if (nh->nh_power > w) {
736+ w = nh->nh_power;
737+ sel = nhsel;
738+ }
739+ }
740+ } endfor_nexthops(fi);
741+ if (sel >= 0) {
742+ spin_unlock_bh(&fib_multipath_lock);
743+ res->nh_sel = sel;
744+ return;
745+ }
746+ goto last_resort;
747+ }
748+
749+repeat:
750 if (fi->fib_power <= 0) {
751 int power = 0;
752 change_nexthops(fi) {
753- if (!(nh->nh_flags&RTNH_F_DEAD)) {
754+ if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
755 power += nh->nh_weight;
756 nh->nh_power = nh->nh_weight;
757 }
758 } endfor_nexthops(fi);
759 fi->fib_power = power;
760- if (power <= 0) {
761- spin_unlock_bh(&fib_multipath_lock);
762- /* Race condition: route has just become dead. */
763- res->nh_sel = 0;
764- return;
765- }
766+ if (power <= 0)
767+ goto last_resort;
768 }
769
770
83cd86f2 771@@ -1203,20 +1338,40 @@ void fib_select_multipath(const struct f
d70ce330 772
773 w = jiffies % fi->fib_power;
774
775+ alive = 0;
776 change_nexthops(fi) {
777- if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
778+ if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
779 if ((w -= nh->nh_power) <= 0) {
780 nh->nh_power--;
781 fi->fib_power--;
782- res->nh_sel = nhsel;
783 spin_unlock_bh(&fib_multipath_lock);
784+ res->nh_sel = nhsel;
785 return;
786 }
787+ alive = 1;
788+ }
789+ } endfor_nexthops(fi);
790+ if (alive) {
791+ fi->fib_power = 0;
792+ goto repeat;
793+ }
794+
795+last_resort:
796+
797+ for_nexthops(fi) {
798+ if (!(nh->nh_flags&RTNH_F_DEAD)) {
799+ if (flp->oif && flp->oif != nh->nh_oif)
800+ continue;
801+ if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
802+ nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
803+ continue;
804+ spin_unlock_bh(&fib_multipath_lock);
805+ res->nh_sel = nhsel;
806+ return;
807 }
808 } endfor_nexthops(fi);
809
810 /* Race condition: route has just become dead. */
811- res->nh_sel = 0;
812 spin_unlock_bh(&fib_multipath_lock);
813 }
814 #endif
83cd86f2
PS
815diff -urp v2.6.27/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c
816--- v2.6.27/linux/net/ipv4/fib_trie.c 2008-10-11 12:46:16.000000000 +0300
817+++ linux/net/ipv4/fib_trie.c 2008-10-11 13:50:41.000000000 +0300
818@@ -1261,6 +1261,7 @@ static int fn_trie_insert(struct fib_tab
10b0f5dc
PS
819 fi_drop = fa->fa_info;
820 new_fa->fa_tos = fa->fa_tos;
821 new_fa->fa_info = fi;
822+ new_fa->fa_last_dflt = -1;
823 new_fa->fa_type = cfg->fc_type;
824 new_fa->fa_scope = cfg->fc_scope;
825 state = fa->fa_state;
83cd86f2 826@@ -1301,6 +1302,7 @@ static int fn_trie_insert(struct fib_tab
10b0f5dc
PS
827 new_fa->fa_type = cfg->fc_type;
828 new_fa->fa_scope = cfg->fc_scope;
829 new_fa->fa_state = 0;
830+ new_fa->fa_last_dflt = -1;
831 /*
832 * Insert new entry to the list.
833 */
83cd86f2 834@@ -1802,24 +1804,31 @@ static void fn_trie_select_default(struc
10b0f5dc
PS
835 struct fib_result *res)
836 {
837 struct trie *t = (struct trie *) tb->tb_data;
838- int order, last_idx;
839+ int order, last_idx, last_dflt, last_nhsel;
840+ struct fib_alias *first_fa = NULL;
841 struct fib_info *fi = NULL;
842 struct fib_info *last_resort;
843 struct fib_alias *fa = NULL;
844 struct list_head *fa_head;
845 struct leaf *l;
846+ u32 key, mask;
847
848+ last_dflt = -2;
849+ last_nhsel = 0;
850 last_idx = -1;
851 last_resort = NULL;
852 order = -1;
853
854+ mask = inet_make_mask(res->prefixlen);
855+ key = ntohl(flp->fl4_dst & mask);
856+
857 rcu_read_lock();
858
859- l = fib_find_node(t, 0);
860+ l = fib_find_node(t, key);
861 if (!l)
862 goto out;
863
864- fa_head = get_fa_head(l, 0);
865+ fa_head = get_fa_head(l, res->prefixlen);
866 if (!fa_head)
867 goto out;
868
83cd86f2 869@@ -1833,39 +1842,52 @@ static void fn_trie_select_default(struc
10b0f5dc
PS
870 fa->fa_type != RTN_UNICAST)
871 continue;
872
873+ if (fa->fa_tos &&
874+ fa->fa_tos != flp->fl4_tos)
875+ continue;
876 if (next_fi->fib_priority > res->fi->fib_priority)
877 break;
878- if (!next_fi->fib_nh[0].nh_gw ||
879- next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
880- continue;
881 fa->fa_state |= FA_S_ACCESSED;
882
883- if (fi == NULL) {
884- if (next_fi != res->fi)
885- break;
886- } else if (!fib_detect_death(fi, order, &last_resort,
887- &last_idx, tb->tb_default)) {
888+ if (!first_fa) {
889+ last_dflt = fa->fa_last_dflt;
890+ first_fa = fa;
891+ }
892+ if (fi && !fib_detect_death(fi, order, &last_resort,
893+ &last_idx, &last_dflt, &last_nhsel, flp)) {
894 fib_result_assign(res, fi);
895- tb->tb_default = order;
896+ first_fa->fa_last_dflt = order;
897 goto out;
898 }
899 fi = next_fi;
900 order++;
901 }
902 if (order <= 0 || fi == NULL) {
903- tb->tb_default = -1;
904+ if (fi && fi->fib_nhs > 1 &&
905+ fib_detect_death(fi, order, &last_resort, &last_idx,
906+ &last_dflt, &last_nhsel, flp) &&
907+ last_resort == fi) {
908+ read_lock_bh(&fib_nhflags_lock);
909+ fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
910+ read_unlock_bh(&fib_nhflags_lock);
911+ }
912+ if (first_fa) first_fa->fa_last_dflt = -1;
913 goto out;
914 }
915
916 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
917- tb->tb_default)) {
918+ &last_dflt, &last_nhsel, flp)) {
919 fib_result_assign(res, fi);
920- tb->tb_default = order;
921+ first_fa->fa_last_dflt = order;
922 goto out;
923 }
924- if (last_idx >= 0)
925+ if (last_idx >= 0) {
926 fib_result_assign(res, last_resort);
927- tb->tb_default = last_idx;
928+ read_lock_bh(&fib_nhflags_lock);
929+ last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
930+ read_unlock_bh(&fib_nhflags_lock);
931+ first_fa->fa_last_dflt = last_idx;
932+ }
933 out:
934 rcu_read_unlock();
935 }
83cd86f2
PS
936diff -urp v2.6.27/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
937--- v2.6.27/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-10-11 12:46:16.000000000 +0300
938+++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-10-11 13:51:37.000000000 +0300
d70ce330 939@@ -59,7 +59,7 @@ masquerade_tg(struct sk_buff *skb, const
940 enum ip_conntrack_info ctinfo;
941 struct nf_nat_range newrange;
942 const struct nf_nat_multi_range_compat *mr;
943- const struct rtable *rt;
944+ struct rtable *rt;
945 __be32 newsrc;
946
947 NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
948@@ -77,13 +77,28 @@ masquerade_tg(struct sk_buff *skb, const
949 return NF_ACCEPT;
950
951 mr = targinfo;
10b0f5dc 952- rt = skb->rtable;
d70ce330 953- newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
954- if (!newsrc) {
955- printk("MASQUERADE: %s ate my IP address\n", out->name);
956- return NF_DROP;
957+
958+ {
959+ struct flowi fl = { .nl_u = { .ip4_u =
960+ { .daddr = ip_hdr(skb)->daddr,
961+ .tos = (RT_TOS(ip_hdr(skb)->tos) |
962+ RTO_CONN),
10b0f5dc 963+ .gw = skb->rtable->rt_gateway,
d70ce330 964+ } },
965+ .mark = skb->mark,
966+ .oif = out->ifindex };
10b0f5dc 967+ if (ip_route_output_key(dev_net(out), &rt, &fl) != 0) {
d70ce330 968+ /* Funky routing can do this. */
969+ if (net_ratelimit())
970+ printk("MASQUERADE:"
971+ " No route: Rusty's brain broke!\n");
972+ return NF_DROP;
973+ }
974 }
975
976+ newsrc = rt->rt_src;
977+ ip_rt_put(rt);
978+
979 write_lock_bh(&masq_lock);
980 nat->masq_index = out->ifindex;
981 write_unlock_bh(&masq_lock);
83cd86f2
PS
982diff -urp v2.6.27/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
983--- v2.6.27/linux/net/ipv4/netfilter/nf_nat_core.c 2008-10-11 12:46:16.000000000 +0300
984+++ linux/net/ipv4/netfilter/nf_nat_core.c 2008-10-11 13:51:37.000000000 +0300
10b0f5dc 985@@ -583,6 +583,52 @@ static struct nf_ct_ext_type nat_extend
d70ce330 986 .flags = NF_CT_EXT_F_PREALLOC,
987 };
988
989+unsigned int
990+ip_nat_route_input(unsigned int hooknum,
991+ struct sk_buff *skb,
992+ const struct net_device *in,
993+ const struct net_device *out,
994+ int (*okfn)(struct sk_buff *))
995+{
996+ struct iphdr *iph;
997+ struct nf_conn *conn;
998+ enum ip_conntrack_info ctinfo;
999+ enum ip_conntrack_dir dir;
1000+ unsigned long statusbit;
1001+ __be32 saddr;
1002+
1003+ if (!(conn = nf_ct_get(skb, &ctinfo)))
1004+ return NF_ACCEPT;
1005+
1006+ if (!(conn->status & IPS_NAT_DONE_MASK))
1007+ return NF_ACCEPT;
1008+ dir = CTINFO2DIR(ctinfo);
1009+ statusbit = IPS_SRC_NAT;
1010+ if (dir == IP_CT_DIR_REPLY)
1011+ statusbit ^= IPS_NAT_MASK;
1012+ if (!(conn->status & statusbit))
1013+ return NF_ACCEPT;
1014+
1015+ if (skb->dst)
1016+ return NF_ACCEPT;
1017+
1018+ if (skb->len < sizeof(struct iphdr))
1019+ return NF_ACCEPT;
1020+
1021+ /* use daddr in other direction as masquerade address (lsrc) */
1022+ iph = ip_hdr(skb);
1023+ saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
1024+ if (saddr == iph->saddr)
1025+ return NF_ACCEPT;
1026+
1027+ if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
1028+ skb->dev, saddr))
1029+ return NF_DROP;
1030+
1031+ return NF_ACCEPT;
1032+}
1033+EXPORT_SYMBOL_GPL(ip_nat_route_input);
1034+
1035 static int __init nf_nat_init(void)
1036 {
1037 size_t i;
83cd86f2
PS
1038diff -urp v2.6.27/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
1039--- v2.6.27/linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-07-14 09:58:50.000000000 +0300
1040+++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-10-11 13:51:37.000000000 +0300
10b0f5dc 1041@@ -256,6 +256,14 @@ static struct nf_hook_ops nf_nat_ops[] _
d70ce330 1042 .hooknum = NF_INET_PRE_ROUTING,
1043 .priority = NF_IP_PRI_NAT_DST,
1044 },
1045+ /* Before routing, route before mangling */
1046+ {
1047+ .hook = ip_nat_route_input,
1048+ .owner = THIS_MODULE,
1049+ .pf = PF_INET,
1050+ .hooknum = NF_INET_PRE_ROUTING,
1051+ .priority = NF_IP_PRI_LAST-1,
1052+ },
1053 /* After packet filtering, change source */
1054 {
1055 .hook = nf_nat_out,
83cd86f2
PS
1056diff -urp v2.6.27/linux/net/ipv4/route.c linux/net/ipv4/route.c
1057--- v2.6.27/linux/net/ipv4/route.c 2008-10-11 12:46:16.000000000 +0300
1058+++ linux/net/ipv4/route.c 2008-10-11 13:51:37.000000000 +0300
1059@@ -1276,6 +1276,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
d70ce330 1060
1061 /* Gateway is different ... */
1062 rt->rt_gateway = new_gw;
1063+ if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
1064
1065 /* Redirect received -> path was valid */
1066 dst_confirm(&rth->u.dst);
83cd86f2 1067@@ -1725,6 +1726,7 @@ static int ip_route_input_mc(struct sk_b
d70ce330 1068 rth->fl.fl4_tos = tos;
1069 rth->fl.mark = skb->mark;
1070 rth->fl.fl4_src = saddr;
1071+ rth->fl.fl4_lsrc = 0;
1072 rth->rt_src = saddr;
1073 #ifdef CONFIG_NET_CLS_ROUTE
1074 rth->u.dst.tclassid = itag;
83cd86f2 1075@@ -1735,6 +1737,7 @@ static int ip_route_input_mc(struct sk_b
d70ce330 1076 dev_hold(rth->u.dst.dev);
1077 rth->idev = in_dev_get(rth->u.dst.dev);
1078 rth->fl.oif = 0;
1079+ rth->fl.fl4_gw = 0;
1080 rth->rt_gateway = daddr;
1081 rth->rt_spec_dst= spec_dst;
83cd86f2
PS
1082 rth->rt_genid = rt_genid(dev_net(dev));
1083@@ -1800,7 +1803,7 @@ static int __mkroute_input(struct sk_buf
10b0f5dc
PS
1084 struct fib_result *res,
1085 struct in_device *in_dev,
1086 __be32 daddr, __be32 saddr, u32 tos,
1087- struct rtable **result)
1088+ __be32 lsrc, struct rtable **result)
d70ce330 1089 {
1090
1091 struct rtable *rth;
83cd86f2 1092@@ -1834,6 +1837,7 @@ static int __mkroute_input(struct sk_buf
d70ce330 1093 flags |= RTCF_DIRECTSRC;
1094
10b0f5dc 1095 if (out_dev == in_dev && err &&
d70ce330 1096+ !lsrc &&
1097 (IN_DEV_SHARED_MEDIA(out_dev) ||
1098 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1099 flags |= RTCF_DOREDIRECT;
83cd86f2 1100@@ -1867,6 +1871,7 @@ static int __mkroute_input(struct sk_buf
d70ce330 1101 rth->fl.mark = skb->mark;
1102 rth->fl.fl4_src = saddr;
1103 rth->rt_src = saddr;
1104+ rth->fl.fl4_lsrc = lsrc;
1105 rth->rt_gateway = daddr;
1106 rth->rt_iif =
1107 rth->fl.iif = in_dev->dev->ifindex;
83cd86f2 1108@@ -1874,6 +1879,7 @@ static int __mkroute_input(struct sk_buf
d70ce330 1109 dev_hold(rth->u.dst.dev);
1110 rth->idev = in_dev_get(rth->u.dst.dev);
1111 rth->fl.oif = 0;
1112+ rth->fl.fl4_gw = 0;
1113 rth->rt_spec_dst= spec_dst;
1114
1115 rth->u.dst.input = ip_forward;
83cd86f2 1116@@ -1894,21 +1900,23 @@ static int __mkroute_input(struct sk_buf
10b0f5dc
PS
1117
1118 static int ip_mkroute_input(struct sk_buff *skb,
1119 struct fib_result *res,
1120+ struct net *net,
1121 const struct flowi *fl,
1122 struct in_device *in_dev,
1123- __be32 daddr, __be32 saddr, u32 tos)
1124+ __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
d70ce330 1125 {
1126 struct rtable* rth = NULL;
1127 int err;
1128 unsigned hash;
1129
1130+ fib_select_default(net, fl, res);
1131 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1132- if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1133+ if (res->fi && res->fi->fib_nhs > 1)
1134 fib_select_multipath(fl, res);
1135 #endif
1136
1137 /* create a routing cache entry */
1138- err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1139+ err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1140 if (err)
1141 return err;
1142
83cd86f2 1143@@ -1929,18 +1937,19 @@ static int ip_mkroute_input(struct sk_bu
d70ce330 1144 */
1145
1146 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1147- u8 tos, struct net_device *dev)
1148+ u8 tos, struct net_device *dev, __be32 lsrc)
1149 {
1150 struct fib_result res;
1151 struct in_device *in_dev = in_dev_get(dev);
1152 struct flowi fl = { .nl_u = { .ip4_u =
1153 { .daddr = daddr,
1154- .saddr = saddr,
1155+ .saddr = lsrc? : saddr,
1156 .tos = tos,
1157 .scope = RT_SCOPE_UNIVERSE,
1158 } },
1159 .mark = skb->mark,
1160- .iif = dev->ifindex };
1161+ .iif = lsrc?
10b0f5dc 1162+ dev_net(dev)->loopback_dev->ifindex : dev->ifindex };
d70ce330 1163 unsigned flags = 0;
1164 u32 itag = 0;
1165 struct rtable * rth;
83cd86f2 1166@@ -1976,6 +1985,12 @@ static int ip_route_input_slow(struct sk
d70ce330 1167 ipv4_is_loopback(daddr))
1168 goto martian_destination;
1169
1170+ if (lsrc) {
1171+ if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1172+ ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1173+ goto e_inval;
1174+ }
1175+
1176 /*
1177 * Now we are ready to route packet.
1178 */
83cd86f2 1179@@ -1985,6 +2000,8 @@ static int ip_route_input_slow(struct sk
d70ce330 1180 goto no_route;
1181 }
1182 free_res = 1;
1183+ fl.iif = dev->ifindex;
1184+ fl.fl4_src = saddr;
1185
1186 RT_CACHE_STAT_INC(in_slow_tot);
1187
83cd86f2 1188@@ -2009,7 +2026,7 @@ static int ip_route_input_slow(struct sk
d70ce330 1189 if (res.type != RTN_UNICAST)
1190 goto martian_destination;
1191
1192- err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1193+ err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
1194 done:
1195 in_dev_put(in_dev);
1196 if (free_res)
83cd86f2 1197@@ -2019,6 +2036,8 @@ out: return err;
d70ce330 1198 brd_input:
1199 if (skb->protocol != htons(ETH_P_IP))
1200 goto e_inval;
1201+ if (lsrc)
1202+ goto e_inval;
1203
1204 if (ipv4_is_zeronet(saddr))
1205 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
83cd86f2 1206@@ -2060,6 +2079,7 @@ local_input:
d70ce330 1207 rth->u.dst.dev = net->loopback_dev;
1208 dev_hold(rth->u.dst.dev);
1209 rth->idev = in_dev_get(rth->u.dst.dev);
1210+ rth->fl.fl4_gw = 0;
1211 rth->rt_gateway = daddr;
1212 rth->rt_spec_dst= spec_dst;
1213 rth->u.dst.input= ip_local_deliver;
83cd86f2 1214@@ -2111,8 +2131,9 @@ martian_source:
d70ce330 1215 goto e_inval;
1216 }
1217
1218-int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1219- u8 tos, struct net_device *dev)
1220+static inline int
1221+ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1222+ u8 tos, struct net_device *dev, __be32 lsrc)
1223 {
1224 struct rtable * rth;
1225 unsigned hash;
83cd86f2 1226@@ -2129,6 +2150,7 @@ int ip_route_input(struct sk_buff *skb,
10b0f5dc
PS
1227 if (((rth->fl.fl4_dst ^ daddr) |
1228 (rth->fl.fl4_src ^ saddr) |
1229 (rth->fl.iif ^ iif) |
1230+ (rth->fl.fl4_lsrc ^ lsrc) |
1231 rth->fl.oif |
1232 (rth->fl.fl4_tos ^ tos)) == 0 &&
d70ce330 1233 rth->fl.mark == skb->mark &&
83cd86f2 1234@@ -2176,7 +2198,19 @@ int ip_route_input(struct sk_buff *skb,
d70ce330 1235 rcu_read_unlock();
1236 return -EINVAL;
1237 }
1238- return ip_route_input_slow(skb, daddr, saddr, tos, dev);
1239+ return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1240+}
1241+
1242+int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1243+ u8 tos, struct net_device *dev)
1244+{
1245+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0);
1246+}
1247+
1248+int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1249+ u8 tos, struct net_device *dev, __be32 lsrc)
1250+{
1251+ return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc);
1252 }
1253
10b0f5dc 1254 static int __mkroute_output(struct rtable **result,
83cd86f2 1255@@ -2248,6 +2282,7 @@ static int __mkroute_output(struct rtabl
d70ce330 1256 rth->fl.fl4_tos = tos;
1257 rth->fl.fl4_src = oldflp->fl4_src;
1258 rth->fl.oif = oldflp->oif;
1259+ rth->fl.fl4_gw = oldflp->fl4_gw;
1260 rth->fl.mark = oldflp->mark;
1261 rth->rt_dst = fl->fl4_dst;
1262 rth->rt_src = fl->fl4_src;
83cd86f2 1263@@ -2329,6 +2364,7 @@ static int ip_route_output_slow(struct n
d70ce330 1264 struct flowi fl = { .nl_u = { .ip4_u =
1265 { .daddr = oldflp->fl4_dst,
1266 .saddr = oldflp->fl4_src,
1267+ .gw = oldflp->fl4_gw,
1268 .tos = tos & IPTOS_RT_MASK,
1269 .scope = ((tos & RTO_ONLINK) ?
1270 RT_SCOPE_LINK :
83cd86f2 1271@@ -2434,6 +2470,7 @@ static int ip_route_output_slow(struct n
d70ce330 1272 dev_out = net->loopback_dev;
1273 dev_hold(dev_out);
1274 fl.oif = net->loopback_dev->ifindex;
1275+ fl.fl4_gw = 0;
1276 res.type = RTN_LOCAL;
1277 flags |= RTCF_LOCAL;
1278 goto make_route;
83cd86f2 1279@@ -2441,7 +2478,7 @@ static int ip_route_output_slow(struct n
d70ce330 1280
1281 if (fib_lookup(net, &fl, &res)) {
1282 res.fi = NULL;
1283- if (oldflp->oif) {
1284+ if (oldflp->oif && dev_out->flags & IFF_UP) {
1285 /* Apparently, routing tables are wrong. Assume,
1286 that the destination is on link.
1287
83cd86f2 1288@@ -2481,6 +2518,7 @@ static int ip_route_output_slow(struct n
d70ce330 1289 dev_out = net->loopback_dev;
1290 dev_hold(dev_out);
1291 fl.oif = dev_out->ifindex;
1292+ fl.fl4_gw = 0;
1293 if (res.fi)
1294 fib_info_put(res.fi);
1295 res.fi = NULL;
83cd86f2 1296@@ -2488,13 +2526,12 @@ static int ip_route_output_slow(struct n
d70ce330 1297 goto make_route;
1298 }
1299
1300+ if (res.type == RTN_UNICAST)
1301+ fib_select_default(net, &fl, &res);
1302 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1303- if (res.fi->fib_nhs > 1 && fl.oif == 0)
1304+ if (res.fi->fib_nhs > 1)
1305 fib_select_multipath(&fl, &res);
1306- else
1307 #endif
1308- if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1309- fib_select_default(net, &fl, &res);
1310
1311 if (!fl.fl4_src)
1312 fl.fl4_src = FIB_RES_PREFSRC(res);
83cd86f2 1313@@ -2532,6 +2569,7 @@ int __ip_route_output_key(struct net *ne
d70ce330 1314 rth->fl.fl4_src == flp->fl4_src &&
1315 rth->fl.iif == 0 &&
1316 rth->fl.oif == flp->oif &&
1317+ rth->fl.fl4_gw == flp->fl4_gw &&
1318 rth->fl.mark == flp->mark &&
1319 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1320 (IPTOS_RT_MASK | RTO_ONLINK)) &&
83cd86f2 1321@@ -3310,3 +3348,4 @@ void __init ip_static_sysctl_init(void)
d70ce330 1322 EXPORT_SYMBOL(__ip_select_ident);
1323 EXPORT_SYMBOL(ip_route_input);
1324 EXPORT_SYMBOL(ip_route_output_key);
1325+EXPORT_SYMBOL(ip_route_input_lookup);
This page took 0.210646 seconds and 4 git commands to generate.