1 diff -urp v2.6.25/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2 --- v2.6.25/linux/include/linux/rtnetlink.h 2008-04-17 09:58:08.000000000 +0300
3 +++ linux/include/linux/rtnetlink.h 2008-04-19 18:30:04.000000000 +0300
4 @@ -303,6 +303,8 @@ struct rtnexthop
5 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
6 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
7 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
8 +#define RTNH_F_SUSPECT 8 /* We don't know the real state */
9 +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
11 /* Macros to handle hexthops */
13 diff -urp v2.6.25/linux/include/net/flow.h linux/include/net/flow.h
14 --- v2.6.25/linux/include/net/flow.h 2008-04-17 09:58:08.000000000 +0300
15 +++ linux/include/net/flow.h 2008-04-19 18:30:17.000000000 +0300
16 @@ -19,6 +19,8 @@ struct flowi {
25 @@ -43,6 +45,8 @@ struct flowi {
26 #define fl6_flowlabel nl_u.ip6_u.flowlabel
27 #define fl4_dst nl_u.ip4_u.daddr
28 #define fl4_src nl_u.ip4_u.saddr
29 +#define fl4_lsrc nl_u.ip4_u.lsrc
30 +#define fl4_gw nl_u.ip4_u.gw
31 #define fl4_tos nl_u.ip4_u.tos
32 #define fl4_scope nl_u.ip4_u.scope
34 diff -urp v2.6.25/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35 --- v2.6.25/linux/include/net/ip_fib.h 2008-04-17 09:58:08.000000000 +0300
36 +++ linux/include/net/ip_fib.h 2008-04-19 18:30:04.000000000 +0300
37 @@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str
38 extern struct fib_table *fib_new_table(struct net *net, u32 id);
39 extern struct fib_table *fib_get_table(struct net *net, u32 id);
41 +extern int fib_result_table(struct fib_result *res);
43 #endif /* CONFIG_IP_MULTIPLE_TABLES */
45 /* Exported by fib_frontend.c */
46 @@ -276,4 +278,6 @@ static inline void fib_proc_exit(struct
50 +extern rwlock_t fib_nhflags_lock;
52 #endif /* _NET_FIB_H */
53 diff -urp v2.6.25/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54 --- v2.6.25/linux/include/net/netfilter/nf_nat.h 2008-04-17 09:58:08.000000000 +0300
55 +++ linux/include/net/netfilter/nf_nat.h 2008-04-19 18:30:17.000000000 +0300
56 @@ -77,6 +77,13 @@ struct nf_conn_nat
60 +/* Call input routing for SNAT-ed traffic */
61 +extern unsigned int ip_nat_route_input(unsigned int hooknum,
62 + struct sk_buff *skb,
63 + const struct net_device *in,
64 + const struct net_device *out,
65 + int (*okfn)(struct sk_buff *));
67 /* Set up the info structure to map into this range. */
68 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69 const struct nf_nat_range *range,
70 diff -urp v2.6.25/linux/include/net/route.h linux/include/net/route.h
71 --- v2.6.25/linux/include/net/route.h 2008-04-17 09:58:08.000000000 +0300
72 +++ linux/include/net/route.h 2008-04-19 18:30:17.000000000 +0300
73 @@ -117,6 +117,7 @@ extern int __ip_route_output_key(struct
74 extern int ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
75 extern int ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
76 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
77 +extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
78 extern unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu);
79 extern void ip_rt_send_redirect(struct sk_buff *skb);
81 diff -urp v2.6.25/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82 --- v2.6.25/linux/net/bridge/br_netfilter.c 2008-04-17 09:58:08.000000000 +0300
83 +++ linux/net/bridge/br_netfilter.c 2008-04-19 18:30:17.000000000 +0300
84 @@ -325,6 +325,10 @@ static int br_nf_pre_routing_finish(stru
85 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
88 + /* Old skb->dst is not expected, it is lost in all cases */
89 + dst_release(skb->dst);
92 if (nf_bridge->mask & BRNF_PKT_TYPE) {
93 skb->pkt_type = PACKET_OTHERHOST;
94 nf_bridge->mask ^= BRNF_PKT_TYPE;
95 diff -urp v2.6.25/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
96 --- v2.6.25/linux/net/ipv4/fib_frontend.c 2008-04-17 09:58:09.000000000 +0300
97 +++ linux/net/ipv4/fib_frontend.c 2008-04-19 18:30:04.000000000 +0300
100 #ifndef CONFIG_IP_MULTIPLE_TABLES
102 +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
104 static int __net_init fib4_rules_init(struct net *net)
106 struct fib_table *local_table, *main_table;
107 @@ -73,6 +75,8 @@ fail:
111 +#define FIB_RES_TABLE(r) (fib_result_table(r))
113 struct fib_table *fib_new_table(struct net *net, u32 id)
115 struct fib_table *tb;
116 @@ -127,7 +131,8 @@ void fib_select_default(struct net *net,
117 table = res->r->table;
119 tb = fib_get_table(net, table);
120 - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
121 + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
122 + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
123 tb->tb_select_default(tb, flp, res);
126 @@ -241,6 +246,9 @@ int fib_validate_source(__be32 src, __be
129 struct fib_result res;
131 + unsigned char prefixlen;
132 + unsigned char scope;
136 @@ -264,31 +272,35 @@ int fib_validate_source(__be32 src, __be
138 *spec_dst = FIB_RES_PREFSRC(res);
139 fib_combine_itag(itag, &res);
140 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
141 - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
143 if (FIB_RES_DEV(res) == dev)
146 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
150 + table = FIB_RES_TABLE(&res);
151 + prefixlen = res.prefixlen;
158 fl.oif = dev->ifindex;
161 if (fib_lookup(net, &fl, &res) == 0) {
162 - if (res.type == RTN_UNICAST) {
163 + if (res.type == RTN_UNICAST &&
164 + ((table == FIB_RES_TABLE(&res) &&
165 + res.prefixlen >= prefixlen && res.scope >= scope) ||
167 *spec_dst = FIB_RES_PREFSRC(res);
168 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
179 @@ -911,9 +923,7 @@ static int fib_inetaddr_event(struct not
183 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
184 fib_sync_up(ifa->ifa_dev->dev);
189 @@ -949,9 +959,7 @@ static int fib_netdev_event(struct notif
192 } endfor_ifa(in_dev);
193 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
199 diff -urp v2.6.25/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
200 --- v2.6.25/linux/net/ipv4/fib_hash.c 2008-04-17 09:58:09.000000000 +0300
201 +++ linux/net/ipv4/fib_hash.c 2008-04-19 18:30:04.000000000 +0300
202 @@ -280,25 +280,35 @@ out:
204 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
206 - int order, last_idx;
207 + int order, last_idx, last_dflt, last_nhsel;
208 + struct fib_alias *first_fa = NULL;
209 + struct hlist_head *head;
210 struct hlist_node *node;
212 struct fib_info *fi = NULL;
213 struct fib_info *last_resort;
214 struct fn_hash *t = (struct fn_hash*)tb->tb_data;
215 - struct fn_zone *fz = t->fn_zones[0];
216 + struct fn_zone *fz = t->fn_zones[res->prefixlen];
222 + k = fz_key(flp->fl4_dst, fz);
229 read_lock(&fib_hash_lock);
230 - hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
231 + head = &fz->fz_hash[fn_hash(k, fz)];
232 + hlist_for_each_entry(f, node, head, fn_hash) {
233 struct fib_alias *fa;
235 + if (f->fn_key != k)
238 list_for_each_entry(fa, &f->fn_alias, fa_list) {
239 struct fib_info *next_fi = fa->fa_info;
241 @@ -306,42 +316,56 @@ fn_hash_select_default(struct fib_table
242 fa->fa_type != RTN_UNICAST)
246 + fa->fa_tos != flp->fl4_tos)
248 if (next_fi->fib_priority > res->fi->fib_priority)
250 - if (!next_fi->fib_nh[0].nh_gw ||
251 - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
253 fa->fa_state |= FA_S_ACCESSED;
256 - if (next_fi != res->fi)
258 - } else if (!fib_detect_death(fi, order, &last_resort,
259 - &last_idx, tb->tb_default)) {
261 + last_dflt = fa->fa_last_dflt;
264 + if (fi && !fib_detect_death(fi, order, &last_resort,
265 + &last_idx, &last_dflt, &last_nhsel, flp)) {
266 fib_result_assign(res, fi);
267 - tb->tb_default = order;
268 + first_fa->fa_last_dflt = order;
277 if (order <= 0 || fi == NULL) {
278 - tb->tb_default = -1;
279 + if (fi && fi->fib_nhs > 1 &&
280 + fib_detect_death(fi, order, &last_resort, &last_idx,
281 + &last_dflt, &last_nhsel, flp) &&
282 + last_resort == fi) {
283 + read_lock_bh(&fib_nhflags_lock);
284 + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
285 + read_unlock_bh(&fib_nhflags_lock);
287 + if (first_fa) first_fa->fa_last_dflt = -1;
291 if (!fib_detect_death(fi, order, &last_resort, &last_idx,
293 + &last_dflt, &last_nhsel, flp)) {
294 fib_result_assign(res, fi);
295 - tb->tb_default = order;
296 + first_fa->fa_last_dflt = order;
301 + if (last_idx >= 0) {
302 fib_result_assign(res, last_resort);
303 - tb->tb_default = last_idx;
304 + read_lock_bh(&fib_nhflags_lock);
305 + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
306 + read_unlock_bh(&fib_nhflags_lock);
307 + first_fa->fa_last_dflt = last_idx;
310 read_unlock(&fib_hash_lock);
312 @@ -465,6 +489,7 @@ static int fn_hash_insert(struct fib_tab
313 write_lock_bh(&fib_hash_lock);
314 fi_drop = fa->fa_info;
316 + fa->fa_last_dflt = -1;
317 fa->fa_type = cfg->fc_type;
318 fa->fa_scope = cfg->fc_scope;
319 state = fa->fa_state;
320 @@ -519,6 +544,7 @@ static int fn_hash_insert(struct fib_tab
321 new_fa->fa_type = cfg->fc_type;
322 new_fa->fa_scope = cfg->fc_scope;
323 new_fa->fa_state = 0;
324 + new_fa->fa_last_dflt = -1;
327 * Insert new entry to the list.
328 diff -urp v2.6.25/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
329 --- v2.6.25/linux/net/ipv4/fib_lookup.h 2008-04-17 09:58:09.000000000 +0300
330 +++ linux/net/ipv4/fib_lookup.h 2008-04-19 18:30:04.000000000 +0300
333 struct list_head fa_list;
334 struct fib_info *fa_info;
339 @@ -38,7 +39,8 @@ extern struct fib_alias *fib_find_alias(
341 extern int fib_detect_death(struct fib_info *fi, int order,
342 struct fib_info **last_resort,
343 - int *last_idx, int dflt);
344 + int *last_idx, int *dflt, int *last_nhsel,
345 + const struct flowi *flp);
347 static inline void fib_result_assign(struct fib_result *res,
349 diff -urp v2.6.25/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
350 --- v2.6.25/linux/net/ipv4/fib_rules.c 2008-04-17 09:58:09.000000000 +0300
351 +++ linux/net/ipv4/fib_rules.c 2008-04-19 18:30:04.000000000 +0300
352 @@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result *
356 +int fib_result_table(struct fib_result *res)
358 + return res->r->table;
361 int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
363 struct fib_lookup_arg arg = {
364 diff -urp v2.6.25/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
365 --- v2.6.25/linux/net/ipv4/fib_semantics.c 2008-04-17 09:58:09.000000000 +0300
366 +++ linux/net/ipv4/fib_semantics.c 2008-04-19 18:30:17.000000000 +0300
367 @@ -52,6 +52,7 @@ static struct hlist_head *fib_info_hash;
368 static struct hlist_head *fib_info_laddrhash;
369 static unsigned int fib_hash_size;
370 static unsigned int fib_info_cnt;
371 +DEFINE_RWLOCK(fib_nhflags_lock); /* taken around nh_flags (RTNH_F_DEAD/RTNH_F_SUSPECT) updates; DEFINE_RWLOCK replaces the deprecated RW_LOCK_UNLOCKED initializer */
373 #define DEVINDEX_HASHBITS 8
374 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
375 @@ -187,7 +188,7 @@ static __inline__ int nh_comp(const stru
376 #ifdef CONFIG_NET_CLS_ROUTE
377 nh->nh_tclassid != onh->nh_tclassid ||
379 - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
380 + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
383 } endfor_nexthops(fi);
384 @@ -238,7 +239,7 @@ static struct fib_info *fib_find_info(co
385 nfi->fib_priority == fi->fib_priority &&
386 memcmp(nfi->fib_metrics, fi->fib_metrics,
387 sizeof(fi->fib_metrics)) == 0 &&
388 - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
389 + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
390 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
393 @@ -349,26 +350,70 @@ struct fib_alias *fib_find_alias(struct
396 int fib_detect_death(struct fib_info *fi, int order,
397 - struct fib_info **last_resort, int *last_idx, int dflt)
398 + struct fib_info **last_resort, int *last_idx, int *dflt,
399 + int *last_nhsel, const struct flowi *flp)
402 - int state = NUD_NONE;
405 + struct fib_nh * nh;
407 + int flag, dead = 1;
409 + /* change_nexthops(fi) { */
410 + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
411 + if (flp->oif && flp->oif != nh->nh_oif)
413 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
414 + nh->nh_scope == RT_SCOPE_LINK)
416 + if (nh->nh_flags & RTNH_F_DEAD)
419 - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
421 - state = n->nud_state;
424 - if (state==NUD_REACHABLE)
426 - if ((state&NUD_VALID) && order != dflt)
428 - if ((state&NUD_VALID) ||
429 - (*last_idx<0 && order > dflt)) {
433 + if (nh->nh_dev->flags & IFF_NOARP) {
439 + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
440 + dst = flp->fl4_dst;
443 + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
445 + state = n->nud_state;
448 + if (state==NUD_REACHABLE ||
449 + ((state&NUD_VALID) && order != *dflt)) {
453 + if (!(state&NUD_VALID))
457 + if ((state&NUD_VALID) ||
458 + (*last_idx<0 && order >= *dflt)) {
461 + *last_nhsel = nhsel;
466 + read_lock_bh(&fib_nhflags_lock);
468 + nh->nh_flags |= RTNH_F_SUSPECT;
470 + nh->nh_flags &= ~RTNH_F_SUSPECT;
471 + read_unlock_bh(&fib_nhflags_lock);
474 + /* } endfor_nexthops(fi) */
479 #ifdef CONFIG_IP_ROUTE_MULTIPATH
480 @@ -540,8 +585,11 @@ static int fib_check_nh(struct fib_confi
482 if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
484 - if (!(dev->flags&IFF_UP))
486 + if (!(dev->flags&IFF_UP)) {
487 + if (fi->fib_protocol != RTPROT_STATIC)
489 + nh->nh_flags |= RTNH_F_DEAD;
493 nh->nh_scope = RT_SCOPE_LINK;
494 @@ -561,24 +609,48 @@ static int fib_check_nh(struct fib_confi
495 /* It is not necessary, but requires a bit of thinking */
496 if (fl.fl4_scope < RT_SCOPE_LINK)
497 fl.fl4_scope = RT_SCOPE_LINK;
498 - if ((err = fib_lookup(net, &fl, &res)) != 0)
500 + err = fib_lookup(net, &fl, &res);
503 - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
505 - nh->nh_scope = res.scope;
506 - nh->nh_oif = FIB_RES_OIF(res);
507 - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
509 - dev_hold(nh->nh_dev);
511 - if (!(nh->nh_dev->flags & IFF_UP))
515 + struct in_device *in_dev;
517 + if (err != -ENETUNREACH ||
518 + fi->fib_protocol != RTPROT_STATIC)
521 + in_dev = inetdev_by_index(net, nh->nh_oif);
522 + if (in_dev == NULL ||
523 + in_dev->dev->flags & IFF_UP) {
525 + in_dev_put(in_dev);
528 + nh->nh_flags |= RTNH_F_DEAD;
529 + nh->nh_scope = RT_SCOPE_LINK;
530 + nh->nh_dev = in_dev->dev;
531 + dev_hold(nh->nh_dev);
532 + in_dev_put(in_dev);
535 + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
537 + nh->nh_scope = res.scope;
538 + nh->nh_oif = FIB_RES_OIF(res);
539 + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
541 + dev_hold(nh->nh_dev);
542 + if (!(nh->nh_dev->flags & IFF_UP)) {
543 + if (fi->fib_protocol != RTPROT_STATIC) {
547 + nh->nh_flags |= RTNH_F_DEAD;
557 struct in_device *in_dev;
559 @@ -589,8 +661,11 @@ out:
562 if (!(in_dev->dev->flags&IFF_UP)) {
563 - in_dev_put(in_dev);
565 + if (fi->fib_protocol != RTPROT_STATIC) {
566 + in_dev_put(in_dev);
569 + nh->nh_flags |= RTNH_F_DEAD;
571 nh->nh_dev = in_dev->dev;
572 dev_hold(nh->nh_dev);
573 @@ -900,8 +975,12 @@ int fib_semantic_match(struct list_head
575 if (nh->nh_flags&RTNH_F_DEAD)
577 - if (!flp->oif || flp->oif == nh->nh_oif)
579 + if (flp->oif && flp->oif != nh->nh_oif)
581 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
582 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
586 #ifdef CONFIG_IP_ROUTE_MULTIPATH
587 if (nhsel < fi->fib_nhs) {
588 @@ -1078,18 +1157,29 @@ int fib_sync_down_dev(struct net_device
591 change_nexthops(fi) {
592 - if (nh->nh_flags&RTNH_F_DEAD)
594 - else if (nh->nh_dev == dev &&
595 - nh->nh_scope != scope) {
596 - nh->nh_flags |= RTNH_F_DEAD;
597 + if (nh->nh_flags&RTNH_F_DEAD) {
598 + if (fi->fib_protocol!=RTPROT_STATIC ||
599 + nh->nh_dev == NULL ||
600 + __in_dev_get_rtnl(nh->nh_dev) == NULL ||
601 + nh->nh_dev->flags&IFF_UP)
603 + } else if (nh->nh_dev == dev &&
604 + nh->nh_scope != scope) {
605 + write_lock_bh(&fib_nhflags_lock);
606 #ifdef CONFIG_IP_ROUTE_MULTIPATH
607 - spin_lock_bh(&fib_multipath_lock);
608 + spin_lock(&fib_multipath_lock);
609 + nh->nh_flags |= RTNH_F_DEAD;
610 fi->fib_power -= nh->nh_power;
612 - spin_unlock_bh(&fib_multipath_lock);
613 + spin_unlock(&fib_multipath_lock);
615 + nh->nh_flags |= RTNH_F_DEAD;
618 + write_unlock_bh(&fib_nhflags_lock);
619 + if (fi->fib_protocol!=RTPROT_STATIC ||
621 + __in_dev_get_rtnl(dev) == NULL)
624 #ifdef CONFIG_IP_ROUTE_MULTIPATH
625 if (force > 1 && nh->nh_dev == dev) {
626 @@ -1107,11 +1197,8 @@ int fib_sync_down_dev(struct net_device
630 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
633 - Dead device goes up. We wake up dead nexthops.
634 - It takes sense only on multipath routes.
635 + Dead device goes up or new address is added. We wake up dead nexthops.
638 int fib_sync_up(struct net_device *dev)
639 @@ -1121,8 +1208,10 @@ int fib_sync_up(struct net_device *dev)
640 struct hlist_head *head;
641 struct hlist_node *node;
644 + struct fib_result res;
648 if (!(dev->flags&IFF_UP))
651 @@ -1130,6 +1219,7 @@ int fib_sync_up(struct net_device *dev)
652 hash = fib_devindex_hashfn(dev->ifindex);
653 head = &fib_info_devhash[hash];
657 hlist_for_each_entry(nh, node, head, nh_hash) {
658 struct fib_info *fi = nh->nh_parent;
659 @@ -1142,19 +1232,39 @@ int fib_sync_up(struct net_device *dev)
662 change_nexthops(fi) {
663 - if (!(nh->nh_flags&RTNH_F_DEAD)) {
665 + if (!(nh->nh_flags&RTNH_F_DEAD))
668 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
670 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
672 + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
673 + struct flowi fl = {
675 + { .daddr = nh->nh_gw,
676 + .scope = nh->nh_scope } },
679 + if (fib_lookup(dev->nd_net, &fl, &res) != 0)
681 + if (res.type != RTN_UNICAST &&
682 + res.type != RTN_LOCAL) {
686 + nh->nh_scope = res.scope;
691 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
692 spin_lock_bh(&fib_multipath_lock);
695 nh->nh_flags &= ~RTNH_F_DEAD;
696 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
697 spin_unlock_bh(&fib_multipath_lock);
699 } endfor_nexthops(fi)
702 @@ -1162,10 +1272,14 @@ int fib_sync_up(struct net_device *dev)
712 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
715 The algorithm is suboptimal, but it provides really
716 fair weighted route distribution.
717 @@ -1174,24 +1288,45 @@ int fib_sync_up(struct net_device *dev)
718 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
720 struct fib_info *fi = res->fi;
724 spin_lock_bh(&fib_multipath_lock);
728 + change_nexthops(fi) {
729 + if (flp->oif != nh->nh_oif)
731 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
732 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
734 + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
735 + if (nh->nh_power > w) {
740 + } endfor_nexthops(fi);
742 + spin_unlock_bh(&fib_multipath_lock);
750 if (fi->fib_power <= 0) {
752 change_nexthops(fi) {
753 - if (!(nh->nh_flags&RTNH_F_DEAD)) {
754 + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
755 power += nh->nh_weight;
756 nh->nh_power = nh->nh_weight;
758 } endfor_nexthops(fi);
759 fi->fib_power = power;
761 - spin_unlock_bh(&fib_multipath_lock);
762 - /* Race condition: route has just become dead. */
771 @@ -1201,20 +1336,40 @@ void fib_select_multipath(const struct f
773 w = jiffies % fi->fib_power;
776 change_nexthops(fi) {
777 - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
778 + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
779 if ((w -= nh->nh_power) <= 0) {
782 - res->nh_sel = nhsel;
783 spin_unlock_bh(&fib_multipath_lock);
784 + res->nh_sel = nhsel;
789 + } endfor_nexthops(fi);
798 + if (!(nh->nh_flags&RTNH_F_DEAD)) {
799 + if (flp->oif && flp->oif != nh->nh_oif)
801 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
802 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
804 + spin_unlock_bh(&fib_multipath_lock);
805 + res->nh_sel = nhsel;
808 } endfor_nexthops(fi);
810 /* Race condition: route has just become dead. */
812 spin_unlock_bh(&fib_multipath_lock);
815 diff -urp v2.6.25/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
816 --- v2.6.25/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-04-17 09:58:09.000000000 +0300
817 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2008-04-19 18:30:17.000000000 +0300
818 @@ -59,7 +59,7 @@ masquerade_tg(struct sk_buff *skb, const
819 enum ip_conntrack_info ctinfo;
820 struct nf_nat_range newrange;
821 const struct nf_nat_multi_range_compat *mr;
822 - const struct rtable *rt;
826 NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING);
827 @@ -77,13 +77,28 @@ masquerade_tg(struct sk_buff *skb, const
831 - rt = (struct rtable *)skb->dst;
832 - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
834 - printk("MASQUERADE: %s ate my IP address\n", out->name);
838 + struct flowi fl = { .nl_u = { .ip4_u =
839 + { .daddr = ip_hdr(skb)->daddr,
840 + .tos = (RT_TOS(ip_hdr(skb)->tos) |
842 + .gw = ((struct rtable *) skb->dst)->rt_gateway,
845 + .oif = out->ifindex };
846 + if (ip_route_output_key(out->nd_net, &rt, &fl) != 0) {
847 + /* Funky routing can do this. */
848 + if (net_ratelimit())
849 + printk("MASQUERADE:"
850 + " No route: Rusty's brain broke!\n");
855 + newsrc = rt->rt_src;
858 write_lock_bh(&masq_lock);
859 nat->masq_index = out->ifindex;
860 write_unlock_bh(&masq_lock);
861 diff -urp v2.6.25/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
862 --- v2.6.25/linux/net/ipv4/netfilter/nf_nat_core.c 2008-04-17 09:58:09.000000000 +0300
863 +++ linux/net/ipv4/netfilter/nf_nat_core.c 2008-04-19 18:30:17.000000000 +0300
864 @@ -624,6 +624,52 @@ static struct nf_ct_ext_type nat_extend
865 .flags = NF_CT_EXT_F_PREALLOC,
869 +ip_nat_route_input(unsigned int hooknum,
870 + struct sk_buff *skb,
871 + const struct net_device *in,
872 + const struct net_device *out,
873 + int (*okfn)(struct sk_buff *))
876 + struct nf_conn *conn;
877 + enum ip_conntrack_info ctinfo;
878 + enum ip_conntrack_dir dir;
879 + unsigned long statusbit;
882 + if (!(conn = nf_ct_get(skb, &ctinfo)))
885 + if (!(conn->status & IPS_NAT_DONE_MASK))
887 + dir = CTINFO2DIR(ctinfo);
888 + statusbit = IPS_SRC_NAT;
889 + if (dir == IP_CT_DIR_REPLY)
890 + statusbit ^= IPS_NAT_MASK;
891 + if (!(conn->status & statusbit))
897 + if (skb->len < sizeof(struct iphdr))
900 + /* use daddr in other direction as masquerade address (lsrc) */
902 + saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
903 + if (saddr == iph->saddr)
906 + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
912 +EXPORT_SYMBOL_GPL(ip_nat_route_input);
914 static int __init nf_nat_init(void)
917 diff -urp v2.6.25/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
918 --- v2.6.25/linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-04-17 09:58:09.000000000 +0300
919 +++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2008-04-19 18:30:17.000000000 +0300
920 @@ -282,6 +282,14 @@ static struct nf_hook_ops nf_nat_ops[] _
921 .hooknum = NF_INET_PRE_ROUTING,
922 .priority = NF_IP_PRI_NAT_DST,
924 + /* Before routing, route before mangling */
926 + .hook = ip_nat_route_input,
927 + .owner = THIS_MODULE,
929 + .hooknum = NF_INET_PRE_ROUTING,
930 + .priority = NF_IP_PRI_LAST-1,
932 /* After packet filtering, change source */
935 diff -urp v2.6.25/linux/net/ipv4/route.c linux/net/ipv4/route.c
936 --- v2.6.25/linux/net/ipv4/route.c 2008-04-17 09:58:09.000000000 +0300
937 +++ linux/net/ipv4/route.c 2008-04-19 18:30:17.000000000 +0300
938 @@ -1207,6 +1207,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
940 /* Gateway is different ... */
941 rt->rt_gateway = new_gw;
942 + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
944 /* Redirect received -> path was valid */
945 dst_confirm(&rth->u.dst);
946 @@ -1647,6 +1648,7 @@ static int ip_route_input_mc(struct sk_b
947 rth->fl.fl4_tos = tos;
948 rth->fl.mark = skb->mark;
949 rth->fl.fl4_src = saddr;
950 + rth->fl.fl4_lsrc = 0;
952 #ifdef CONFIG_NET_CLS_ROUTE
953 rth->u.dst.tclassid = itag;
954 @@ -1657,6 +1659,7 @@ static int ip_route_input_mc(struct sk_b
955 dev_hold(rth->u.dst.dev);
956 rth->idev = in_dev_get(rth->u.dst.dev);
958 + rth->fl.fl4_gw = 0;
959 rth->rt_gateway = daddr;
960 rth->rt_spec_dst= spec_dst;
961 rth->rt_genid = atomic_read(&rt_genid);
962 @@ -1722,7 +1725,7 @@ static inline int __mkroute_input(struct
963 struct fib_result* res,
964 struct in_device *in_dev,
965 __be32 daddr, __be32 saddr, u32 tos,
966 - struct rtable **result)
967 + __be32 lsrc, struct rtable **result)
971 @@ -1756,6 +1759,7 @@ static inline int __mkroute_input(struct
972 flags |= RTCF_DIRECTSRC;
974 if (out_dev == in_dev && err && !(flags & RTCF_MASQ) &&
976 (IN_DEV_SHARED_MEDIA(out_dev) ||
977 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
978 flags |= RTCF_DOREDIRECT;
979 @@ -1789,6 +1793,7 @@ static inline int __mkroute_input(struct
980 rth->fl.mark = skb->mark;
981 rth->fl.fl4_src = saddr;
983 + rth->fl.fl4_lsrc = lsrc;
984 rth->rt_gateway = daddr;
986 rth->fl.iif = in_dev->dev->ifindex;
987 @@ -1796,6 +1801,7 @@ static inline int __mkroute_input(struct
988 dev_hold(rth->u.dst.dev);
989 rth->idev = in_dev_get(rth->u.dst.dev);
991 + rth->fl.fl4_gw = 0;
992 rth->rt_spec_dst= spec_dst;
994 rth->u.dst.input = ip_forward;
995 @@ -1816,21 +1822,23 @@ static inline int __mkroute_input(struct
997 static inline int ip_mkroute_input(struct sk_buff *skb,
998 struct fib_result* res,
1000 const struct flowi *fl,
1001 struct in_device *in_dev,
1002 - __be32 daddr, __be32 saddr, u32 tos)
1003 + __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1005 struct rtable* rth = NULL;
1009 + fib_select_default(net, fl, res);
1010 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1011 - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1012 + if (res->fi && res->fi->fib_nhs > 1)
1013 fib_select_multipath(fl, res);
1016 /* create a routing cache entry */
1017 - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1018 + err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1022 @@ -1850,18 +1858,19 @@ static inline int ip_mkroute_input(struc
1025 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1026 - u8 tos, struct net_device *dev)
1027 + u8 tos, struct net_device *dev, __be32 lsrc)
1029 struct fib_result res;
1030 struct in_device *in_dev = in_dev_get(dev);
1031 struct flowi fl = { .nl_u = { .ip4_u =
1034 + .saddr = lsrc? : saddr,
1036 .scope = RT_SCOPE_UNIVERSE,
1039 - .iif = dev->ifindex };
1041 + init_net.loopback_dev->ifindex : dev->ifindex };
1044 struct rtable * rth;
1045 @@ -1897,6 +1906,12 @@ static int ip_route_input_slow(struct sk
1046 ipv4_is_loopback(daddr))
1047 goto martian_destination;
1050 + if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1051 + ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1056 * Now we are ready to route packet.
1058 @@ -1906,6 +1921,8 @@ static int ip_route_input_slow(struct sk
1062 + fl.iif = dev->ifindex;
1063 + fl.fl4_src = saddr;
1065 RT_CACHE_STAT_INC(in_slow_tot);
1067 @@ -1930,7 +1947,7 @@ static int ip_route_input_slow(struct sk
1068 if (res.type != RTN_UNICAST)
1069 goto martian_destination;
1071 - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1072 + err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
1076 @@ -1940,6 +1957,8 @@ out: return err;
1078 if (skb->protocol != htons(ETH_P_IP))
1083 if (ipv4_is_zeronet(saddr))
1084 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1085 @@ -1981,6 +2000,7 @@ local_input:
1086 rth->u.dst.dev = net->loopback_dev;
1087 dev_hold(rth->u.dst.dev);
1088 rth->idev = in_dev_get(rth->u.dst.dev);
1089 + rth->fl.fl4_gw = 0;
1090 rth->rt_gateway = daddr;
1091 rth->rt_spec_dst= spec_dst;
1092 rth->u.dst.input= ip_local_deliver;
1093 @@ -2032,8 +2052,9 @@ martian_source:
1097 -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1098 - u8 tos, struct net_device *dev)
1100 +ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1101 + u8 tos, struct net_device *dev, __be32 lsrc)
1103 struct rtable * rth;
1105 @@ -2050,6 +2071,7 @@ int ip_route_input(struct sk_buff *skb,
1106 if (rth->fl.fl4_dst == daddr &&
1107 rth->fl.fl4_src == saddr &&
1108 rth->fl.iif == iif &&
1109 + rth->fl.fl4_lsrc == lsrc &&
1111 rth->fl.mark == skb->mark &&
1112 rth->fl.fl4_tos == tos &&
1113 @@ -2097,7 +2119,19 @@ int ip_route_input(struct sk_buff *skb,
1117 - return ip_route_input_slow(skb, daddr, saddr, tos, dev);
1118 + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1121 +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1122 + u8 tos, struct net_device *dev)
1124 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0);
1127 +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1128 + u8 tos, struct net_device *dev, __be32 lsrc)
1130 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc);
1133 static inline int __mkroute_output(struct rtable **result,
1134 @@ -2169,6 +2203,7 @@ static inline int __mkroute_output(struc
1135 rth->fl.fl4_tos = tos;
1136 rth->fl.fl4_src = oldflp->fl4_src;
1137 rth->fl.oif = oldflp->oif;
1138 + rth->fl.fl4_gw = oldflp->fl4_gw;
1139 rth->fl.mark = oldflp->mark;
1140 rth->rt_dst = fl->fl4_dst;
1141 rth->rt_src = fl->fl4_src;
1142 @@ -2249,6 +2284,7 @@ static int ip_route_output_slow(struct n
1143 struct flowi fl = { .nl_u = { .ip4_u =
1144 { .daddr = oldflp->fl4_dst,
1145 .saddr = oldflp->fl4_src,
1146 + .gw = oldflp->fl4_gw,
1147 .tos = tos & IPTOS_RT_MASK,
1148 .scope = ((tos & RTO_ONLINK) ?
1150 @@ -2354,6 +2390,7 @@ static int ip_route_output_slow(struct n
1151 dev_out = net->loopback_dev;
1153 fl.oif = net->loopback_dev->ifindex;
1155 res.type = RTN_LOCAL;
1156 flags |= RTCF_LOCAL;
1158 @@ -2361,7 +2398,7 @@ static int ip_route_output_slow(struct n
1160 if (fib_lookup(net, &fl, &res)) {
1162 - if (oldflp->oif) {
1163 + if (oldflp->oif && dev_out->flags & IFF_UP) {
1164 /* Apparently, routing tables are wrong. Assume,
1165 that the destination is on link.
1167 @@ -2401,6 +2438,7 @@ static int ip_route_output_slow(struct n
1168 dev_out = net->loopback_dev;
1170 fl.oif = dev_out->ifindex;
1173 fib_info_put(res.fi);
1175 @@ -2408,13 +2446,12 @@ static int ip_route_output_slow(struct n
1179 + if (res.type == RTN_UNICAST)
1180 + fib_select_default(net, &fl, &res);
1181 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1182 - if (res.fi->fib_nhs > 1 && fl.oif == 0)
1183 + if (res.fi->fib_nhs > 1)
1184 fib_select_multipath(&fl, &res);
1187 - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1188 - fib_select_default(net, &fl, &res);
1191 fl.fl4_src = FIB_RES_PREFSRC(res);
1192 @@ -2452,6 +2489,7 @@ int __ip_route_output_key(struct net *ne
1193 rth->fl.fl4_src == flp->fl4_src &&
1195 rth->fl.oif == flp->oif &&
1196 + rth->fl.fl4_gw == flp->fl4_gw &&
1197 rth->fl.mark == flp->mark &&
1198 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1199 (IPTOS_RT_MASK | RTO_ONLINK)) &&
1200 @@ -3054,3 +3092,4 @@ int __init ip_rt_init(void)
1201 EXPORT_SYMBOL(__ip_select_ident);
1202 EXPORT_SYMBOL(ip_route_input);
1203 EXPORT_SYMBOL(ip_route_output_key);
1204 +EXPORT_SYMBOL(ip_route_input_lookup);