1 diff -urp v2.6.22/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2 --- v2.6.22/linux/include/linux/rtnetlink.h 2007-07-10 09:18:42.000000000 +0300
3 +++ linux/include/linux/rtnetlink.h 2007-07-12 08:00:54.000000000 +0300
4 @@ -293,6 +293,8 @@ struct rtnexthop
5 #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */
6 #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */
7 #define RTNH_F_ONLINK 4 /* Gateway is forced on link */
8 +#define RTNH_F_SUSPECT 8 /* We don't know the real state */
9 +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT)
11 /* Macros to handle hexthops */
13 diff -urp v2.6.22/linux/include/net/flow.h linux/include/net/flow.h
14 --- v2.6.22/linux/include/net/flow.h 2007-07-10 09:18:42.000000000 +0300
15 +++ linux/include/net/flow.h 2007-07-12 08:00:54.000000000 +0300
16 @@ -19,6 +19,8 @@ struct flowi {
25 @@ -43,6 +45,8 @@ struct flowi {
26 #define fl6_flowlabel nl_u.ip6_u.flowlabel
27 #define fl4_dst nl_u.ip4_u.daddr
28 #define fl4_src nl_u.ip4_u.saddr
29 +#define fl4_lsrc nl_u.ip4_u.lsrc
30 +#define fl4_gw nl_u.ip4_u.gw
31 #define fl4_tos nl_u.ip4_u.tos
32 #define fl4_scope nl_u.ip4_u.scope
34 diff -urp v2.6.22/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35 --- v2.6.22/linux/include/net/ip_fib.h 2007-07-10 09:18:42.000000000 +0300
36 +++ linux/include/net/ip_fib.h 2007-07-12 08:00:54.000000000 +0300
37 @@ -196,7 +196,8 @@ static inline int fib_lookup(const struc
39 static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
41 - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
42 + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
43 + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
44 ip_fib_main_table->tb_select_default(ip_fib_main_table, flp, res);
47 @@ -209,6 +210,7 @@ extern int fib_lookup(struct flowi *flp,
48 extern struct fib_table *fib_new_table(u32 id);
49 extern struct fib_table *fib_get_table(u32 id);
50 extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
51 +extern int fib_result_table(struct fib_result *res);
53 #endif /* CONFIG_IP_MULTIPLE_TABLES */
55 @@ -278,4 +280,6 @@ extern int fib_proc_init(void);
56 extern void fib_proc_exit(void);
59 +extern rwlock_t fib_nhflags_lock;
61 #endif /* _NET_FIB_H */
62 diff -urp v2.6.22/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
63 --- v2.6.22/linux/include/net/netfilter/nf_nat.h 2007-04-28 17:55:11.000000000 +0300
64 +++ linux/include/net/netfilter/nf_nat.h 2007-07-12 08:00:54.000000000 +0300
65 @@ -61,6 +61,13 @@ struct nf_nat_info
69 +/* Call input routing for SNAT-ed traffic */
70 +extern unsigned int ip_nat_route_input(unsigned int hooknum,
71 + struct sk_buff **pskb,
72 + const struct net_device *in,
73 + const struct net_device *out,
74 + int (*okfn)(struct sk_buff *));
76 /* Set up the info structure to map into this range. */
77 extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
78 const struct nf_nat_range *range,
79 diff -urp v2.6.22/linux/include/net/route.h linux/include/net/route.h
80 --- v2.6.22/linux/include/net/route.h 2007-04-28 17:55:11.000000000 +0300
81 +++ linux/include/net/route.h 2007-07-12 08:00:54.000000000 +0300
82 @@ -116,6 +116,7 @@ extern int __ip_route_output_key(struct
83 extern int ip_route_output_key(struct rtable **, struct flowi *flp);
84 extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
85 extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
86 +extern int ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
87 extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
88 extern void ip_rt_send_redirect(struct sk_buff *skb);
90 diff -urp v2.6.22/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
91 --- v2.6.22/linux/net/bridge/br_netfilter.c 2007-07-10 09:18:43.000000000 +0300
92 +++ linux/net/bridge/br_netfilter.c 2007-07-12 08:00:54.000000000 +0300
93 @@ -301,6 +301,10 @@ static int br_nf_pre_routing_finish(stru
94 struct nf_bridge_info *nf_bridge = skb->nf_bridge;
97 + /* Old skb->dst is not expected, it is lost in all cases */
98 + dst_release(skb->dst);
101 if (nf_bridge->mask & BRNF_PKT_TYPE) {
102 skb->pkt_type = PACKET_OTHERHOST;
103 nf_bridge->mask ^= BRNF_PKT_TYPE;
104 diff -urp v2.6.22/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
105 --- v2.6.22/linux/net/ipv4/fib_frontend.c 2007-07-10 09:18:43.000000000 +0300
106 +++ linux/net/ipv4/fib_frontend.c 2007-07-12 08:00:54.000000000 +0300
107 @@ -57,8 +57,12 @@ struct fib_table *ip_fib_main_table;
108 #define FIB_TABLE_HASHSZ 1
109 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
111 +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
115 +#define FIB_RES_TABLE(r) (fib_result_table(r))
117 #define FIB_TABLE_HASHSZ 256
118 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
120 @@ -189,6 +193,9 @@ int fib_validate_source(__be32 src, __be
123 struct fib_result res;
125 + unsigned char prefixlen;
126 + unsigned char scope;
130 @@ -210,31 +217,35 @@ int fib_validate_source(__be32 src, __be
132 *spec_dst = FIB_RES_PREFSRC(res);
133 fib_combine_itag(itag, &res);
134 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
135 - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
137 if (FIB_RES_DEV(res) == dev)
140 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
144 + table = FIB_RES_TABLE(&res);
145 + prefixlen = res.prefixlen;
152 fl.oif = dev->ifindex;
155 if (fib_lookup(&fl, &res) == 0) {
156 - if (res.type == RTN_UNICAST) {
157 + if (res.type == RTN_UNICAST &&
158 + ((table == FIB_RES_TABLE(&res) &&
159 + res.prefixlen >= prefixlen && res.scope >= scope) ||
161 *spec_dst = FIB_RES_PREFSRC(res);
162 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
173 @@ -839,9 +850,7 @@ static int fib_inetaddr_event(struct not
177 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
178 fib_sync_up(ifa->ifa_dev->dev);
183 @@ -877,9 +886,7 @@ static int fib_netdev_event(struct notif
186 } endfor_ifa(in_dev);
187 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
193 diff -urp v2.6.22/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
194 --- v2.6.22/linux/net/ipv4/fib_hash.c 2007-07-10 09:18:43.000000000 +0300
195 +++ linux/net/ipv4/fib_hash.c 2007-07-12 08:00:54.000000000 +0300
196 @@ -274,30 +274,38 @@ out:
200 -static int fn_hash_last_dflt=-1;
203 fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
205 - int order, last_idx;
206 + int order, last_idx, last_dflt, last_nhsel;
207 + struct fib_alias *first_fa = NULL;
208 + struct hlist_head *head;
209 struct hlist_node *node;
211 struct fib_info *fi = NULL;
212 struct fib_info *last_resort;
213 struct fn_hash *t = (struct fn_hash*)tb->tb_data;
214 - struct fn_zone *fz = t->fn_zones[0];
215 + struct fn_zone *fz = t->fn_zones[res->prefixlen];
221 + k = fz_key(flp->fl4_dst, fz);
228 read_lock(&fib_hash_lock);
229 - hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
230 + head = &fz->fz_hash[fn_hash(k, fz)];
231 + hlist_for_each_entry(f, node, head, fn_hash) {
232 struct fib_alias *fa;
234 + if (f->fn_key != k)
237 list_for_each_entry(fa, &f->fn_alias, fa_list) {
238 struct fib_info *next_fi = fa->fa_info;
240 @@ -305,41 +313,52 @@ fn_hash_select_default(struct fib_table
241 fa->fa_type != RTN_UNICAST)
245 + fa->fa_tos != flp->fl4_tos)
247 if (next_fi->fib_priority > res->fi->fib_priority)
249 - if (!next_fi->fib_nh[0].nh_gw ||
250 - next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
252 fa->fa_state |= FA_S_ACCESSED;
255 - if (next_fi != res->fi)
257 - } else if (!fib_detect_death(fi, order, &last_resort,
258 - &last_idx, &fn_hash_last_dflt)) {
260 + last_dflt = fa->fa_last_dflt;
263 + if (fi && !fib_detect_death(fi, order, &last_resort,
264 + &last_idx, &last_dflt, &last_nhsel, flp)) {
266 fib_info_put(res->fi);
268 atomic_inc(&fi->fib_clntref);
269 - fn_hash_last_dflt = order;
270 + first_fa->fa_last_dflt = order;
279 if (order <= 0 || fi == NULL) {
280 - fn_hash_last_dflt = -1;
281 + if (fi && fi->fib_nhs > 1 &&
282 + fib_detect_death(fi, order, &last_resort, &last_idx,
283 + &last_dflt, &last_nhsel, flp) &&
284 + last_resort == fi) {
285 + read_lock_bh(&fib_nhflags_lock);
286 + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
287 + read_unlock_bh(&fib_nhflags_lock);
289 + if (first_fa) first_fa->fa_last_dflt = -1;
293 - if (!fib_detect_death(fi, order, &last_resort, &last_idx, &fn_hash_last_dflt)) {
294 + if (!fib_detect_death(fi, order, &last_resort, &last_idx,
295 + &last_dflt, &last_nhsel, flp)) {
297 fib_info_put(res->fi);
299 atomic_inc(&fi->fib_clntref);
300 - fn_hash_last_dflt = order;
301 + first_fa->fa_last_dflt = order;
305 @@ -349,8 +368,11 @@ fn_hash_select_default(struct fib_table
306 res->fi = last_resort;
308 atomic_inc(&last_resort->fib_clntref);
309 + read_lock_bh(&fib_nhflags_lock);
310 + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
311 + read_unlock_bh(&fib_nhflags_lock);
312 + first_fa->fa_last_dflt = last_idx;
314 - fn_hash_last_dflt = last_idx;
316 read_unlock(&fib_hash_lock);
318 @@ -446,6 +468,7 @@ static int fn_hash_insert(struct fib_tab
319 write_lock_bh(&fib_hash_lock);
320 fi_drop = fa->fa_info;
322 + fa->fa_last_dflt = -1;
323 fa->fa_type = cfg->fc_type;
324 fa->fa_scope = cfg->fc_scope;
325 state = fa->fa_state;
326 @@ -507,6 +530,7 @@ static int fn_hash_insert(struct fib_tab
327 new_fa->fa_type = cfg->fc_type;
328 new_fa->fa_scope = cfg->fc_scope;
329 new_fa->fa_state = 0;
330 + new_fa->fa_last_dflt = -1;
333 * Insert new entry to the list.
334 diff -urp v2.6.22/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
335 --- v2.6.22/linux/net/ipv4/fib_lookup.h 2007-07-10 09:18:43.000000000 +0300
336 +++ linux/net/ipv4/fib_lookup.h 2007-07-12 08:00:54.000000000 +0300
337 @@ -9,6 +9,7 @@ struct fib_alias {
338 struct list_head fa_list;
340 struct fib_info *fa_info;
345 @@ -36,6 +37,7 @@ extern struct fib_alias *fib_find_alias(
347 extern int fib_detect_death(struct fib_info *fi, int order,
348 struct fib_info **last_resort,
349 - int *last_idx, int *dflt);
350 + int *last_idx, int *dflt, int *last_nhsel,
351 + const struct flowi *flp);
353 #endif /* _FIB_LOOKUP_H */
354 diff -urp v2.6.22/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
355 --- v2.6.22/linux/net/ipv4/fib_rules.c 2007-07-10 09:18:43.000000000 +0300
356 +++ linux/net/ipv4/fib_rules.c 2007-07-12 08:00:54.000000000 +0300
357 @@ -85,6 +85,11 @@ u32 fib_rules_tclass(struct fib_result *
361 +int fib_result_table(struct fib_result *res)
363 + return res->r->table;
366 int fib_lookup(struct flowi *flp, struct fib_result *res)
368 struct fib_lookup_arg arg = {
369 @@ -136,7 +141,8 @@ errout:
370 void fib_select_default(const struct flowi *flp, struct fib_result *res)
372 if (res->r && res->r->action == FR_ACT_TO_TBL &&
373 - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
374 + ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
375 + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)) {
376 struct fib_table *tb;
377 if ((tb = fib_get_table(res->r->table)) != NULL)
378 tb->tb_select_default(tb, flp, res);
379 diff -urp v2.6.22/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
380 --- v2.6.22/linux/net/ipv4/fib_semantics.c 2007-07-10 09:18:43.000000000 +0300
381 +++ linux/net/ipv4/fib_semantics.c 2007-07-12 08:00:54.000000000 +0300
382 @@ -55,6 +55,7 @@ static struct hlist_head *fib_info_hash;
383 static struct hlist_head *fib_info_laddrhash;
384 static unsigned int fib_hash_size;
385 static unsigned int fib_info_cnt;
386 +rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
388 #define DEVINDEX_HASHBITS 8
389 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
390 @@ -190,7 +191,7 @@ static __inline__ int nh_comp(const stru
391 #ifdef CONFIG_NET_CLS_ROUTE
392 nh->nh_tclassid != onh->nh_tclassid ||
394 - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
395 + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
398 } endfor_nexthops(fi);
399 @@ -227,7 +228,7 @@ static struct fib_info *fib_find_info(co
400 nfi->fib_priority == fi->fib_priority &&
401 memcmp(nfi->fib_metrics, fi->fib_metrics,
402 sizeof(fi->fib_metrics)) == 0 &&
403 - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
404 + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
405 (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
408 @@ -347,26 +348,70 @@ struct fib_alias *fib_find_alias(struct
411 int fib_detect_death(struct fib_info *fi, int order,
412 - struct fib_info **last_resort, int *last_idx, int *dflt)
413 + struct fib_info **last_resort, int *last_idx, int *dflt,
414 + int *last_nhsel, const struct flowi *flp)
417 - int state = NUD_NONE;
420 + struct fib_nh * nh;
422 + int flag, dead = 1;
424 + /* change_nexthops(fi) { */
425 + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
426 + if (flp->oif && flp->oif != nh->nh_oif)
428 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
429 + nh->nh_scope == RT_SCOPE_LINK)
431 + if (nh->nh_flags & RTNH_F_DEAD)
434 - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
436 - state = n->nud_state;
439 - if (state==NUD_REACHABLE)
441 - if ((state&NUD_VALID) && order != *dflt)
443 - if ((state&NUD_VALID) ||
444 - (*last_idx<0 && order > *dflt)) {
448 + if (nh->nh_dev->flags & IFF_NOARP) {
454 + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
455 + dst = flp->fl4_dst;
458 + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
460 + state = n->nud_state;
463 + if (state==NUD_REACHABLE ||
464 + ((state&NUD_VALID) && order != *dflt)) {
468 + if (!(state&NUD_VALID))
472 + if ((state&NUD_VALID) ||
473 + (*last_idx<0 && order >= *dflt)) {
476 + *last_nhsel = nhsel;
481 + read_lock_bh(&fib_nhflags_lock);
483 + nh->nh_flags |= RTNH_F_SUSPECT;
485 + nh->nh_flags &= ~RTNH_F_SUSPECT;
486 + read_unlock_bh(&fib_nhflags_lock);
489 + /* } endfor_nexthops(fi) */
494 #ifdef CONFIG_IP_ROUTE_MULTIPATH
495 @@ -536,8 +581,11 @@ static int fib_check_nh(struct fib_confi
497 if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
499 - if (!(dev->flags&IFF_UP))
501 + if (!(dev->flags&IFF_UP)) {
502 + if (fi->fib_protocol != RTPROT_STATIC)
504 + nh->nh_flags |= RTNH_F_DEAD;
508 nh->nh_scope = RT_SCOPE_LINK;
509 @@ -557,24 +605,48 @@ static int fib_check_nh(struct fib_confi
510 /* It is not necessary, but requires a bit of thinking */
511 if (fl.fl4_scope < RT_SCOPE_LINK)
512 fl.fl4_scope = RT_SCOPE_LINK;
513 - if ((err = fib_lookup(&fl, &res)) != 0)
515 + err = fib_lookup(&fl, &res);
518 - if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
520 - nh->nh_scope = res.scope;
521 - nh->nh_oif = FIB_RES_OIF(res);
522 - if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
524 - dev_hold(nh->nh_dev);
526 - if (!(nh->nh_dev->flags & IFF_UP))
530 + struct in_device *in_dev;
532 + if (err != -ENETUNREACH ||
533 + fi->fib_protocol != RTPROT_STATIC)
536 + in_dev = inetdev_by_index(nh->nh_oif);
537 + if (in_dev == NULL ||
538 + in_dev->dev->flags & IFF_UP) {
540 + in_dev_put(in_dev);
543 + nh->nh_flags |= RTNH_F_DEAD;
544 + nh->nh_scope = RT_SCOPE_LINK;
545 + nh->nh_dev = in_dev->dev;
546 + dev_hold(nh->nh_dev);
547 + in_dev_put(in_dev);
550 + if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
552 + nh->nh_scope = res.scope;
553 + nh->nh_oif = FIB_RES_OIF(res);
554 + if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
556 + dev_hold(nh->nh_dev);
557 + if (!(nh->nh_dev->flags & IFF_UP)) {
558 + if (fi->fib_protocol != RTPROT_STATIC) {
562 + nh->nh_flags |= RTNH_F_DEAD;
572 struct in_device *in_dev;
574 @@ -585,8 +657,11 @@ out:
577 if (!(in_dev->dev->flags&IFF_UP)) {
578 - in_dev_put(in_dev);
580 + if (fi->fib_protocol != RTPROT_STATIC) {
581 + in_dev_put(in_dev);
584 + nh->nh_flags |= RTNH_F_DEAD;
586 nh->nh_dev = in_dev->dev;
587 dev_hold(nh->nh_dev);
588 @@ -909,8 +984,12 @@ int fib_semantic_match(struct list_head
590 if (nh->nh_flags&RTNH_F_DEAD)
592 - if (!flp->oif || flp->oif == nh->nh_oif)
594 + if (flp->oif && flp->oif != nh->nh_oif)
596 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
597 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
601 #ifdef CONFIG_IP_ROUTE_MULTIPATH
602 if (nhsel < fi->fib_nhs) {
603 @@ -1085,18 +1164,29 @@ int fib_sync_down(__be32 local, struct n
606 change_nexthops(fi) {
607 - if (nh->nh_flags&RTNH_F_DEAD)
609 - else if (nh->nh_dev == dev &&
610 - nh->nh_scope != scope) {
611 - nh->nh_flags |= RTNH_F_DEAD;
612 + if (nh->nh_flags&RTNH_F_DEAD) {
613 + if (fi->fib_protocol!=RTPROT_STATIC ||
614 + nh->nh_dev == NULL ||
615 + __in_dev_get_rtnl(nh->nh_dev) == NULL ||
616 + nh->nh_dev->flags&IFF_UP)
618 + } else if (nh->nh_dev == dev &&
619 + nh->nh_scope != scope) {
620 + write_lock_bh(&fib_nhflags_lock);
621 #ifdef CONFIG_IP_ROUTE_MULTIPATH
622 - spin_lock_bh(&fib_multipath_lock);
623 + spin_lock(&fib_multipath_lock);
624 + nh->nh_flags |= RTNH_F_DEAD;
625 fi->fib_power -= nh->nh_power;
627 - spin_unlock_bh(&fib_multipath_lock);
628 + spin_unlock(&fib_multipath_lock);
630 + nh->nh_flags |= RTNH_F_DEAD;
633 + write_unlock_bh(&fib_nhflags_lock);
634 + if (fi->fib_protocol!=RTPROT_STATIC ||
636 + __in_dev_get_rtnl(dev) == NULL)
639 #ifdef CONFIG_IP_ROUTE_MULTIPATH
640 if (force > 1 && nh->nh_dev == dev) {
641 @@ -1115,11 +1205,8 @@ int fib_sync_down(__be32 local, struct n
645 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
648 - Dead device goes up. We wake up dead nexthops.
649 - It takes sense only on multipath routes.
650 + Dead device goes up or new address is added. We wake up dead nexthops.
653 int fib_sync_up(struct net_device *dev)
654 @@ -1129,8 +1216,10 @@ int fib_sync_up(struct net_device *dev)
655 struct hlist_head *head;
656 struct hlist_node *node;
659 + struct fib_result res;
663 if (!(dev->flags&IFF_UP))
666 @@ -1138,6 +1227,7 @@ int fib_sync_up(struct net_device *dev)
667 hash = fib_devindex_hashfn(dev->ifindex);
668 head = &fib_info_devhash[hash];
672 hlist_for_each_entry(nh, node, head, nh_hash) {
673 struct fib_info *fi = nh->nh_parent;
674 @@ -1150,19 +1240,39 @@ int fib_sync_up(struct net_device *dev)
677 change_nexthops(fi) {
678 - if (!(nh->nh_flags&RTNH_F_DEAD)) {
680 + if (!(nh->nh_flags&RTNH_F_DEAD))
683 if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
685 if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
687 + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
688 + struct flowi fl = {
690 + { .daddr = nh->nh_gw,
691 + .scope = nh->nh_scope } },
694 + if (fib_lookup(&fl, &res) != 0)
696 + if (res.type != RTN_UNICAST &&
697 + res.type != RTN_LOCAL) {
701 + nh->nh_scope = res.scope;
706 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
707 spin_lock_bh(&fib_multipath_lock);
710 nh->nh_flags &= ~RTNH_F_DEAD;
711 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
712 spin_unlock_bh(&fib_multipath_lock);
714 } endfor_nexthops(fi)
717 @@ -1170,10 +1280,14 @@ int fib_sync_up(struct net_device *dev)
727 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
730 The algorithm is suboptimal, but it provides really
731 fair weighted route distribution.
732 @@ -1182,24 +1296,45 @@ int fib_sync_up(struct net_device *dev)
733 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
735 struct fib_info *fi = res->fi;
739 spin_lock_bh(&fib_multipath_lock);
743 + change_nexthops(fi) {
744 + if (flp->oif != nh->nh_oif)
746 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
747 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
749 + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
750 + if (nh->nh_power > w) {
755 + } endfor_nexthops(fi);
757 + spin_unlock_bh(&fib_multipath_lock);
765 if (fi->fib_power <= 0) {
767 change_nexthops(fi) {
768 - if (!(nh->nh_flags&RTNH_F_DEAD)) {
769 + if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
770 power += nh->nh_weight;
771 nh->nh_power = nh->nh_weight;
773 } endfor_nexthops(fi);
774 fi->fib_power = power;
776 - spin_unlock_bh(&fib_multipath_lock);
777 - /* Race condition: route has just become dead. */
786 @@ -1209,20 +1344,40 @@ void fib_select_multipath(const struct f
788 w = jiffies % fi->fib_power;
791 change_nexthops(fi) {
792 - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
793 + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
794 if ((w -= nh->nh_power) <= 0) {
797 - res->nh_sel = nhsel;
798 spin_unlock_bh(&fib_multipath_lock);
799 + res->nh_sel = nhsel;
804 + } endfor_nexthops(fi);
813 + if (!(nh->nh_flags&RTNH_F_DEAD)) {
814 + if (flp->oif && flp->oif != nh->nh_oif)
816 + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
817 + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
819 + spin_unlock_bh(&fib_multipath_lock);
820 + res->nh_sel = nhsel;
823 } endfor_nexthops(fi);
825 /* Race condition: route has just become dead. */
827 spin_unlock_bh(&fib_multipath_lock);
830 diff -urp v2.6.22/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
831 --- v2.6.22/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-07-10 09:18:43.000000000 +0300
832 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2007-07-11 10:00:30.000000000 +0300
833 @@ -88,13 +88,29 @@ masquerade_target(struct sk_buff **pskb,
837 - rt = (struct rtable *)(*pskb)->dst;
838 - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
840 - printk("MASQUERADE: %s ate my IP address\n", out->name);
844 + struct flowi fl = { .nl_u = { .ip4_u =
845 + { .daddr = ip_hdr(*pskb)->daddr,
846 + .tos = (RT_TOS(ip_hdr(*pskb)->tos) |
848 + .gw = ((struct rtable *) (*pskb)->dst)->rt_gateway,
850 + .mark = (*pskb)->mark,
851 + .oif = out->ifindex };
852 + if (ip_route_output_key(&rt, &fl) != 0) {
853 + /* Funky routing can do this. */
854 + if (net_ratelimit())
855 + printk("MASQUERADE:"
856 + " No route: Rusty's brain broke!\n");
861 + newsrc = rt->rt_src;
862 + DEBUGP("newsrc = %u.%u.%u.%u\n", NIPQUAD(newsrc));
865 write_lock_bh(&masq_lock);
866 nat->masq_index = out->ifindex;
867 write_unlock_bh(&masq_lock);
868 diff -urp v2.6.22/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
869 --- v2.6.22/linux/net/ipv4/netfilter/nf_nat_core.c 2007-07-10 09:18:43.000000000 +0300
870 +++ linux/net/ipv4/netfilter/nf_nat_core.c 2007-07-11 10:02:05.000000000 +0300
871 @@ -590,6 +590,53 @@ nf_nat_port_nfattr_to_range(struct nfatt
872 EXPORT_SYMBOL_GPL(nf_nat_port_range_to_nfattr);
876 +ip_nat_route_input(unsigned int hooknum,
877 + struct sk_buff **pskb,
878 + const struct net_device *in,
879 + const struct net_device *out,
880 + int (*okfn)(struct sk_buff *))
882 + struct sk_buff *skb = *pskb;
884 + struct nf_conn *conn;
885 + enum ip_conntrack_info ctinfo;
886 + enum ip_conntrack_dir dir;
887 + unsigned long statusbit;
890 + if (!(conn = nf_ct_get(skb, &ctinfo)))
893 + if (!(conn->status & IPS_NAT_DONE_MASK))
895 + dir = CTINFO2DIR(ctinfo);
896 + statusbit = IPS_SRC_NAT;
897 + if (dir == IP_CT_DIR_REPLY)
898 + statusbit ^= IPS_NAT_MASK;
899 + if (!(conn->status & statusbit))
905 + if (skb->len < sizeof(struct iphdr))
908 + /* use daddr in other direction as masquerade address (lsrc) */
910 + saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
911 + if (saddr == iph->saddr)
914 + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
920 +EXPORT_SYMBOL_GPL(ip_nat_route_input);
922 static int __init nf_nat_init(void)
925 diff -urp v2.6.22/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
926 --- v2.6.22/linux/net/ipv4/netfilter/nf_nat_standalone.c 2007-07-10 09:18:43.000000000 +0300
927 +++ linux/net/ipv4/netfilter/nf_nat_standalone.c 2007-07-12 08:00:54.000000000 +0300
928 @@ -282,6 +282,14 @@ static struct nf_hook_ops nf_nat_ops[] =
929 .hooknum = NF_IP_PRE_ROUTING,
930 .priority = NF_IP_PRI_NAT_DST,
932 + /* Before routing, route before mangling */
934 + .hook = ip_nat_route_input,
935 + .owner = THIS_MODULE,
937 + .hooknum = NF_IP_PRE_ROUTING,
938 + .priority = NF_IP_PRI_LAST-1,
940 /* After packet filtering, change source */
943 diff -urp v2.6.22/linux/net/ipv4/route.c linux/net/ipv4/route.c
944 --- v2.6.22/linux/net/ipv4/route.c 2007-07-10 09:18:43.000000000 +0300
945 +++ linux/net/ipv4/route.c 2007-07-12 08:00:54.000000000 +0300
946 @@ -1208,6 +1208,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
948 /* Gateway is different ... */
949 rt->rt_gateway = new_gw;
950 + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
952 /* Redirect received -> path was valid */
953 dst_confirm(&rth->u.dst);
954 @@ -1643,6 +1644,7 @@ static int ip_route_input_mc(struct sk_b
955 rth->fl.fl4_tos = tos;
956 rth->fl.mark = skb->mark;
957 rth->fl.fl4_src = saddr;
958 + rth->fl.fl4_lsrc = 0;
960 #ifdef CONFIG_NET_CLS_ROUTE
961 rth->u.dst.tclassid = itag;
962 @@ -1653,6 +1655,7 @@ static int ip_route_input_mc(struct sk_b
963 dev_hold(rth->u.dst.dev);
964 rth->idev = in_dev_get(rth->u.dst.dev);
966 + rth->fl.fl4_gw = 0;
967 rth->rt_gateway = daddr;
968 rth->rt_spec_dst= spec_dst;
969 rth->rt_type = RTN_MULTICAST;
970 @@ -1717,7 +1720,7 @@ static inline int __mkroute_input(struct
971 struct fib_result* res,
972 struct in_device *in_dev,
973 __be32 daddr, __be32 saddr, u32 tos,
974 - struct rtable **result)
975 + __be32 lsrc, struct rtable **result)
979 @@ -1751,6 +1754,7 @@ static inline int __mkroute_input(struct
980 flags |= RTCF_DIRECTSRC;
982 if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) &&
984 (IN_DEV_SHARED_MEDIA(out_dev) ||
985 inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
986 flags |= RTCF_DOREDIRECT;
987 @@ -1788,6 +1792,7 @@ static inline int __mkroute_input(struct
988 rth->fl.mark = skb->mark;
989 rth->fl.fl4_src = saddr;
991 + rth->fl.fl4_lsrc = lsrc;
992 rth->rt_gateway = daddr;
994 rth->fl.iif = in_dev->dev->ifindex;
995 @@ -1795,6 +1800,7 @@ static inline int __mkroute_input(struct
996 dev_hold(rth->u.dst.dev);
997 rth->idev = in_dev_get(rth->u.dst.dev);
999 + rth->fl.fl4_gw = 0;
1000 rth->rt_spec_dst= spec_dst;
1002 rth->u.dst.input = ip_forward;
1003 @@ -1816,19 +1822,20 @@ static inline int ip_mkroute_input_def(s
1004 struct fib_result* res,
1005 const struct flowi *fl,
1006 struct in_device *in_dev,
1007 - __be32 daddr, __be32 saddr, u32 tos)
1008 + __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1010 struct rtable* rth = NULL;
1014 + fib_select_default(fl, res);
1015 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1016 - if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1017 + if (res->fi && res->fi->fib_nhs > 1)
1018 fib_select_multipath(fl, res);
1021 /* create a routing cache entry */
1022 - err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1023 + err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1027 @@ -1841,7 +1848,7 @@ static inline int ip_mkroute_input(struc
1028 struct fib_result* res,
1029 const struct flowi *fl,
1030 struct in_device *in_dev,
1031 - __be32 daddr, __be32 saddr, u32 tos)
1032 + __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1034 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
1035 struct rtable* rth = NULL, *rtres;
1036 @@ -1857,7 +1864,7 @@ static inline int ip_mkroute_input(struc
1037 /* distinguish between multipath and singlepath */
1039 return ip_mkroute_input_def(skb, res, fl, in_dev, daddr,
1043 /* add all alternatives to the routing cache */
1044 for (hop = 0; hop < hopcount; hop++) {
1045 @@ -1869,7 +1876,7 @@ static inline int ip_mkroute_input(struc
1047 /* create a routing cache entry */
1048 err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos,
1054 @@ -1889,7 +1896,7 @@ static inline int ip_mkroute_input(struc
1055 skb->dst = &rtres->u.dst;
1057 #else /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1058 - return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos);
1059 + return ip_mkroute_input_def(skb, res, fl, in_dev, daddr, saddr, tos, lsrc);
1060 #endif /* CONFIG_IP_ROUTE_MULTIPATH_CACHED */
1063 @@ -1905,18 +1912,18 @@ static inline int ip_mkroute_input(struc
1066 static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1067 - u8 tos, struct net_device *dev)
1068 + u8 tos, struct net_device *dev, __be32 lsrc)
1070 struct fib_result res;
1071 struct in_device *in_dev = in_dev_get(dev);
1072 struct flowi fl = { .nl_u = { .ip4_u =
1075 + .saddr = lsrc? : saddr,
1077 .scope = RT_SCOPE_UNIVERSE,
1080 - .iif = dev->ifindex };
1081 + .iif = lsrc? loopback_dev.ifindex : dev->ifindex };
1084 struct rtable * rth;
1085 @@ -1949,6 +1956,12 @@ static int ip_route_input_slow(struct sk
1086 if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr))
1087 goto martian_destination;
1090 + if (MULTICAST(lsrc) || BADCLASS(lsrc) ||
1091 + ZERONET(lsrc) || LOOPBACK(lsrc))
1096 * Now we are ready to route packet.
1098 @@ -1958,6 +1971,8 @@ static int ip_route_input_slow(struct sk
1102 + fl.iif = dev->ifindex;
1103 + fl.fl4_src = saddr;
1105 RT_CACHE_STAT_INC(in_slow_tot);
1107 @@ -1982,7 +1997,7 @@ static int ip_route_input_slow(struct sk
1108 if (res.type != RTN_UNICAST)
1109 goto martian_destination;
1111 - err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1112 + err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos, lsrc);
1113 if (err == -ENOBUFS)
1116 @@ -1997,6 +2012,8 @@ out: return err;
1118 if (skb->protocol != htons(ETH_P_IP))
1124 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1125 @@ -2037,6 +2054,7 @@ local_input:
1126 rth->u.dst.dev = &loopback_dev;
1127 dev_hold(rth->u.dst.dev);
1128 rth->idev = in_dev_get(rth->u.dst.dev);
1129 + rth->fl.fl4_gw = 0;
1130 rth->rt_gateway = daddr;
1131 rth->rt_spec_dst= spec_dst;
1132 rth->u.dst.input= ip_local_deliver;
1133 @@ -2086,8 +2104,9 @@ martian_source:
1137 -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1138 - u8 tos, struct net_device *dev)
1140 +ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1141 + u8 tos, struct net_device *dev, __be32 lsrc)
1143 struct rtable * rth;
1145 @@ -2102,6 +2121,7 @@ int ip_route_input(struct sk_buff *skb,
1146 if (rth->fl.fl4_dst == daddr &&
1147 rth->fl.fl4_src == saddr &&
1148 rth->fl.iif == iif &&
1149 + rth->fl.fl4_lsrc == lsrc &&
1151 rth->fl.mark == skb->mark &&
1152 rth->fl.fl4_tos == tos) {
1153 @@ -2148,7 +2168,19 @@ int ip_route_input(struct sk_buff *skb,
1157 - return ip_route_input_slow(skb, daddr, saddr, tos, dev);
1158 + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1161 +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1162 + u8 tos, struct net_device *dev)
1164 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0);
1167 +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1168 + u8 tos, struct net_device *dev, __be32 lsrc)
1170 + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc);
1173 static inline int __mkroute_output(struct rtable **result,
1174 @@ -2227,6 +2259,7 @@ static inline int __mkroute_output(struc
1175 rth->fl.fl4_tos = tos;
1176 rth->fl.fl4_src = oldflp->fl4_src;
1177 rth->fl.oif = oldflp->oif;
1178 + rth->fl.fl4_gw = oldflp->fl4_gw;
1179 rth->fl.mark = oldflp->mark;
1180 rth->rt_dst = fl->fl4_dst;
1181 rth->rt_src = fl->fl4_src;
1182 @@ -2367,6 +2400,7 @@ static int ip_route_output_slow(struct r
1183 struct flowi fl = { .nl_u = { .ip4_u =
1184 { .daddr = oldflp->fl4_dst,
1185 .saddr = oldflp->fl4_src,
1186 + .gw = oldflp->fl4_gw,
1187 .tos = tos & IPTOS_RT_MASK,
1188 .scope = ((tos & RTO_ONLINK) ?
1190 @@ -2470,6 +2504,7 @@ static int ip_route_output_slow(struct r
1191 dev_out = &loopback_dev;
1193 fl.oif = loopback_dev.ifindex;
1195 res.type = RTN_LOCAL;
1196 flags |= RTCF_LOCAL;
1198 @@ -2477,7 +2512,7 @@ static int ip_route_output_slow(struct r
1200 if (fib_lookup(&fl, &res)) {
1202 - if (oldflp->oif) {
1203 + if (oldflp->oif && dev_out->flags & IFF_UP) {
1204 /* Apparently, routing tables are wrong. Assume,
1205 that the destination is on link.
1207 @@ -2517,6 +2552,7 @@ static int ip_route_output_slow(struct r
1208 dev_out = &loopback_dev;
1210 fl.oif = dev_out->ifindex;
1213 fib_info_put(res.fi);
1215 @@ -2524,13 +2560,12 @@ static int ip_route_output_slow(struct r
1219 + if (res.type == RTN_UNICAST)
1220 + fib_select_default(&fl, &res);
1221 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1222 - if (res.fi->fib_nhs > 1 && fl.oif == 0)
1223 + if (res.fi->fib_nhs > 1)
1224 fib_select_multipath(&fl, &res);
1227 - if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1228 - fib_select_default(&fl, &res);
1231 fl.fl4_src = FIB_RES_PREFSRC(res);
1232 @@ -2567,6 +2602,7 @@ int __ip_route_output_key(struct rtable
1233 rth->fl.fl4_src == flp->fl4_src &&
1235 rth->fl.oif == flp->oif &&
1236 + rth->fl.fl4_gw == flp->fl4_gw &&
1237 rth->fl.mark == flp->mark &&
1238 !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1239 (IPTOS_RT_MASK | RTO_ONLINK))) {
1240 @@ -3271,3 +3307,4 @@ int __init ip_rt_init(void)
1241 EXPORT_SYMBOL(__ip_select_ident);
1242 EXPORT_SYMBOL(ip_route_input);
1243 EXPORT_SYMBOL(ip_route_output_key);
1244 +EXPORT_SYMBOL(ip_route_input_lookup);