git.pld-linux.org Git - packages/kernel.git/blob - kernel-routes.patch
- update routes, vserver and tuxonice patches
[packages/kernel.git] / kernel-routes.patch
1 diff -urp v2.6.37/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2 --- v2.6.37/linux/include/linux/rtnetlink.h     2011-01-06 00:01:18.000000000 +0200
3 +++ linux/include/linux/rtnetlink.h     2011-01-08 17:20:30.066270851 +0200
4 @@ -312,6 +312,8 @@ struct rtnexthop {
5  #define RTNH_F_DEAD            1       /* Nexthop is dead (used by multipath)  */
6  #define RTNH_F_PERVASIVE       2       /* Do recursive gateway lookup  */
7  #define RTNH_F_ONLINK          4       /* Gateway is forced on link    */
8 +#define RTNH_F_SUSPECT         8       /* We don't know the real state */
9 +#define RTNH_F_BADSTATE                (RTNH_F_DEAD | RTNH_F_SUSPECT)
10  
11  /* Macros to handle hexthops */
12  
13 diff -urp v2.6.37/linux/include/net/flow.h linux/include/net/flow.h
14 --- v2.6.37/linux/include/net/flow.h    2010-08-02 09:37:48.000000000 +0300
15 +++ linux/include/net/flow.h    2011-01-08 17:21:40.092271753 +0200
16 @@ -19,6 +19,8 @@ struct flowi {
17                 struct {
18                         __be32                  daddr;
19                         __be32                  saddr;
20 +                       __be32                  lsrc;
21 +                       __be32                  gw;
22                         __u8                    tos;
23                         __u8                    scope;
24                 } ip4_u;
25 @@ -43,6 +45,8 @@ struct flowi {
26  #define fl6_flowlabel  nl_u.ip6_u.flowlabel
27  #define fl4_dst                nl_u.ip4_u.daddr
28  #define fl4_src                nl_u.ip4_u.saddr
29 +#define fl4_lsrc       nl_u.ip4_u.lsrc
30 +#define fl4_gw         nl_u.ip4_u.gw
31  #define fl4_tos                nl_u.ip4_u.tos
32  #define fl4_scope      nl_u.ip4_u.scope
33  
34 diff -urp v2.6.37/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35 --- v2.6.37/linux/include/net/ip_fib.h  2011-01-06 00:01:19.000000000 +0200
36 +++ linux/include/net/ip_fib.h  2011-01-08 17:20:30.066270851 +0200
37 @@ -210,6 +210,8 @@ extern int fib_lookup(struct net *n, str
38  extern struct fib_table *fib_new_table(struct net *net, u32 id);
39  extern struct fib_table *fib_get_table(struct net *net, u32 id);
40  
41 +extern int fib_result_table(struct fib_result *res);
42 +
43  #endif /* CONFIG_IP_MULTIPLE_TABLES */
44  
45  /* Exported by fib_frontend.c */
46 @@ -270,4 +272,6 @@ static inline void fib_proc_exit(struct 
47  }
48  #endif
49  
50 +extern rwlock_t fib_nhflags_lock;
51 +
52  #endif  /* _NET_FIB_H */
53 diff -urp v2.6.37/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54 --- v2.6.37/linux/include/net/netfilter/nf_nat.h        2010-02-25 09:01:36.000000000 +0200
55 +++ linux/include/net/netfilter/nf_nat.h        2011-01-08 17:21:40.092271753 +0200
56 @@ -73,6 +73,13 @@ struct nf_conn_nat {
57  #endif
58  };
59  
60 +/* Call input routing for SNAT-ed traffic */
61 +extern unsigned int ip_nat_route_input(unsigned int hooknum,
62 +                                      struct sk_buff *skb,
63 +                                      const struct net_device *in,
64 +                                      const struct net_device *out,
65 +                                      int (*okfn)(struct sk_buff *));
66 +
67  /* Set up the info structure to map into this range. */
68  extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69                                       const struct nf_nat_range *range,
70 diff -urp v2.6.37/linux/include/net/route.h linux/include/net/route.h
71 --- v2.6.37/linux/include/net/route.h   2010-10-22 11:34:37.000000000 +0300
72 +++ linux/include/net/route.h   2011-01-08 17:21:40.093271136 +0200
73 @@ -126,6 +126,7 @@ static inline int ip_route_input_noref(s
74         return ip_route_input_common(skb, dst, src, tos, devin, true);
75  }
76  
77 +extern int             ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
78  extern unsigned short  ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
79  extern void            ip_rt_send_redirect(struct sk_buff *skb);
80  
81 diff -urp v2.6.37/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82 --- v2.6.37/linux/net/bridge/br_netfilter.c     2011-01-06 00:01:21.000000000 +0200
83 +++ linux/net/bridge/br_netfilter.c     2011-01-08 17:21:40.094271883 +0200
84 @@ -404,6 +404,9 @@ static int br_nf_pre_routing_finish(stru
85         struct rtable *rt;
86         int err;
87  
88 +       /* Old skb->dst is not expected, it is lost in all cases */
89 +       skb_dst_drop(skb);
90 +
91         if (nf_bridge->mask & BRNF_PKT_TYPE) {
92                 skb->pkt_type = PACKET_OTHERHOST;
93                 nf_bridge->mask ^= BRNF_PKT_TYPE;
94 diff -urp v2.6.37/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
95 --- v2.6.37/linux/net/ipv4/fib_frontend.c       2011-01-06 00:01:22.000000000 +0200
96 +++ linux/net/ipv4/fib_frontend.c       2011-01-08 17:20:30.067270952 +0200
97 @@ -47,6 +47,8 @@
98  
99  #ifndef CONFIG_IP_MULTIPLE_TABLES
100  
101 +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
102 +
103  static int __net_init fib4_rules_init(struct net *net)
104  {
105         struct fib_table *local_table, *main_table;
106 @@ -71,6 +73,8 @@ fail:
107  }
108  #else
109  
110 +#define FIB_RES_TABLE(r) (fib_result_table(r))
111 +
112  struct fib_table *fib_new_table(struct net *net, u32 id)
113  {
114         struct fib_table *tb;
115 @@ -125,7 +129,8 @@ void fib_select_default(struct net *net,
116         table = res->r->table;
117  #endif
118         tb = fib_get_table(net, table);
119 -       if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
120 +       if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
121 +           FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
122                 fib_table_select_default(tb, flp, res);
123  }
124  
125 @@ -264,6 +269,9 @@ int fib_validate_source(__be32 src, __be
126                 .iif = oif
127         };
128         struct fib_result res;
129 +       int table;
130 +       unsigned char prefixlen;
131 +       unsigned char scope;
132         int no_addr, rpf, accept_local;
133         bool dev_match;
134         int ret;
135 @@ -310,19 +318,26 @@ int fib_validate_source(__be32 src, __be
136                 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
137                 return ret;
138         }
139 +       table = FIB_RES_TABLE(&res);
140 +       prefixlen = res.prefixlen;
141 +       scope = res.scope;
142         if (no_addr)
143                 goto last_resort;
144 -       if (rpf == 1)
145 -               goto e_rpf;
146         fl.oif = dev->ifindex;
147  
148         ret = 0;
149         if (fib_lookup(net, &fl, &res) == 0) {
150 -               if (res.type == RTN_UNICAST) {
151 +               if (res.type == RTN_UNICAST &&
152 +                   ((table == FIB_RES_TABLE(&res) &&
153 +                     res.prefixlen >= prefixlen && res.scope >= scope) ||
154 +                    !rpf)) {
155                         *spec_dst = FIB_RES_PREFSRC(res);
156                         ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
157 +                       return ret;
158                 }
159         }
160 +       if (rpf == 1)
161 +               goto e_rpf;
162         return ret;
163  
164  last_resort:
165 @@ -954,9 +969,7 @@ static int fib_inetaddr_event(struct not
166         switch (event) {
167         case NETDEV_UP:
168                 fib_add_ifaddr(ifa);
169 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
170                 fib_sync_up(dev);
171 -#endif
172                 rt_cache_flush(dev_net(dev), -1);
173                 break;
174         case NETDEV_DOWN:
175 @@ -992,9 +1005,7 @@ static int fib_netdev_event(struct notif
176                 for_ifa(in_dev) {
177                         fib_add_ifaddr(ifa);
178                 } endfor_ifa(in_dev);
179 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
180                 fib_sync_up(dev);
181 -#endif
182                 rt_cache_flush(dev_net(dev), -1);
183                 break;
184         case NETDEV_DOWN:
185 diff -urp v2.6.37/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
186 --- v2.6.37/linux/net/ipv4/fib_hash.c   2011-01-06 00:01:22.000000000 +0200
187 +++ linux/net/ipv4/fib_hash.c   2011-01-08 17:20:30.068270994 +0200
188 @@ -305,27 +305,43 @@ out:
189  void fib_table_select_default(struct fib_table *tb,
190                               const struct flowi *flp, struct fib_result *res)
191  {
192 -       int order, last_idx;
193 +       int order, last_idx, last_dflt, last_nhsel, good;
194 +       struct fib_alias *first_fa;
195         struct hlist_node *node;
196         struct fib_node *f;
197 -       struct fib_info *fi = NULL;
198 +       struct fib_info *fi;
199         struct fib_info *last_resort;
200         struct fn_hash *t = (struct fn_hash *)tb->tb_data;
201 -       struct fn_zone *fz = t->fn_zones[0];
202 +       struct fn_zone *fz = t->fn_zones[res->prefixlen];
203         struct hlist_head *head;
204 +       __be32 k;
205 +       unsigned int seq;
206  
207         if (fz == NULL)
208                 return;
209  
210 +       k = fz_key(flp->fl4_dst, fz);
211 +
212 +       rcu_read_lock();
213 +
214 +retry:
215 +       last_dflt = -2;
216 +       last_nhsel = 0;
217         last_idx = -1;
218         last_resort = NULL;
219         order = -1;
220 +       fi = NULL;
221 +       first_fa = NULL;
222 +       good = 0;
223  
224 -       rcu_read_lock();
225 -       head = rcu_dereference(fz->fz_hash);
226 +       seq = read_seqbegin(&fz->fz_lock);
227 +       head = rcu_dereference(fz->fz_hash) + fn_hash(k, fz);
228         hlist_for_each_entry_rcu(f, node, head, fn_hash) {
229                 struct fib_alias *fa;
230  
231 +               if (f->fn_key != k)
232 +                       continue;
233 +
234                 list_for_each_entry_rcu(fa, &f->fn_alias, fa_list) {
235                         struct fib_info *next_fi = fa->fa_info;
236  
237 @@ -333,43 +349,66 @@ void fib_table_select_default(struct fib
238                             fa->fa_type != RTN_UNICAST)
239                                 continue;
240  
241 +                       if (fa->fa_tos &&
242 +                           fa->fa_tos != flp->fl4_tos)
243 +                               continue;
244                         if (next_fi->fib_priority > res->fi->fib_priority)
245                                 break;
246 -                       if (!next_fi->fib_nh[0].nh_gw ||
247 -                           next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
248 -                               continue;
249  
250                         fib_alias_accessed(fa);
251  
252 -                       if (fi == NULL) {
253 -                               if (next_fi != res->fi)
254 -                                       break;
255 -                       } else if (!fib_detect_death(fi, order, &last_resort,
256 -                                               &last_idx, tb->tb_default)) {
257 -                               fib_result_assign(res, fi);
258 -                               tb->tb_default = order;
259 -                               goto out;
260 +                       if (!first_fa) {
261 +                               last_dflt = fa->fa_last_dflt;
262 +                               first_fa = fa;
263 +                       }
264 +                       if (fi && !fib_detect_death(fi, order, &last_resort,
265 +                               &last_idx, &last_dflt, &last_nhsel, flp)) {
266 +                               good = 1;
267 +                               goto done1;
268                         }
269                         fi = next_fi;
270                         order++;
271                 }
272 +               break;
273 +       }
274 +
275 +done1:
276 +       if (read_seqretry(&fz->fz_lock, seq))
277 +               goto retry;
278 +
279 +       if (good) {
280 +               fib_result_assign(res, fi);
281 +               first_fa->fa_last_dflt = order;
282 +               goto out;
283         }
284  
285         if (order <= 0 || fi == NULL) {
286 -               tb->tb_default = -1;
287 +               if (fi && fi->fib_nhs > 1 &&
288 +                   fib_detect_death(fi, order, &last_resort, &last_idx,
289 +                       &last_dflt, &last_nhsel, flp) &&
290 +                   last_resort == fi) {
291 +                       read_lock_bh(&fib_nhflags_lock);
292 +                       fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
293 +                       read_unlock_bh(&fib_nhflags_lock);
294 +               }
295 +               if (first_fa) first_fa->fa_last_dflt = -1;
296                 goto out;
297         }
298  
299         if (!fib_detect_death(fi, order, &last_resort, &last_idx,
300 -                               tb->tb_default)) {
301 +                             &last_dflt, &last_nhsel, flp)) {
302                 fib_result_assign(res, fi);
303 -               tb->tb_default = order;
304 +               first_fa->fa_last_dflt = order;
305                 goto out;
306         }
307  
308 -       if (last_idx >= 0)
309 +       if (last_idx >= 0) {
310                 fib_result_assign(res, last_resort);
311 -       tb->tb_default = last_idx;
312 +               read_lock_bh(&fib_nhflags_lock);
313 +               last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
314 +               read_unlock_bh(&fib_nhflags_lock);
315 +               first_fa->fa_last_dflt = last_idx;
316 +       }
317  out:
318         rcu_read_unlock();
319  }
320 @@ -507,6 +546,7 @@ int fib_table_insert(struct fib_table *t
321  
322                         new_fa->fa_tos = fa->fa_tos;
323                         new_fa->fa_info = fi;
324 +                       new_fa->fa_last_dflt = -1;
325                         new_fa->fa_type = cfg->fc_type;
326                         new_fa->fa_scope = cfg->fc_scope;
327                         state = fa->fa_state;
328 @@ -559,6 +599,7 @@ int fib_table_insert(struct fib_table *t
329         new_fa->fa_type = cfg->fc_type;
330         new_fa->fa_scope = cfg->fc_scope;
331         new_fa->fa_state = 0;
332 +       new_fa->fa_last_dflt = -1;
333  
334         /*
335          * Insert new entry to the list.
336 diff -urp v2.6.37/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
337 --- v2.6.37/linux/net/ipv4/fib_lookup.h 2011-01-06 00:01:22.000000000 +0200
338 +++ linux/net/ipv4/fib_lookup.h 2011-01-08 17:20:30.069271071 +0200
339 @@ -8,6 +8,7 @@
340  struct fib_alias {
341         struct list_head        fa_list;
342         struct fib_info         *fa_info;
343 +       int                     fa_last_dflt;
344         u8                      fa_tos;
345         u8                      fa_type;
346         u8                      fa_scope;
347 @@ -42,7 +43,8 @@ extern struct fib_alias *fib_find_alias(
348                                         u8 tos, u32 prio);
349  extern int fib_detect_death(struct fib_info *fi, int order,
350                             struct fib_info **last_resort,
351 -                           int *last_idx, int dflt);
352 +                           int *last_idx, int *dflt, int *last_nhsel,
353 +                           const struct flowi *flp);
354  
355  static inline void fib_result_assign(struct fib_result *res,
356                                      struct fib_info *fi)
357 diff -urp v2.6.37/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
358 --- v2.6.37/linux/net/ipv4/fib_rules.c  2011-01-06 00:01:22.000000000 +0200
359 +++ linux/net/ipv4/fib_rules.c  2011-01-08 17:20:30.070271447 +0200
360 @@ -53,6 +53,11 @@ u32 fib_rules_tclass(struct fib_result *
361  }
362  #endif
363  
364 +int fib_result_table(struct fib_result *res)
365 +{
366 +       return res->r->table;
367 +}
368 +
369  int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
370  {
371         struct fib_lookup_arg arg = {
372 diff -urp v2.6.37/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
373 --- v2.6.37/linux/net/ipv4/fib_semantics.c      2011-01-06 00:01:22.000000000 +0200
374 +++ linux/net/ipv4/fib_semantics.c      2011-01-08 17:21:40.095271450 +0200
375 @@ -51,6 +51,7 @@ static struct hlist_head *fib_info_hash;
376  static struct hlist_head *fib_info_laddrhash;
377  static unsigned int fib_hash_size;
378  static unsigned int fib_info_cnt;
379 +rwlock_t fib_nhflags_lock = RW_LOCK_UNLOCKED;
380  
381  #define DEVINDEX_HASHBITS 8
382  #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
383 @@ -203,7 +204,7 @@ static inline int nh_comp(const struct f
384  #ifdef CONFIG_NET_CLS_ROUTE
385                     nh->nh_tclassid != onh->nh_tclassid ||
386  #endif
387 -                   ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
388 +                   ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_BADSTATE))
389                         return -1;
390                 onh++;
391         } endfor_nexthops(fi);
392 @@ -254,7 +255,7 @@ static struct fib_info *fib_find_info(co
393                     nfi->fib_priority == fi->fib_priority &&
394                     memcmp(nfi->fib_metrics, fi->fib_metrics,
395                            sizeof(fi->fib_metrics)) == 0 &&
396 -                   ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
397 +                   ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_BADSTATE) == 0 &&
398                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
399                         return fi;
400         }
401 @@ -365,26 +366,70 @@ struct fib_alias *fib_find_alias(struct 
402  }
403  
404  int fib_detect_death(struct fib_info *fi, int order,
405 -                    struct fib_info **last_resort, int *last_idx, int dflt)
406 +                    struct fib_info **last_resort, int *last_idx, int *dflt,
407 +                    int *last_nhsel, const struct flowi *flp)
408  {
409         struct neighbour *n;
410 -       int state = NUD_NONE;
411 +       int nhsel;
412 +       int state;
413 +       struct fib_nh * nh;
414 +       __be32 dst;
415 +       int flag, dead = 1;
416 +
417 +       /* change_nexthops(fi) { */
418 +       for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
419 +               if (flp->oif && flp->oif != nh->nh_oif)
420 +                       continue;
421 +               if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
422 +                   nh->nh_scope == RT_SCOPE_LINK)
423 +                       continue;
424 +               if (nh->nh_flags & RTNH_F_DEAD)
425 +                       continue;
426  
427 -       n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
428 -       if (n) {
429 -               state = n->nud_state;
430 -               neigh_release(n);
431 -       }
432 -       if (state == NUD_REACHABLE)
433 -               return 0;
434 -       if ((state & NUD_VALID) && order != dflt)
435 -               return 0;
436 -       if ((state & NUD_VALID) ||
437 -           (*last_idx < 0 && order > dflt)) {
438 -               *last_resort = fi;
439 -               *last_idx = order;
440 +               flag = 0;
441 +               if (nh->nh_dev->flags & IFF_NOARP) {
442 +                       dead = 0;
443 +                       goto setfl;
444 +               }
445 +
446 +               dst = nh->nh_gw;
447 +               if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
448 +                       dst = flp->fl4_dst;
449 +
450 +               state = NUD_NONE;
451 +               n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
452 +               if (n) {
453 +                       state = n->nud_state;
454 +                       neigh_release(n);
455 +               }
456 +               if (state == NUD_REACHABLE ||
457 +                   ((state & NUD_VALID) && order != *dflt)) {
458 +                       dead = 0;
459 +                       goto setfl;
460 +               }
461 +               if (!(state & NUD_VALID))
462 +                       flag = 1;
463 +               if (!dead)
464 +                       goto setfl;
465 +               if ((state & NUD_VALID) ||
466 +                   (*last_idx < 0 && order >= *dflt)) {
467 +                       *last_resort = fi;
468 +                       *last_idx = order;
469 +                       *last_nhsel = nhsel;
470 +               }
471 +
472 +               setfl:
473 +
474 +               read_lock_bh(&fib_nhflags_lock);
475 +               if (flag)
476 +                       nh->nh_flags |= RTNH_F_SUSPECT;
477 +               else
478 +                       nh->nh_flags &= ~RTNH_F_SUSPECT;
479 +               read_unlock_bh(&fib_nhflags_lock);
480         }
481 -       return 1;
482 +       /* } endfor_nexthops(fi) */
483 +
484 +       return dead;
485  }
486  
487  #ifdef CONFIG_IP_ROUTE_MULTIPATH
488 @@ -553,8 +598,11 @@ static int fib_check_nh(struct fib_confi
489                         dev = __dev_get_by_index(net, nh->nh_oif);
490                         if (!dev)
491                                 return -ENODEV;
492 -                       if (!(dev->flags & IFF_UP))
493 -                               return -ENETDOWN;
494 +                       if (!(dev->flags & IFF_UP)) {
495 +                               if (fi->fib_protocol != RTPROT_STATIC)
496 +                                       return -ENETDOWN;
497 +                               nh->nh_flags |= RTNH_F_DEAD;
498 +                       }
499                         nh->nh_dev = dev;
500                         dev_hold(dev);
501                         nh->nh_scope = RT_SCOPE_LINK;
502 @@ -576,21 +624,41 @@ static int fib_check_nh(struct fib_confi
503                         if (fl.fl4_scope < RT_SCOPE_LINK)
504                                 fl.fl4_scope = RT_SCOPE_LINK;
505                         err = fib_lookup(net, &fl, &res);
506 -                       if (err) {
507 -                               rcu_read_unlock();
508 -                               return err;
509 +               }
510 +               if (err) {
511 +                       struct in_device *in_dev;
512 +
513 +                       if (err != -ENETUNREACH ||
514 +                           fi->fib_protocol != RTPROT_STATIC)
515 +                               goto out;
516 +
517 +                       in_dev = inetdev_by_index(net, nh->nh_oif);
518 +                       if (in_dev == NULL ||
519 +                           in_dev->dev->flags & IFF_UP)
520 +                               goto out;
521 +                       nh->nh_flags |= RTNH_F_DEAD;
522 +                       nh->nh_scope = RT_SCOPE_LINK;
523 +                       nh->nh_dev = in_dev->dev;
524 +                       dev_hold(nh->nh_dev);
525 +               } else {
526 +                       err = -EINVAL;
527 +                       if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
528 +                               goto out;
529 +                       nh->nh_scope = res.scope;
530 +                       nh->nh_oif = FIB_RES_OIF(res);
531 +                       nh->nh_dev = dev = FIB_RES_DEV(res);
532 +                       if (!dev)
533 +                               goto out;
534 +                       dev_hold(dev);
535 +                       if (!(nh->nh_dev->flags & IFF_UP)) {
536 +                               if (fi->fib_protocol != RTPROT_STATIC) {
537 +                                       err = -ENETDOWN;
538 +                                       goto out;
539 +                               }
540 +                               nh->nh_flags |= RTNH_F_DEAD;
541                         }
542 +                       err = 0;
543                 }
544 -               err = -EINVAL;
545 -               if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
546 -                       goto out;
547 -               nh->nh_scope = res.scope;
548 -               nh->nh_oif = FIB_RES_OIF(res);
549 -               nh->nh_dev = dev = FIB_RES_DEV(res);
550 -               if (!dev)
551 -                       goto out;
552 -               dev_hold(dev);
553 -               err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
554         } else {
555                 struct in_device *in_dev;
556  
557 @@ -603,8 +671,11 @@ static int fib_check_nh(struct fib_confi
558                 if (in_dev == NULL)
559                         goto out;
560                 err = -ENETDOWN;
561 -               if (!(in_dev->dev->flags & IFF_UP))
562 -                       goto out;
563 +               if (!(in_dev->dev->flags & IFF_UP)) {
564 +                       if (fi->fib_protocol != RTPROT_STATIC)
565 +                               goto out;
566 +                       nh->nh_flags |= RTNH_F_DEAD;
567 +               }
568                 nh->nh_dev = in_dev->dev;
569                 dev_hold(nh->nh_dev);
570                 nh->nh_scope = RT_SCOPE_HOST;
571 @@ -919,8 +990,12 @@ int fib_semantic_match(struct list_head 
572                                 for_nexthops(fi) {
573                                         if (nh->nh_flags & RTNH_F_DEAD)
574                                                 continue;
575 -                                       if (!flp->oif || flp->oif == nh->nh_oif)
576 -                                               break;
577 +                                       if (flp->oif && flp->oif != nh->nh_oif)
578 +                                               continue;
579 +                                       if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
580 +                                           nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
581 +                                               continue;
582 +                                       break;
583                                 }
584  #ifdef CONFIG_IP_ROUTE_MULTIPATH
585                                 if (nhsel < fi->fib_nhs) {
586 @@ -1100,18 +1175,29 @@ int fib_sync_down_dev(struct net_device 
587                 prev_fi = fi;
588                 dead = 0;
589                 change_nexthops(fi) {
590 -                       if (nexthop_nh->nh_flags & RTNH_F_DEAD)
591 -                               dead++;
592 -                       else if (nexthop_nh->nh_dev == dev &&
593 -                                nexthop_nh->nh_scope != scope) {
594 -                               nexthop_nh->nh_flags |= RTNH_F_DEAD;
595 +                       if (nexthop_nh->nh_flags & RTNH_F_DEAD) {
596 +                               if (fi->fib_protocol != RTPROT_STATIC ||
597 +                                   nexthop_nh->nh_dev == NULL ||
598 +                                   __in_dev_get_rtnl(nexthop_nh->nh_dev) == NULL ||
599 +                                   nexthop_nh->nh_dev->flags&IFF_UP)
600 +                                       dead++;
601 +                       } else if (nexthop_nh->nh_dev == dev &&
602 +                                  nexthop_nh->nh_scope != scope) {
603 +                               write_lock_bh(&fib_nhflags_lock);
604  #ifdef CONFIG_IP_ROUTE_MULTIPATH
605 -                               spin_lock_bh(&fib_multipath_lock);
606 +                               spin_lock(&fib_multipath_lock);
607 +                               nexthop_nh->nh_flags |= RTNH_F_DEAD;
608                                 fi->fib_power -= nexthop_nh->nh_power;
609                                 nexthop_nh->nh_power = 0;
610 -                               spin_unlock_bh(&fib_multipath_lock);
611 +                               spin_unlock(&fib_multipath_lock);
612 +#else
613 +                               nexthop_nh->nh_flags |= RTNH_F_DEAD;
614  #endif
615 -                               dead++;
616 +                               write_unlock_bh(&fib_nhflags_lock);
617 +                               if (fi->fib_protocol!=RTPROT_STATIC ||
618 +                                   force ||
619 +                                   __in_dev_get_rtnl(dev) == NULL)
620 +                                       dead++;
621                         }
622  #ifdef CONFIG_IP_ROUTE_MULTIPATH
623                         if (force > 1 && nexthop_nh->nh_dev == dev) {
624 @@ -1129,11 +1215,8 @@ int fib_sync_down_dev(struct net_device 
625         return ret;
626  }
627  
628 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
629 -
630  /*
631 - * Dead device goes up. We wake up dead nexthops.
632 - * It takes sense only on multipath routes.
633 + * Dead device goes up or new address is added. We wake up dead nexthops.
634   */
635  int fib_sync_up(struct net_device *dev)
636  {
637 @@ -1142,8 +1225,10 @@ int fib_sync_up(struct net_device *dev)
638         struct hlist_head *head;
639         struct hlist_node *node;
640         struct fib_nh *nh;
641 -       int ret;
642 +       struct fib_result res;
643 +       int ret, rep;
644  
645 +repeat:
646         if (!(dev->flags & IFF_UP))
647                 return 0;
648  
649 @@ -1151,6 +1236,7 @@ int fib_sync_up(struct net_device *dev)
650         hash = fib_devindex_hashfn(dev->ifindex);
651         head = &fib_info_devhash[hash];
652         ret = 0;
653 +       rep = 0;
654  
655         hlist_for_each_entry(nh, node, head, nh_hash) {
656                 struct fib_info *fi = nh->nh_parent;
657 @@ -1163,21 +1249,45 @@ int fib_sync_up(struct net_device *dev)
658                 prev_fi = fi;
659                 alive = 0;
660                 change_nexthops(fi) {
661 -                       if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
662 -                               alive++;
663 +                       if (!(nexthop_nh->nh_flags & RTNH_F_DEAD))
664                                 continue;
665 -                       }
666                         if (nexthop_nh->nh_dev == NULL ||
667                             !(nexthop_nh->nh_dev->flags & IFF_UP))
668                                 continue;
669                         if (nexthop_nh->nh_dev != dev ||
670                             !__in_dev_get_rtnl(dev))
671                                 continue;
672 +                       if (nexthop_nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
673 +                               struct flowi fl = {
674 +                                       .nl_u = { .ip4_u =
675 +                                                 { .daddr = nexthop_nh->nh_gw,
676 +                                                   .scope = nexthop_nh->nh_scope } },
677 +                                       .oif =  nexthop_nh->nh_oif,
678 +                               };
679 +
680 +                               rcu_read_lock();
681 +                               if (fib_lookup(dev_net(dev), &fl, &res) != 0) {
682 +                                       rcu_read_unlock();
683 +                                       continue;
684 +                               }
685 +                               if (res.type != RTN_UNICAST &&
686 +                                   res.type != RTN_LOCAL) {
687 +                                       rcu_read_unlock();
688 +                                       continue;
689 +                               }
690 +                               nexthop_nh->nh_scope = res.scope;
691 +                               rcu_read_unlock();
692 +                               rep = 1;
693 +                       }
694                         alive++;
695 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
696                         spin_lock_bh(&fib_multipath_lock);
697                         nexthop_nh->nh_power = 0;
698 +#endif
699                         nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
700 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
701                         spin_unlock_bh(&fib_multipath_lock);
702 +#endif
703                 } endfor_nexthops(fi)
704  
705                 if (alive > 0) {
706 @@ -1185,10 +1295,14 @@ int fib_sync_up(struct net_device *dev)
707                         ret++;
708                 }
709         }
710 +       if (rep)
711 +               goto repeat;
712  
713         return ret;
714  }
715  
716 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
717 +
718  /*
719   * The algorithm is suboptimal, but it provides really
720   * fair weighted route distribution.
721 @@ -1196,24 +1310,46 @@ int fib_sync_up(struct net_device *dev)
722  void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
723  {
724         struct fib_info *fi = res->fi;
725 -       int w;
726 +       int w, alive;
727  
728         spin_lock_bh(&fib_multipath_lock);
729 +       if (flp->oif) {
730 +               int sel = -1;
731 +               w = -1;
732 +               change_nexthops(fi) {
733 +                       if (flp->oif != nexthop_nh->nh_oif)
734 +                               continue;
735 +                       if (flp->fl4_gw && flp->fl4_gw != nexthop_nh->nh_gw &&
736 +                           nexthop_nh->nh_gw &&
737 +                           nexthop_nh->nh_scope == RT_SCOPE_LINK)
738 +                               continue;
739 +                       if (!(nexthop_nh->nh_flags & RTNH_F_BADSTATE)) {
740 +                               if (nexthop_nh->nh_power > w) {
741 +                                       w = nexthop_nh->nh_power;
742 +                                       sel = nhsel;
743 +                               }
744 +                       }
745 +               } endfor_nexthops(fi);
746 +               if (sel >= 0) {
747 +                       spin_unlock_bh(&fib_multipath_lock);
748 +                       res->nh_sel = sel;
749 +                       return;
750 +               }
751 +               goto last_resort;
752 +       }
753 +
754 +repeat:
755         if (fi->fib_power <= 0) {
756                 int power = 0;
757                 change_nexthops(fi) {
758 -                       if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
759 +                       if (!(nexthop_nh->nh_flags & RTNH_F_BADSTATE)) {
760                                 power += nexthop_nh->nh_weight;
761                                 nexthop_nh->nh_power = nexthop_nh->nh_weight;
762                         }
763                 } endfor_nexthops(fi);
764                 fi->fib_power = power;
765 -               if (power <= 0) {
766 -                       spin_unlock_bh(&fib_multipath_lock);
767 -                       /* Race condition: route has just become dead. */
768 -                       res->nh_sel = 0;
769 -                       return;
770 -               }
771 +               if (power <= 0)
772 +                       goto last_resort;
773         }
774  
775  
776 @@ -1223,8 +1359,9 @@ void fib_select_multipath(const struct f
777  
778         w = jiffies % fi->fib_power;
779  
780 +       alive = 0;
781         change_nexthops(fi) {
782 -               if (!(nexthop_nh->nh_flags & RTNH_F_DEAD) &&
783 +               if (!(nexthop_nh->nh_flags & RTNH_F_BADSTATE) &&
784                     nexthop_nh->nh_power) {
785                         w -= nexthop_nh->nh_power;
786                         if (w <= 0) {
787 @@ -1234,11 +1371,29 @@ void fib_select_multipath(const struct f
788                                 spin_unlock_bh(&fib_multipath_lock);
789                                 return;
790                         }
791 +                       alive = 1;
792 +               }
793 +       } endfor_nexthops(fi);
794 +       if (alive) {
795 +               fi->fib_power = 0;
796 +               goto repeat;
797 +       }
798 +
799 +last_resort:
800 +       for_nexthops(fi) {
801 +               if (!(nh->nh_flags & RTNH_F_DEAD)) {
802 +                       if (flp->oif && flp->oif != nh->nh_oif)
803 +                               continue;
804 +                       if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
805 +                           nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
806 +                               continue;
807 +                       spin_unlock_bh(&fib_multipath_lock);
808 +                       res->nh_sel = nhsel;
809 +                       return;
810                 }
811         } endfor_nexthops(fi);
812  
813         /* Race condition: route has just become dead. */
814 -       res->nh_sel = 0;
815         spin_unlock_bh(&fib_multipath_lock);
816  }
817  #endif
818 diff -urp v2.6.37/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c
819 --- v2.6.37/linux/net/ipv4/fib_trie.c   2011-01-06 00:01:22.000000000 +0200
820 +++ linux/net/ipv4/fib_trie.c   2011-01-08 17:20:30.073271586 +0200
821 @@ -1270,6 +1270,7 @@ int fib_table_insert(struct fib_table *t
822                         fi_drop = fa->fa_info;
823                         new_fa->fa_tos = fa->fa_tos;
824                         new_fa->fa_info = fi;
825 +                       new_fa->fa_last_dflt = -1;
826                         new_fa->fa_type = cfg->fc_type;
827                         new_fa->fa_scope = cfg->fc_scope;
828                         state = fa->fa_state;
829 @@ -1310,6 +1311,7 @@ int fib_table_insert(struct fib_table *t
830         new_fa->fa_type = cfg->fc_type;
831         new_fa->fa_scope = cfg->fc_scope;
832         new_fa->fa_state = 0;
833 +       new_fa->fa_last_dflt = -1;
834         /*
835          * Insert new entry to the list.
836          */
837 @@ -1807,24 +1809,31 @@ void fib_table_select_default(struct fib
838                               struct fib_result *res)
839  {
840         struct trie *t = (struct trie *) tb->tb_data;
841 -       int order, last_idx;
842 +       int order, last_idx, last_dflt, last_nhsel;
843 +       struct fib_alias *first_fa = NULL;
844         struct fib_info *fi = NULL;
845         struct fib_info *last_resort;
846         struct fib_alias *fa = NULL;
847         struct list_head *fa_head;
848         struct leaf *l;
849 +       u32 key, mask;
850  
851 +       last_dflt = -2;
852 +       last_nhsel = 0;
853         last_idx = -1;
854         last_resort = NULL;
855         order = -1;
856  
857 +       mask = inet_make_mask(res->prefixlen);
858 +       key = ntohl(flp->fl4_dst & mask);
859 +
860         rcu_read_lock();
861  
862 -       l = fib_find_node(t, 0);
863 +       l = fib_find_node(t, key);
864         if (!l)
865                 goto out;
866  
867 -       fa_head = get_fa_head(l, 0);
868 +       fa_head = get_fa_head(l, res->prefixlen);
869         if (!fa_head)
870                 goto out;
871  
872 @@ -1838,40 +1847,53 @@ void fib_table_select_default(struct fib
873                     fa->fa_type != RTN_UNICAST)
874                         continue;
875  
876 +               if (fa->fa_tos &&
877 +                   fa->fa_tos != flp->fl4_tos)
878 +                       continue;
879                 if (next_fi->fib_priority > res->fi->fib_priority)
880                         break;
881 -               if (!next_fi->fib_nh[0].nh_gw ||
882 -                   next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
883 -                       continue;
884  
885                 fib_alias_accessed(fa);
886  
887 -               if (fi == NULL) {
888 -                       if (next_fi != res->fi)
889 -                               break;
890 -               } else if (!fib_detect_death(fi, order, &last_resort,
891 -                                            &last_idx, tb->tb_default)) {
892 +               if (!first_fa) {
893 +                       last_dflt = fa->fa_last_dflt;
894 +                       first_fa = fa;
895 +               }
896 +               if (fi && !fib_detect_death(fi, order, &last_resort,
897 +                   &last_idx, &last_dflt, &last_nhsel, flp)) {
898                         fib_result_assign(res, fi);
899 -                       tb->tb_default = order;
900 +                       first_fa->fa_last_dflt = order;
901                         goto out;
902                 }
903                 fi = next_fi;
904                 order++;
905         }
906         if (order <= 0 || fi == NULL) {
907 -               tb->tb_default = -1;
908 +               if (fi && fi->fib_nhs > 1 &&
909 +                   fib_detect_death(fi, order, &last_resort, &last_idx,
910 +                                    &last_dflt, &last_nhsel, flp) &&
911 +                   last_resort == fi) {
912 +                       read_lock_bh(&fib_nhflags_lock);
913 +                       fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
914 +                       read_unlock_bh(&fib_nhflags_lock);
915 +               }
916 +               if (first_fa) first_fa->fa_last_dflt = -1;
917                 goto out;
918         }
919  
920         if (!fib_detect_death(fi, order, &last_resort, &last_idx,
921 -                               tb->tb_default)) {
922 +                               &last_dflt, &last_nhsel, flp)) {
923                 fib_result_assign(res, fi);
924 -               tb->tb_default = order;
925 +               first_fa->fa_last_dflt = order;
926                 goto out;
927         }
928 -       if (last_idx >= 0)
929 +       if (last_idx >= 0) {
930                 fib_result_assign(res, last_resort);
931 -       tb->tb_default = last_idx;
932 +               read_lock_bh(&fib_nhflags_lock);
933 +               last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
934 +               read_unlock_bh(&fib_nhflags_lock);
935 +               first_fa->fa_last_dflt = last_idx;
936 +       }
937  out:
938         rcu_read_unlock();
939  }
940 diff -urp v2.6.37/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
941 --- v2.6.37/linux/net/ipv4/netfilter/ipt_MASQUERADE.c   2010-08-02 09:37:49.000000000 +0300
942 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c   2011-01-08 17:21:40.096271293 +0200
943 @@ -51,7 +51,7 @@ masquerade_tg(struct sk_buff *skb, const
944         enum ip_conntrack_info ctinfo;
945         struct nf_nat_range newrange;
946         const struct nf_nat_multi_range_compat *mr;
947 -       const struct rtable *rt;
948 +       struct rtable *rt;
949         __be32 newsrc;
950  
951         NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
952 @@ -69,13 +69,29 @@ masquerade_tg(struct sk_buff *skb, const
953                 return NF_ACCEPT;
954  
955         mr = par->targinfo;
956 -       rt = skb_rtable(skb);
957 -       newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
958 -       if (!newsrc) {
959 -               pr_info("%s ate my IP address\n", par->out->name);
960 -               return NF_DROP;
961 +
962 +       {
963 +               struct flowi fl = { .nl_u = { .ip4_u =
964 +                                             { .daddr = ip_hdr(skb)->daddr,
965 +                                               .tos = (RT_TOS(ip_hdr(skb)->tos) |
966 +                                                       RTO_CONN),
967 +                                               .gw = skb_rtable(skb)->rt_gateway,
968 +                                             } },
969 +                                   .mark = skb->mark,
970 +                                   .oif = par->out->ifindex };
971 +               if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) {
972 +                       /* Funky routing can do this. */
973 +                       if (net_ratelimit())
974 +                               pr_info("%s:"
975 +                                      " No route: Rusty's brain broke!\n",
976 +                                      par->out->name);
977 +                       return NF_DROP;
978 +               }
979         }
980  
981 +       newsrc = rt->rt_src;
982 +       ip_rt_put(rt);
983 +
984         nat->masq_index = par->out->ifindex;
985  
986         /* Transfer from original range. */
987 diff -urp v2.6.37/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
988 --- v2.6.37/linux/net/ipv4/netfilter/nf_nat_core.c      2011-01-06 00:01:22.000000000 +0200
989 +++ linux/net/ipv4/netfilter/nf_nat_core.c      2011-01-08 17:21:40.097270104 +0200
990 @@ -711,6 +711,52 @@ static struct pernet_operations nf_nat_n
991         .exit = nf_nat_net_exit,
992  };
993  
994 +unsigned int /* netfilter hook verdict: NF_ACCEPT or NF_DROP */
995 +ip_nat_route_input(unsigned int hooknum,
996 +               struct sk_buff *skb,
997 +               const struct net_device *in,
998 +               const struct net_device *out,
999 +               int (*okfn)(struct sk_buff *))
1000 +{ /* hooknum, in, out and okfn are not used by the body */
1001 +       struct iphdr *iph;
1002 +       struct nf_conn *conn;
1003 +       enum ip_conntrack_info ctinfo;
1004 +       enum ip_conntrack_dir dir;
1005 +       unsigned long statusbit;
1006 +       __be32 saddr;
1007 +
1008 +       if (!(conn = nf_ct_get(skb, &ctinfo))) /* no conntrack: nothing to do */
1009 +               return NF_ACCEPT;
1010 +
1011 +       if (!(conn->status & IPS_NAT_DONE_MASK)) /* no NAT-done bit set */
1012 +               return NF_ACCEPT;
1013 +       dir = CTINFO2DIR(ctinfo);
1014 +       statusbit = IPS_SRC_NAT;
1015 +       if (dir == IP_CT_DIR_REPLY)
1016 +               statusbit ^= IPS_NAT_MASK; /* NOTE(review): presumably flips to IPS_DST_NAT - confirm */
1017 +       if (!(conn->status & statusbit)) /* relevant NAT bit not set for this direction */
1018 +               return NF_ACCEPT;
1019 +
1020 +       if (skb_dst(skb)) /* skb already carries a dst entry */
1021 +               return NF_ACCEPT;
1022 +
1023 +       if (skb->len < sizeof(struct iphdr)) /* too short for an IPv4 header */
1024 +               return NF_ACCEPT;
1025 +
1026 +       /* use daddr in other direction as masquerade address (lsrc) */
1027 +       iph = ip_hdr(skb);
1028 +       saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
1029 +       if (saddr == iph->saddr) /* equals the packet's own source: skip the lsrc lookup */
1030 +               return NF_ACCEPT;
1031 +
1032 +       if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
1033 +           skb->dev, saddr)) /* saddr is passed as the lsrc argument */
1034 +               return NF_DROP; /* lookup returned non-zero */
1035 +
1036 +       return NF_ACCEPT;
1037 +}
1038 +EXPORT_SYMBOL_GPL(ip_nat_route_input);
1039 +
1040  static int __init nf_nat_init(void)
1041  {
1042         size_t i;
1043 diff -urp v2.6.37/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
1044 --- v2.6.37/linux/net/ipv4/netfilter/nf_nat_standalone.c        2010-10-22 11:34:38.000000000 +0300
1045 +++ linux/net/ipv4/netfilter/nf_nat_standalone.c        2011-01-08 17:21:40.097270104 +0200
1046 @@ -249,6 +249,14 @@ static struct nf_hook_ops nf_nat_ops[] _
1047                 .hooknum        = NF_INET_PRE_ROUTING,
1048                 .priority       = NF_IP_PRI_NAT_DST,
1049         },
1050 +       /* Before routing, route before mangling */
1051 +       {
1052 +               .hook           = ip_nat_route_input,
1053 +               .owner          = THIS_MODULE,
1054 +               .pf             = NFPROTO_IPV4,
1055 +               .hooknum        = NF_INET_PRE_ROUTING,
1056 +               .priority       = NF_IP_PRI_LAST-1,
1057 +       },
1058         /* After packet filtering, change source */
1059         {
1060                 .hook           = nf_nat_out,
1061 diff -urp v2.6.37/linux/net/ipv4/route.c linux/net/ipv4/route.c
1062 --- v2.6.37/linux/net/ipv4/route.c      2011-01-06 00:01:22.000000000 +0200
1063 +++ linux/net/ipv4/route.c      2011-01-08 17:21:40.100270360 +0200
1064 @@ -692,6 +692,8 @@ static inline int compare_keys(struct fl
1065         return (((__force u32)fl1->nl_u.ip4_u.daddr ^ (__force u32)fl2->nl_u.ip4_u.daddr) |
1066                 ((__force u32)fl1->nl_u.ip4_u.saddr ^ (__force u32)fl2->nl_u.ip4_u.saddr) |
1067                 (fl1->mark ^ fl2->mark) |
1068 +               ((__force u32)fl1->nl_u.ip4_u.lsrc ^ (__force u32)fl2->nl_u.ip4_u.lsrc) |
1069 +               ((__force u32)fl1->nl_u.ip4_u.gw ^ (__force u32)fl2->nl_u.ip4_u.gw) |
1070                 (*(u16 *)&fl1->nl_u.ip4_u.tos ^ *(u16 *)&fl2->nl_u.ip4_u.tos) |
1071                 (fl1->oif ^ fl2->oif) |
1072                 (fl1->iif ^ fl2->iif)) == 0;
1073 @@ -1448,6 +1450,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
1074  
1075                                 /* Gateway is different ... */
1076                                 rt->rt_gateway          = new_gw;
1077 +                               if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
1078  
1079                                 /* Redirect received -> path was valid */
1080                                 dst_confirm(&rth->dst);
1081 @@ -1902,6 +1905,7 @@ static int ip_route_input_mc(struct sk_b
1082         rth->fl.fl4_tos = tos;
1083         rth->fl.mark    = skb->mark;
1084         rth->fl.fl4_src = saddr;
1085 +       rth->fl.fl4_lsrc = 0;
1086         rth->rt_src     = saddr;
1087  #ifdef CONFIG_NET_CLS_ROUTE
1088         rth->dst.tclassid = itag;
1089 @@ -1912,6 +1916,7 @@ static int ip_route_input_mc(struct sk_b
1090         dev_hold(rth->dst.dev);
1091         rth->idev       = in_dev_get(rth->dst.dev);
1092         rth->fl.oif     = 0;
1093 +       rth->fl.fl4_gw  = 0;
1094         rth->rt_gateway = daddr;
1095         rth->rt_spec_dst= spec_dst;
1096         rth->rt_genid   = rt_genid(dev_net(dev));
1097 @@ -1975,7 +1980,7 @@ static int __mkroute_input(struct sk_buf
1098                            struct fib_result *res,
1099                            struct in_device *in_dev,
1100                            __be32 daddr, __be32 saddr, u32 tos,
1101 -                          struct rtable **result)
1102 +                          __be32 lsrc, struct rtable **result)
1103  {
1104         struct rtable *rth;
1105         int err;
1106 @@ -2007,6 +2012,7 @@ static int __mkroute_input(struct sk_buf
1107                 flags |= RTCF_DIRECTSRC;
1108  
1109         if (out_dev == in_dev && err &&
1110 +           !lsrc &&
1111             (IN_DEV_SHARED_MEDIA(out_dev) ||
1112              inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1113                 flags |= RTCF_DOREDIRECT;
1114 @@ -2045,6 +2051,7 @@ static int __mkroute_input(struct sk_buf
1115         rth->fl.mark    = skb->mark;
1116         rth->fl.fl4_src = saddr;
1117         rth->rt_src     = saddr;
1118 +       rth->fl.fl4_lsrc        = lsrc;
1119         rth->rt_gateway = daddr;
1120         rth->rt_iif     =
1121                 rth->fl.iif     = in_dev->dev->ifindex;
1122 @@ -2052,6 +2059,7 @@ static int __mkroute_input(struct sk_buf
1123         dev_hold(rth->dst.dev);
1124         rth->idev       = in_dev_get(rth->dst.dev);
1125         rth->fl.oif     = 0;
1126 +       rth->fl.fl4_gw  = 0;
1127         rth->rt_spec_dst= spec_dst;
1128  
1129         rth->dst.obsolete = -1;
1130 @@ -2071,21 +2079,23 @@ static int __mkroute_input(struct sk_buf
1131  
1132  static int ip_mkroute_input(struct sk_buff *skb,
1133                             struct fib_result *res,
1134 +                           struct net *net,
1135                             const struct flowi *fl,
1136                             struct in_device *in_dev,
1137 -                           __be32 daddr, __be32 saddr, u32 tos)
1138 +                           __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1139  {
1140         struct rtable* rth = NULL;
1141         int err;
1142         unsigned hash;
1143  
1144 +       fib_select_default(net, fl, res);
1145  #ifdef CONFIG_IP_ROUTE_MULTIPATH
1146 -       if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1147 +       if (res->fi && res->fi->fib_nhs > 1)
1148                 fib_select_multipath(fl, res);
1149  #endif
1150  
1151         /* create a routing cache entry */
1152 -       err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1153 +       err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1154         if (err)
1155                 return err;
1156  
1157 @@ -2107,18 +2117,20 @@ static int ip_mkroute_input(struct sk_bu
1158   */
1159  
1160  static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1161 -                              u8 tos, struct net_device *dev)
1162 +                              u8 tos, struct net_device *dev, __be32 lsrc)
1163  {
1164         struct fib_result res;
1165         struct in_device *in_dev = __in_dev_get_rcu(dev);
1166         struct flowi fl = { .nl_u = { .ip4_u =
1167                                       { .daddr = daddr,
1168 -                                       .saddr = saddr,
1169 +                                       .saddr = lsrc? : saddr,
1170                                         .tos = tos,
1171                                         .scope = RT_SCOPE_UNIVERSE,
1172                                       } },
1173                             .mark = skb->mark,
1174 -                           .iif = dev->ifindex };
1175 +                           .iif = lsrc?
1176 +                                       dev_net(dev)->loopback_dev->ifindex :
1177 +                                       dev->ifindex };
1178         unsigned        flags = 0;
1179         u32             itag = 0;
1180         struct rtable * rth;
1181 @@ -2152,6 +2164,12 @@ static int ip_route_input_slow(struct sk
1182         if (ipv4_is_zeronet(daddr) || ipv4_is_loopback(daddr))
1183                 goto martian_destination;
1184  
1185 +       if (lsrc) {
1186 +               if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1187 +                   ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1188 +                       goto e_inval;
1189 +       }
1190 +
1191         /*
1192          *      Now we are ready to route packet.
1193          */
1194 @@ -2161,6 +2179,8 @@ static int ip_route_input_slow(struct sk
1195                         goto e_hostunreach;
1196                 goto no_route;
1197         }
1198 +       fl.iif = dev->ifindex;
1199 +       fl.fl4_src = saddr;
1200  
1201         RT_CACHE_STAT_INC(in_slow_tot);
1202  
1203 @@ -2184,12 +2204,14 @@ static int ip_route_input_slow(struct sk
1204         if (res.type != RTN_UNICAST)
1205                 goto martian_destination;
1206  
1207 -       err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1208 +       err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
1209  out:   return err;
1210  
1211  brd_input:
1212         if (skb->protocol != htons(ETH_P_IP))
1213                 goto e_inval;
1214 +       if (lsrc)
1215 +               goto e_inval;
1216  
1217         if (ipv4_is_zeronet(saddr))
1218                 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1219 @@ -2232,6 +2254,7 @@ local_input:
1220         rth->dst.dev    = net->loopback_dev;
1221         dev_hold(rth->dst.dev);
1222         rth->idev       = in_dev_get(rth->dst.dev);
1223 +       rth->fl.fl4_gw  = 0;
1224         rth->rt_gateway = daddr;
1225         rth->rt_spec_dst= spec_dst;
1226         rth->dst.input= ip_local_deliver;
1227 @@ -2284,8 +2307,9 @@ martian_source_keep_err:
1228         goto out;
1229  }
1230  
1231 -int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1232 -                          u8 tos, struct net_device *dev, bool noref)
1233 +int ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1234 +                          u8 tos, struct net_device *dev, bool noref,
1235 +                          __be32 lsrc)
1236  {
1237         struct rtable * rth;
1238         unsigned        hash;
1239 @@ -2308,6 +2332,7 @@ int ip_route_input_common(struct sk_buff
1240                 if ((((__force u32)rth->fl.fl4_dst ^ (__force u32)daddr) |
1241                      ((__force u32)rth->fl.fl4_src ^ (__force u32)saddr) |
1242                      (rth->fl.iif ^ iif) |
1243 +                    (rth->fl.fl4_lsrc ^ lsrc) |
1244                      rth->fl.oif |
1245                      (rth->fl.fl4_tos ^ tos)) == 0 &&
1246                     rth->fl.mark == skb->mark &&
1247 @@ -2361,12 +2386,25 @@ skip_cache:
1248                 rcu_read_unlock();
1249                 return -EINVAL;
1250         }
1251 -       res = ip_route_input_slow(skb, daddr, saddr, tos, dev);
1252 +       res = ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1253         rcu_read_unlock();
1254         return res;
1255  }
1256 +
1257 +int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1258 +                         u8 tos, struct net_device *dev, bool noref)
1259 +{ /* same signature as before the rename; forwards to the cached lookup */
1260 +       return ip_route_input_cached(skb, daddr, saddr, tos, dev, noref, 0); /* lsrc = 0 */
1261 +}
1262  EXPORT_SYMBOL(ip_route_input_common);
1263  
1264 +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1265 +                         u8 tos, struct net_device *dev, __be32 lsrc)
1266 +{ /* input route lookup that forwards a caller-supplied lsrc */
1267 +       return ip_route_input_cached(skb, daddr, saddr, tos, dev, true, lsrc); /* noref is always true */
1268 +}
1269 +EXPORT_SYMBOL(ip_route_input_lookup);
1270 +
1271  /* called with rcu_read_lock() */
1272  static int __mkroute_output(struct rtable **result,
1273                             struct fib_result *res,
1274 @@ -2431,6 +2469,7 @@ static int __mkroute_output(struct rtabl
1275         rth->fl.fl4_tos = tos;
1276         rth->fl.fl4_src = oldflp->fl4_src;
1277         rth->fl.oif     = oldflp->oif;
1278 +       rth->fl.fl4_gw  = oldflp->fl4_gw;
1279         rth->fl.mark    = oldflp->mark;
1280         rth->rt_dst     = fl->fl4_dst;
1281         rth->rt_src     = fl->fl4_src;
1282 @@ -2509,6 +2548,7 @@ static int ip_route_output_slow(struct n
1283         struct flowi fl = { .nl_u = { .ip4_u =
1284                                       { .daddr = oldflp->fl4_dst,
1285                                         .saddr = oldflp->fl4_src,
1286 +                                       .gw = oldflp->fl4_gw,
1287                                         .tos = tos & IPTOS_RT_MASK,
1288                                         .scope = ((tos & RTO_ONLINK) ?
1289                                                   RT_SCOPE_LINK :
1290 @@ -2612,6 +2652,7 @@ static int ip_route_output_slow(struct n
1291                         fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
1292                 dev_out = net->loopback_dev;
1293                 fl.oif = net->loopback_dev->ifindex;
1294 +               fl.fl4_gw = 0;
1295                 res.type = RTN_LOCAL;
1296                 flags |= RTCF_LOCAL;
1297                 goto make_route;
1298 @@ -2619,7 +2660,7 @@ static int ip_route_output_slow(struct n
1299  
1300         if (fib_lookup(net, &fl, &res)) {
1301                 res.fi = NULL;
1302 -               if (oldflp->oif) {
1303 +               if (oldflp->oif && dev_out->flags & IFF_UP) {
1304                         /* Apparently, routing tables are wrong. Assume,
1305                            that the destination is on link.
1306  
1307 @@ -2657,18 +2698,18 @@ static int ip_route_output_slow(struct n
1308                 }
1309                 dev_out = net->loopback_dev;
1310                 fl.oif = dev_out->ifindex;
1311 +               fl.fl4_gw = 0;
1312                 res.fi = NULL;
1313                 flags |= RTCF_LOCAL;
1314                 goto make_route;
1315         }
1316  
1317 +       if (res.type == RTN_UNICAST)
1318 +               fib_select_default(net, &fl, &res);
1319  #ifdef CONFIG_IP_ROUTE_MULTIPATH
1320 -       if (res.fi->fib_nhs > 1 && fl.oif == 0)
1321 +       if (res.fi->fib_nhs > 1)
1322                 fib_select_multipath(&fl, &res);
1323 -       else
1324  #endif
1325 -       if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1326 -               fib_select_default(net, &fl, &res);
1327  
1328         if (!fl.fl4_src)
1329                 fl.fl4_src = FIB_RES_PREFSRC(res);
1330 @@ -2702,6 +2743,7 @@ int __ip_route_output_key(struct net *ne
1331                     rth->fl.fl4_src == flp->fl4_src &&
1332                     rth->fl.iif == 0 &&
1333                     rth->fl.oif == flp->oif &&
1334 +                   rth->fl.fl4_gw == flp->fl4_gw &&
1335                     rth->fl.mark == flp->mark &&
1336                     !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1337                             (IPTOS_RT_MASK | RTO_ONLINK)) &&
This page took 0.157218 seconds and 3 git commands to generate.