git.pld-linux.org Git - packages/kernel.git/blob - kernel-routes.patch
- remove from HEAD
[packages/kernel.git] / kernel-routes.patch
1 diff -urp v2.6.28/linux/include/linux/rtnetlink.h linux/include/linux/rtnetlink.h
2 --- v2.6.28/linux/include/linux/rtnetlink.h     2008-12-25 10:12:24.000000000 +0200
3 +++ linux/include/linux/rtnetlink.h     2009-02-06 09:43:23.000000000 +0200
4 @@ -304,6 +304,8 @@ struct rtnexthop
5  #define RTNH_F_DEAD            1       /* Nexthop is dead (used by multipath)  */
6  #define RTNH_F_PERVASIVE       2       /* Do recursive gateway lookup  */
7  #define RTNH_F_ONLINK          4       /* Gateway is forced on link    */
8 +#define RTNH_F_SUSPECT         8       /* We don't know the real state */
9 +#define RTNH_F_BADSTATE                (RTNH_F_DEAD | RTNH_F_SUSPECT)
10  
11  /* Macros to handle hexthops */
12  
13 diff -urp v2.6.28/linux/include/net/flow.h linux/include/net/flow.h
14 --- v2.6.28/linux/include/net/flow.h    2008-12-25 10:12:24.000000000 +0200
15 +++ linux/include/net/flow.h    2009-02-06 09:43:23.000000000 +0200
16 @@ -19,6 +19,8 @@ struct flowi {
17                 struct {
18                         __be32                  daddr;
19                         __be32                  saddr;
20 +                       __be32                  lsrc;
21 +                       __be32                  gw;
22                         __u8                    tos;
23                         __u8                    scope;
24                 } ip4_u;
25 @@ -43,6 +45,8 @@ struct flowi {
26  #define fl6_flowlabel  nl_u.ip6_u.flowlabel
27  #define fl4_dst                nl_u.ip4_u.daddr
28  #define fl4_src                nl_u.ip4_u.saddr
29 +#define fl4_lsrc       nl_u.ip4_u.lsrc
30 +#define fl4_gw         nl_u.ip4_u.gw
31  #define fl4_tos                nl_u.ip4_u.tos
32  #define fl4_scope      nl_u.ip4_u.scope
33  
34 diff -urp v2.6.28/linux/include/net/ip_fib.h linux/include/net/ip_fib.h
35 --- v2.6.28/linux/include/net/ip_fib.h  2008-04-17 09:58:08.000000000 +0300
36 +++ linux/include/net/ip_fib.h  2009-02-06 09:43:23.000000000 +0200
37 @@ -207,6 +207,8 @@ extern int fib_lookup(struct net *n, str
38  extern struct fib_table *fib_new_table(struct net *net, u32 id);
39  extern struct fib_table *fib_get_table(struct net *net, u32 id);
40  
41 +extern int fib_result_table(struct fib_result *res);
42 +
43  #endif /* CONFIG_IP_MULTIPLE_TABLES */
44  
45  /* Exported by fib_frontend.c */
46 @@ -276,4 +278,6 @@ static inline void fib_proc_exit(struct 
47  }
48  #endif
49  
50 +extern rwlock_t fib_nhflags_lock;
51 +
52  #endif  /* _NET_FIB_H */
53 diff -urp v2.6.28/linux/include/net/netfilter/nf_nat.h linux/include/net/netfilter/nf_nat.h
54 --- v2.6.28/linux/include/net/netfilter/nf_nat.h        2008-04-17 09:58:08.000000000 +0300
55 +++ linux/include/net/netfilter/nf_nat.h        2009-02-06 09:43:23.000000000 +0200
56 @@ -77,6 +77,13 @@ struct nf_conn_nat
57  #endif
58  };
59  
60 +/* Call input routing for SNAT-ed traffic */
61 +extern unsigned int ip_nat_route_input(unsigned int hooknum,
62 +                                      struct sk_buff *skb,
63 +                                      const struct net_device *in,
64 +                                      const struct net_device *out,
65 +                                      int (*okfn)(struct sk_buff *));
66 +
67  /* Set up the info structure to map into this range. */
68  extern unsigned int nf_nat_setup_info(struct nf_conn *ct,
69                                       const struct nf_nat_range *range,
70 diff -urp v2.6.28/linux/include/net/route.h linux/include/net/route.h
71 --- v2.6.28/linux/include/net/route.h   2008-12-25 10:12:24.000000000 +0200
72 +++ linux/include/net/route.h   2009-02-06 09:43:23.000000000 +0200
73 @@ -116,6 +116,7 @@ extern int          __ip_route_output_key(struct
74  extern int             ip_route_output_key(struct net *, struct rtable **, struct flowi *flp);
75  extern int             ip_route_output_flow(struct net *, struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
76  extern int             ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin);
77 +extern int             ip_route_input_lookup(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin, __be32 lsrc);
78  extern unsigned short  ip_rt_frag_needed(struct net *net, struct iphdr *iph, unsigned short new_mtu, struct net_device *dev);
79  extern void            ip_rt_send_redirect(struct sk_buff *skb);
80  
81 diff -urp v2.6.28/linux/net/bridge/br_netfilter.c linux/net/bridge/br_netfilter.c
82 --- v2.6.28/linux/net/bridge/br_netfilter.c     2008-12-25 10:12:25.000000000 +0200
83 +++ linux/net/bridge/br_netfilter.c     2009-02-06 09:43:23.000000000 +0200
84 @@ -341,6 +341,10 @@ static int br_nf_pre_routing_finish(stru
85         struct nf_bridge_info *nf_bridge = skb->nf_bridge;
86         int err;
87  
88 +       /* Old skb->dst is not expected, it is lost in all cases */
89 +       dst_release(skb->dst);
90 +       skb->dst = NULL;
91 +
92         if (nf_bridge->mask & BRNF_PKT_TYPE) {
93                 skb->pkt_type = PACKET_OTHERHOST;
94                 nf_bridge->mask ^= BRNF_PKT_TYPE;
95 diff -urp v2.6.28/linux/net/ipv4/fib_frontend.c linux/net/ipv4/fib_frontend.c
96 --- v2.6.28/linux/net/ipv4/fib_frontend.c       2008-10-11 12:46:16.000000000 +0300
97 +++ linux/net/ipv4/fib_frontend.c       2009-02-06 09:43:23.000000000 +0200
98 @@ -47,6 +47,8 @@
99  
100  #ifndef CONFIG_IP_MULTIPLE_TABLES
101  
102 +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
103 +
104  static int __net_init fib4_rules_init(struct net *net)
105  {
106         struct fib_table *local_table, *main_table;
107 @@ -71,6 +73,8 @@ fail:
108  }
109  #else
110  
111 +#define FIB_RES_TABLE(r) (fib_result_table(r))
112 +
113  struct fib_table *fib_new_table(struct net *net, u32 id)
114  {
115         struct fib_table *tb;
116 @@ -125,7 +129,8 @@ void fib_select_default(struct net *net,
117         table = res->r->table;
118  #endif
119         tb = fib_get_table(net, table);
120 -       if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
121 +       if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) ||
122 +           FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)
123                 tb->tb_select_default(tb, flp, res);
124  }
125  
126 @@ -239,6 +244,9 @@ int fib_validate_source(__be32 src, __be
127                                         .tos = tos } },
128                             .iif = oif };
129         struct fib_result res;
130 +       int table;
131 +       unsigned char prefixlen;
132 +       unsigned char scope;
133         int no_addr, rpf;
134         int ret;
135         struct net *net;
136 @@ -262,31 +270,35 @@ int fib_validate_source(__be32 src, __be
137                 goto e_inval_res;
138         *spec_dst = FIB_RES_PREFSRC(res);
139         fib_combine_itag(itag, &res);
140 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
141 -       if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
142 -#else
143         if (FIB_RES_DEV(res) == dev)
144 -#endif
145         {
146                 ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
147                 fib_res_put(&res);
148                 return ret;
149         }
150 +       table = FIB_RES_TABLE(&res);
151 +       prefixlen = res.prefixlen;
152 +       scope = res.scope;
153         fib_res_put(&res);
154         if (no_addr)
155                 goto last_resort;
156 -       if (rpf == 1)
157 -               goto e_inval;
158         fl.oif = dev->ifindex;
159  
160         ret = 0;
161         if (fib_lookup(net, &fl, &res) == 0) {
162 -               if (res.type == RTN_UNICAST) {
163 +               if (res.type == RTN_UNICAST &&
164 +                   ((table == FIB_RES_TABLE(&res) &&
165 +                     res.prefixlen >= prefixlen && res.scope >= scope) ||
166 +                    !rpf)) {
167                         *spec_dst = FIB_RES_PREFSRC(res);
168                         ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
169 +                       fib_res_put(&res);
170 +                       return ret;
171                 }
172                 fib_res_put(&res);
173         }
174 +       if (rpf)
175 +               goto e_inval;
176         return ret;
177  
178  last_resort:
179 @@ -909,9 +921,7 @@ static int fib_inetaddr_event(struct not
180         switch (event) {
181         case NETDEV_UP:
182                 fib_add_ifaddr(ifa);
183 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
184                 fib_sync_up(dev);
185 -#endif
186                 rt_cache_flush(dev_net(dev), -1);
187                 break;
188         case NETDEV_DOWN:
189 @@ -947,9 +957,7 @@ static int fib_netdev_event(struct notif
190                 for_ifa(in_dev) {
191                         fib_add_ifaddr(ifa);
192                 } endfor_ifa(in_dev);
193 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
194                 fib_sync_up(dev);
195 -#endif
196                 rt_cache_flush(dev_net(dev), -1);
197                 break;
198         case NETDEV_DOWN:
199 diff -urp v2.6.28/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c
200 --- v2.6.28/linux/net/ipv4/fib_hash.c   2008-10-11 12:46:16.000000000 +0300
201 +++ linux/net/ipv4/fib_hash.c   2009-02-06 09:43:23.000000000 +0200
202 @@ -278,25 +278,35 @@ out:
203  static void
204  fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
205  {
206 -       int order, last_idx;
207 +       int order, last_idx, last_dflt, last_nhsel;
208 +       struct fib_alias *first_fa = NULL;
209 +       struct hlist_head *head;
210         struct hlist_node *node;
211         struct fib_node *f;
212         struct fib_info *fi = NULL;
213         struct fib_info *last_resort;
214         struct fn_hash *t = (struct fn_hash *)tb->tb_data;
215 -       struct fn_zone *fz = t->fn_zones[0];
216 +       struct fn_zone *fz = t->fn_zones[res->prefixlen];
217 +       __be32 k;
218  
219         if (fz == NULL)
220                 return;
221  
222 +       k = fz_key(flp->fl4_dst, fz);
223 +       last_dflt = -2;
224 +       last_nhsel = 0;
225         last_idx = -1;
226         last_resort = NULL;
227         order = -1;
228  
229         read_lock(&fib_hash_lock);
230 -       hlist_for_each_entry(f, node, &fz->fz_hash[0], fn_hash) {
231 +       head = &fz->fz_hash[fn_hash(k, fz)];
232 +       hlist_for_each_entry(f, node, head, fn_hash) {
233                 struct fib_alias *fa;
234  
235 +               if (f->fn_key != k)
236 +                       continue;
237 +
238                 list_for_each_entry(fa, &f->fn_alias, fa_list) {
239                         struct fib_info *next_fi = fa->fa_info;
240  
241 @@ -304,42 +314,56 @@ fn_hash_select_default(struct fib_table 
242                             fa->fa_type != RTN_UNICAST)
243                                 continue;
244  
245 +                       if (fa->fa_tos &&
246 +                           fa->fa_tos != flp->fl4_tos)
247 +                               continue;
248                         if (next_fi->fib_priority > res->fi->fib_priority)
249                                 break;
250 -                       if (!next_fi->fib_nh[0].nh_gw ||
251 -                           next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
252 -                               continue;
253                         fa->fa_state |= FA_S_ACCESSED;
254  
255 -                       if (fi == NULL) {
256 -                               if (next_fi != res->fi)
257 -                                       break;
258 -                       } else if (!fib_detect_death(fi, order, &last_resort,
259 -                                               &last_idx, tb->tb_default)) {
260 +                       if (!first_fa) {
261 +                               last_dflt = fa->fa_last_dflt;
262 +                               first_fa = fa;
263 +                       }
264 +                       if (fi && !fib_detect_death(fi, order, &last_resort,
265 +                               &last_idx, &last_dflt, &last_nhsel, flp)) {
266                                 fib_result_assign(res, fi);
267 -                               tb->tb_default = order;
268 +                               first_fa->fa_last_dflt = order;
269                                 goto out;
270                         }
271                         fi = next_fi;
272                         order++;
273                 }
274 +               break;
275         }
276  
277         if (order <= 0 || fi == NULL) {
278 -               tb->tb_default = -1;
279 +               if (fi && fi->fib_nhs > 1 &&
280 +                   fib_detect_death(fi, order, &last_resort, &last_idx,
281 +                       &last_dflt, &last_nhsel, flp) &&
282 +                   last_resort == fi) {
283 +                       read_lock_bh(&fib_nhflags_lock);
284 +                       fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
285 +                       read_unlock_bh(&fib_nhflags_lock);
286 +               }
287 +               if (first_fa) first_fa->fa_last_dflt = -1;
288                 goto out;
289         }
290  
291         if (!fib_detect_death(fi, order, &last_resort, &last_idx,
292 -                               tb->tb_default)) {
293 +                             &last_dflt, &last_nhsel, flp)) {
294                 fib_result_assign(res, fi);
295 -               tb->tb_default = order;
296 +               first_fa->fa_last_dflt = order;
297                 goto out;
298         }
299  
300 -       if (last_idx >= 0)
301 +       if (last_idx >= 0) {
302                 fib_result_assign(res, last_resort);
303 -       tb->tb_default = last_idx;
304 +               read_lock_bh(&fib_nhflags_lock);
305 +               last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
306 +               read_unlock_bh(&fib_nhflags_lock);
307 +               first_fa->fa_last_dflt = last_idx;
308 +       }
309  out:
310         read_unlock(&fib_hash_lock);
311  }
312 @@ -463,6 +487,7 @@ static int fn_hash_insert(struct fib_tab
313                         write_lock_bh(&fib_hash_lock);
314                         fi_drop = fa->fa_info;
315                         fa->fa_info = fi;
316 +                       fa->fa_last_dflt = -1;
317                         fa->fa_type = cfg->fc_type;
318                         fa->fa_scope = cfg->fc_scope;
319                         state = fa->fa_state;
320 @@ -517,6 +542,7 @@ static int fn_hash_insert(struct fib_tab
321         new_fa->fa_type = cfg->fc_type;
322         new_fa->fa_scope = cfg->fc_scope;
323         new_fa->fa_state = 0;
324 +       new_fa->fa_last_dflt = -1;
325  
326         /*
327          * Insert new entry to the list.
328 diff -urp v2.6.28/linux/net/ipv4/fib_lookup.h linux/net/ipv4/fib_lookup.h
329 --- v2.6.28/linux/net/ipv4/fib_lookup.h 2008-04-17 09:58:09.000000000 +0300
330 +++ linux/net/ipv4/fib_lookup.h 2009-02-06 09:43:23.000000000 +0200
331 @@ -8,6 +8,7 @@
332  struct fib_alias {
333         struct list_head        fa_list;
334         struct fib_info         *fa_info;
335 +       int                     fa_last_dflt;
336         u8                      fa_tos;
337         u8                      fa_type;
338         u8                      fa_scope;
339 @@ -38,7 +39,8 @@ extern struct fib_alias *fib_find_alias(
340                                         u8 tos, u32 prio);
341  extern int fib_detect_death(struct fib_info *fi, int order,
342                             struct fib_info **last_resort,
343 -                           int *last_idx, int dflt);
344 +                           int *last_idx, int *dflt, int *last_nhsel,
345 +                           const struct flowi *flp);
346  
347  static inline void fib_result_assign(struct fib_result *res,
348                                      struct fib_info *fi)
349 diff -urp v2.6.28/linux/net/ipv4/fib_rules.c linux/net/ipv4/fib_rules.c
350 --- v2.6.28/linux/net/ipv4/fib_rules.c  2008-10-11 12:46:16.000000000 +0300
351 +++ linux/net/ipv4/fib_rules.c  2009-02-06 09:43:23.000000000 +0200
352 @@ -54,6 +54,11 @@ u32 fib_rules_tclass(struct fib_result *
353  }
354  #endif
355  
356 +int fib_result_table(struct fib_result *res)
357 +{
358 +       return res->r->table;
359 +}
360 +
361  int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res)
362  {
363         struct fib_lookup_arg arg = {
364 diff -urp v2.6.28/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c
365 --- v2.6.28/linux/net/ipv4/fib_semantics.c      2008-10-11 12:46:16.000000000 +0300
366 +++ linux/net/ipv4/fib_semantics.c      2009-02-06 09:43:23.000000000 +0200
367 @@ -50,6 +50,7 @@ static struct hlist_head *fib_info_hash;
368  static struct hlist_head *fib_info_laddrhash;
369  static unsigned int fib_hash_size;
370  static unsigned int fib_info_cnt;
371 +DEFINE_RWLOCK(fib_nhflags_lock);	/* taken around nexthop nh_flags updates */
372  
373  #define DEVINDEX_HASHBITS 8
374  #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
375 @@ -186,7 +187,7 @@ static __inline__ int nh_comp(const stru
376  #ifdef CONFIG_NET_CLS_ROUTE
377                     nh->nh_tclassid != onh->nh_tclassid ||
378  #endif
379 -                   ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
380 +                   ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
381                         return -1;
382                 onh++;
383         } endfor_nexthops(fi);
384 @@ -237,7 +238,7 @@ static struct fib_info *fib_find_info(co
385                     nfi->fib_priority == fi->fib_priority &&
386                     memcmp(nfi->fib_metrics, fi->fib_metrics,
387                            sizeof(fi->fib_metrics)) == 0 &&
388 -                   ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
389 +                   ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
390                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
391                         return fi;
392         }
393 @@ -348,26 +349,70 @@ struct fib_alias *fib_find_alias(struct 
394  }
395  
396  int fib_detect_death(struct fib_info *fi, int order,
397 -                    struct fib_info **last_resort, int *last_idx, int dflt)
398 +                    struct fib_info **last_resort, int *last_idx, int *dflt,
399 +                    int *last_nhsel, const struct flowi *flp)
400  {
401         struct neighbour *n;
402 -       int state = NUD_NONE;
403 +       int nhsel;
404 +       int state;
405 +       struct fib_nh * nh;
406 +       __be32 dst;
407 +       int flag, dead = 1;
408 +
409 +       /* change_nexthops(fi) { */
410 +       for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
411 +               if (flp->oif && flp->oif != nh->nh_oif)
412 +                       continue;
413 +               if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw &&
414 +                   nh->nh_scope == RT_SCOPE_LINK)
415 +                       continue;
416 +               if (nh->nh_flags & RTNH_F_DEAD)
417 +                       continue;
418  
419 -       n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
420 -       if (n) {
421 -               state = n->nud_state;
422 -               neigh_release(n);
423 -       }
424 -       if (state == NUD_REACHABLE)
425 -               return 0;
426 -       if ((state&NUD_VALID) && order != dflt)
427 -               return 0;
428 -       if ((state&NUD_VALID) ||
429 -           (*last_idx<0 && order > dflt)) {
430 -               *last_resort = fi;
431 -               *last_idx = order;
432 +               flag = 0;
433 +               if (nh->nh_dev->flags & IFF_NOARP) {
434 +                       dead = 0;
435 +                       goto setfl;
436 +               }
437 +
438 +               dst = nh->nh_gw;
439 +               if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK)
440 +                       dst = flp->fl4_dst;
441 +
442 +               state = NUD_NONE;
443 +               n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev);
444 +               if (n) {
445 +                       state = n->nud_state;
446 +                       neigh_release(n);
447 +               }
448 +               if (state == NUD_REACHABLE ||
449 +                       ((state&NUD_VALID) && order != *dflt)) {
450 +                       dead = 0;
451 +                       goto setfl;
452 +               }
453 +               if (!(state&NUD_VALID))
454 +                       flag = 1;
455 +               if (!dead)
456 +                       goto setfl;
457 +               if ((state&NUD_VALID) ||
458 +                   (*last_idx<0 && order >= *dflt)) {
459 +                       *last_resort = fi;
460 +                       *last_idx = order;
461 +                       *last_nhsel = nhsel;
462 +               }
463 +
464 +               setfl:
465 +
466 +               read_lock_bh(&fib_nhflags_lock);
467 +               if (flag)
468 +                       nh->nh_flags |= RTNH_F_SUSPECT;
469 +               else
470 +                       nh->nh_flags &= ~RTNH_F_SUSPECT;
471 +               read_unlock_bh(&fib_nhflags_lock);
472         }
473 -       return 1;
474 +       /* } endfor_nexthops(fi) */
475 +
476 +       return dead;
477  }
478  
479  #ifdef CONFIG_IP_ROUTE_MULTIPATH
480 @@ -539,8 +584,11 @@ static int fib_check_nh(struct fib_confi
481                                 return -EINVAL;
482                         if ((dev = __dev_get_by_index(net, nh->nh_oif)) == NULL)
483                                 return -ENODEV;
484 -                       if (!(dev->flags&IFF_UP))
485 -                               return -ENETDOWN;
486 +                       if (!(dev->flags&IFF_UP)) {
487 +                               if (fi->fib_protocol != RTPROT_STATIC)
488 +                                       return -ENETDOWN;
489 +                               nh->nh_flags |= RTNH_F_DEAD;
490 +                       }
491                         nh->nh_dev = dev;
492                         dev_hold(dev);
493                         nh->nh_scope = RT_SCOPE_LINK;
494 @@ -560,24 +608,48 @@ static int fib_check_nh(struct fib_confi
495                         /* It is not necessary, but requires a bit of thinking */
496                         if (fl.fl4_scope < RT_SCOPE_LINK)
497                                 fl.fl4_scope = RT_SCOPE_LINK;
498 -                       if ((err = fib_lookup(net, &fl, &res)) != 0)
499 -                               return err;
500 +                       err = fib_lookup(net, &fl, &res);
501                 }
502 -               err = -EINVAL;
503 -               if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
504 -                       goto out;
505 -               nh->nh_scope = res.scope;
506 -               nh->nh_oif = FIB_RES_OIF(res);
507 -               if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
508 -                       goto out;
509 -               dev_hold(nh->nh_dev);
510 -               err = -ENETDOWN;
511 -               if (!(nh->nh_dev->flags & IFF_UP))
512 -                       goto out;
513 -               err = 0;
514 +               if (err) {
515 +                       struct in_device *in_dev;
516 +
517 +                       if (err != -ENETUNREACH ||
518 +                           fi->fib_protocol != RTPROT_STATIC)
519 +                               return err;
520 +
521 +                       in_dev = inetdev_by_index(net, nh->nh_oif);
522 +                       if (in_dev == NULL ||
523 +                           in_dev->dev->flags & IFF_UP) {
524 +                               if (in_dev)
525 +                                       in_dev_put(in_dev);
526 +                               return err;
527 +                       }
528 +                       nh->nh_flags |= RTNH_F_DEAD;
529 +                       nh->nh_scope = RT_SCOPE_LINK;
530 +                       nh->nh_dev = in_dev->dev;
531 +                       dev_hold(nh->nh_dev);
532 +                       in_dev_put(in_dev);
533 +               } else {
534 +                       err = -EINVAL;
535 +                       if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
536 +                               goto out;
537 +                       nh->nh_scope = res.scope;
538 +                       nh->nh_oif = FIB_RES_OIF(res);
539 +                       if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
540 +                               goto out;
541 +                       dev_hold(nh->nh_dev);
542 +                       if (!(nh->nh_dev->flags & IFF_UP)) {
543 +                               if (fi->fib_protocol != RTPROT_STATIC) {
544 +                                       err = -ENETDOWN;
545 +                                       goto out;
546 +                               }
547 +                               nh->nh_flags |= RTNH_F_DEAD;
548 +                       }
549 +                       err = 0;
550  out:
551 -               fib_res_put(&res);
552 -               return err;
553 +                       fib_res_put(&res);
554 +                       return err;
555 +               }
556         } else {
557                 struct in_device *in_dev;
558  
559 @@ -588,8 +660,11 @@ out:
560                 if (in_dev == NULL)
561                         return -ENODEV;
562                 if (!(in_dev->dev->flags&IFF_UP)) {
563 -                       in_dev_put(in_dev);
564 -                       return -ENETDOWN;
565 +                       if (fi->fib_protocol != RTPROT_STATIC) {
566 +                               in_dev_put(in_dev);
567 +                               return -ENETDOWN;
568 +                       }
569 +                       nh->nh_flags |= RTNH_F_DEAD;
570                 }
571                 nh->nh_dev = in_dev->dev;
572                 dev_hold(nh->nh_dev);
573 @@ -899,8 +974,12 @@ int fib_semantic_match(struct list_head 
574                                 for_nexthops(fi) {
575                                         if (nh->nh_flags&RTNH_F_DEAD)
576                                                 continue;
577 -                                       if (!flp->oif || flp->oif == nh->nh_oif)
578 -                                               break;
579 +                                       if (flp->oif && flp->oif != nh->nh_oif)
580 +                                               continue;
581 +                                       if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
582 +                                           nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
583 +                                               continue;
584 +                                       break;
585                                 }
586  #ifdef CONFIG_IP_ROUTE_MULTIPATH
587                                 if (nhsel < fi->fib_nhs) {
588 @@ -1080,18 +1159,29 @@ int fib_sync_down_dev(struct net_device 
589                 prev_fi = fi;
590                 dead = 0;
591                 change_nexthops(fi) {
592 -                       if (nh->nh_flags&RTNH_F_DEAD)
593 -                               dead++;
594 -                       else if (nh->nh_dev == dev &&
595 -                                       nh->nh_scope != scope) {
596 -                               nh->nh_flags |= RTNH_F_DEAD;
597 +                       if (nh->nh_flags&RTNH_F_DEAD) {
598 +                               if (fi->fib_protocol!=RTPROT_STATIC ||
599 +                                   nh->nh_dev == NULL ||
600 +                                   __in_dev_get_rtnl(nh->nh_dev) == NULL ||
601 +                                   nh->nh_dev->flags&IFF_UP)
602 +                                       dead++;
603 +                       } else if (nh->nh_dev == dev &&
604 +                                  nh->nh_scope != scope) {
605 +                               write_lock_bh(&fib_nhflags_lock);
606  #ifdef CONFIG_IP_ROUTE_MULTIPATH
607 -                               spin_lock_bh(&fib_multipath_lock);
608 +                               spin_lock(&fib_multipath_lock);
609 +                               nh->nh_flags |= RTNH_F_DEAD;
610                                 fi->fib_power -= nh->nh_power;
611                                 nh->nh_power = 0;
612 -                               spin_unlock_bh(&fib_multipath_lock);
613 +                               spin_unlock(&fib_multipath_lock);
614 +#else
615 +                               nh->nh_flags |= RTNH_F_DEAD;
616  #endif
617 -                               dead++;
618 +                               write_unlock_bh(&fib_nhflags_lock);
619 +                               if (fi->fib_protocol!=RTPROT_STATIC ||
620 +                                   force ||
621 +                                   __in_dev_get_rtnl(dev) == NULL)
622 +                                       dead++;
623                         }
624  #ifdef CONFIG_IP_ROUTE_MULTIPATH
625                         if (force > 1 && nh->nh_dev == dev) {
626 @@ -1109,11 +1199,8 @@ int fib_sync_down_dev(struct net_device 
627         return ret;
628  }
629  
630 -#ifdef CONFIG_IP_ROUTE_MULTIPATH
631 -
632  /*
633 -   Dead device goes up. We wake up dead nexthops.
634 -   It takes sense only on multipath routes.
635 +   Dead device goes up or new address is added. We wake up dead nexthops.
636   */
637  
638  int fib_sync_up(struct net_device *dev)
639 @@ -1123,8 +1210,10 @@ int fib_sync_up(struct net_device *dev)
640         struct hlist_head *head;
641         struct hlist_node *node;
642         struct fib_nh *nh;
643 -       int ret;
644 +       struct fib_result res;
645 +       int ret, rep;
646  
647 +repeat:
648         if (!(dev->flags&IFF_UP))
649                 return 0;
650  
651 @@ -1132,6 +1221,7 @@ int fib_sync_up(struct net_device *dev)
652         hash = fib_devindex_hashfn(dev->ifindex);
653         head = &fib_info_devhash[hash];
654         ret = 0;
655 +       rep = 0;
656  
657         hlist_for_each_entry(nh, node, head, nh_hash) {
658                 struct fib_info *fi = nh->nh_parent;
659 @@ -1144,19 +1234,39 @@ int fib_sync_up(struct net_device *dev)
660                 prev_fi = fi;
661                 alive = 0;
662                 change_nexthops(fi) {
663 -                       if (!(nh->nh_flags&RTNH_F_DEAD)) {
664 -                               alive++;
665 +                       if (!(nh->nh_flags&RTNH_F_DEAD))
666                                 continue;
667 -                       }
668                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
669                                 continue;
670                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
671                                 continue;
672 +                       if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
673 +                               struct flowi fl = {
674 +                                       .nl_u = { .ip4_u =
675 +                                                 { .daddr = nh->nh_gw,
676 +                                                   .scope = nh->nh_scope } },
677 +                                       .oif =  nh->nh_oif,
678 +                               };
679 +                               if (fib_lookup(dev_net(dev), &fl, &res) != 0)
680 +                                       continue;
681 +                               if (res.type != RTN_UNICAST &&
682 +                                   res.type != RTN_LOCAL) {
683 +                                       fib_res_put(&res);
684 +                                       continue;
685 +                               }
686 +                               nh->nh_scope = res.scope;
687 +                               fib_res_put(&res);
688 +                               rep = 1;
689 +                       }
690                         alive++;
691 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
692                         spin_lock_bh(&fib_multipath_lock);
693                         nh->nh_power = 0;
694 +#endif
695                         nh->nh_flags &= ~RTNH_F_DEAD;
696 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
697                         spin_unlock_bh(&fib_multipath_lock);
698 +#endif
699                 } endfor_nexthops(fi)
700  
701                 if (alive > 0) {
702 @@ -1164,10 +1274,14 @@ int fib_sync_up(struct net_device *dev)
703                         ret++;
704                 }
705         }
706 +       if (rep)
707 +               goto repeat;
708  
709         return ret;
710  }
711  
712 +#ifdef CONFIG_IP_ROUTE_MULTIPATH
713 +
714  /*
715     The algorithm is suboptimal, but it provides really
716     fair weighted route distribution.
717 @@ -1176,24 +1290,45 @@ int fib_sync_up(struct net_device *dev)
718  void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
719  {
720         struct fib_info *fi = res->fi;
721 -       int w;
722 +       int w, alive;
723  
724         spin_lock_bh(&fib_multipath_lock);
725 +       if (flp->oif) {
726 +               int sel = -1;
727 +               w = -1;
728 +               change_nexthops(fi) {
729 +                       if (flp->oif != nh->nh_oif)
730 +                               continue;
731 +                       if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
732 +                           nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
733 +                               continue;
734 +                       if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
735 +                               if (nh->nh_power > w) {
736 +                                       w = nh->nh_power;
737 +                                       sel = nhsel;
738 +                               }
739 +                       }
740 +               } endfor_nexthops(fi);
741 +               if (sel >= 0) {
742 +                       spin_unlock_bh(&fib_multipath_lock);
743 +                       res->nh_sel = sel;
744 +                       return;
745 +               }
746 +               goto last_resort;
747 +       }
748 +
749 +repeat:
750         if (fi->fib_power <= 0) {
751                 int power = 0;
752                 change_nexthops(fi) {
753 -                       if (!(nh->nh_flags&RTNH_F_DEAD)) {
754 +                       if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
755                                 power += nh->nh_weight;
756                                 nh->nh_power = nh->nh_weight;
757                         }
758                 } endfor_nexthops(fi);
759                 fi->fib_power = power;
760 -               if (power <= 0) {
761 -                       spin_unlock_bh(&fib_multipath_lock);
762 -                       /* Race condition: route has just become dead. */
763 -                       res->nh_sel = 0;
764 -                       return;
765 -               }
766 +               if (power <= 0)
767 +                       goto last_resort;
768         }
769  
770  
771 @@ -1203,20 +1338,40 @@ void fib_select_multipath(const struct f
772  
773         w = jiffies % fi->fib_power;
774  
775 +       alive = 0;
776         change_nexthops(fi) {
777 -               if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
778 +               if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
779                         if ((w -= nh->nh_power) <= 0) {
780                                 nh->nh_power--;
781                                 fi->fib_power--;
782 -                               res->nh_sel = nhsel;
783                                 spin_unlock_bh(&fib_multipath_lock);
784 +                               res->nh_sel = nhsel;
785                                 return;
786                         }
787 +                       alive = 1;
788 +               }
789 +       } endfor_nexthops(fi);
790 +       if (alive) {
791 +               fi->fib_power = 0;
792 +               goto repeat;
793 +       }
794 +
795 +last_resort:
796 +
797 +       for_nexthops(fi) {
798 +               if (!(nh->nh_flags&RTNH_F_DEAD)) {
799 +                       if (flp->oif && flp->oif != nh->nh_oif)
800 +                               continue;
801 +                       if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw &&
802 +                           nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK)
803 +                               continue;
804 +                       spin_unlock_bh(&fib_multipath_lock);
805 +                       res->nh_sel = nhsel;
806 +                       return;
807                 }
808         } endfor_nexthops(fi);
809  
810         /* Race condition: route has just become dead. */
811 -       res->nh_sel = 0;
812         spin_unlock_bh(&fib_multipath_lock);
813  }
814  #endif
815 diff -urp v2.6.28/linux/net/ipv4/fib_trie.c linux/net/ipv4/fib_trie.c
816 --- v2.6.28/linux/net/ipv4/fib_trie.c   2008-10-11 12:46:16.000000000 +0300
817 +++ linux/net/ipv4/fib_trie.c   2009-02-06 09:43:23.000000000 +0200
818 @@ -1261,6 +1261,7 @@ static int fn_trie_insert(struct fib_tab
819                         fi_drop = fa->fa_info;
820                         new_fa->fa_tos = fa->fa_tos;
821                         new_fa->fa_info = fi;
822 +                       new_fa->fa_last_dflt = -1;
823                         new_fa->fa_type = cfg->fc_type;
824                         new_fa->fa_scope = cfg->fc_scope;
825                         state = fa->fa_state;
826 @@ -1301,6 +1302,7 @@ static int fn_trie_insert(struct fib_tab
827         new_fa->fa_type = cfg->fc_type;
828         new_fa->fa_scope = cfg->fc_scope;
829         new_fa->fa_state = 0;
830 +       new_fa->fa_last_dflt = -1;
831         /*
832          * Insert new entry to the list.
833          */
834 @@ -1802,24 +1804,31 @@ static void fn_trie_select_default(struc
835                                    struct fib_result *res)
836  {
837         struct trie *t = (struct trie *) tb->tb_data;
838 -       int order, last_idx;
839 +       int order, last_idx, last_dflt, last_nhsel;
840 +       struct fib_alias *first_fa = NULL;
841         struct fib_info *fi = NULL;
842         struct fib_info *last_resort;
843         struct fib_alias *fa = NULL;
844         struct list_head *fa_head;
845         struct leaf *l;
846 +       u32 key, mask;
847  
848 +       last_dflt = -2;
849 +       last_nhsel = 0;
850         last_idx = -1;
851         last_resort = NULL;
852         order = -1;
853  
854 +       mask = inet_make_mask(res->prefixlen);
855 +       key = ntohl(flp->fl4_dst & mask);
856 +
857         rcu_read_lock();
858  
859 -       l = fib_find_node(t, 0);
860 +       l = fib_find_node(t, key);
861         if (!l)
862                 goto out;
863  
864 -       fa_head = get_fa_head(l, 0);
865 +       fa_head = get_fa_head(l, res->prefixlen);
866         if (!fa_head)
867                 goto out;
868  
869 @@ -1833,39 +1842,52 @@ static void fn_trie_select_default(struc
870                     fa->fa_type != RTN_UNICAST)
871                         continue;
872  
873 +               if (fa->fa_tos &&
874 +                   fa->fa_tos != flp->fl4_tos)
875 +                       continue;
876                 if (next_fi->fib_priority > res->fi->fib_priority)
877                         break;
878 -               if (!next_fi->fib_nh[0].nh_gw ||
879 -                   next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
880 -                       continue;
881                 fa->fa_state |= FA_S_ACCESSED;
882  
883 -               if (fi == NULL) {
884 -                       if (next_fi != res->fi)
885 -                               break;
886 -               } else if (!fib_detect_death(fi, order, &last_resort,
887 -                                            &last_idx, tb->tb_default)) {
888 +               if (!first_fa) {
889 +                       last_dflt = fa->fa_last_dflt;
890 +                       first_fa = fa;
891 +               }
892 +               if (fi && !fib_detect_death(fi, order, &last_resort,
893 +                   &last_idx, &last_dflt, &last_nhsel, flp)) {
894                         fib_result_assign(res, fi);
895 -                       tb->tb_default = order;
896 +                       first_fa->fa_last_dflt = order;
897                         goto out;
898                 }
899                 fi = next_fi;
900                 order++;
901         }
902         if (order <= 0 || fi == NULL) {
903 -               tb->tb_default = -1;
904 +               if (fi && fi->fib_nhs > 1 &&
905 +                   fib_detect_death(fi, order, &last_resort, &last_idx,
906 +                                    &last_dflt, &last_nhsel, flp) &&
907 +                   last_resort == fi) {
908 +                       read_lock_bh(&fib_nhflags_lock);
909 +                       fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
910 +                       read_unlock_bh(&fib_nhflags_lock);
911 +               }
912 +               if (first_fa) first_fa->fa_last_dflt = -1;
913                 goto out;
914         }
915  
916         if (!fib_detect_death(fi, order, &last_resort, &last_idx,
917 -                               tb->tb_default)) {
918 +                               &last_dflt, &last_nhsel, flp)) {
919                 fib_result_assign(res, fi);
920 -               tb->tb_default = order;
921 +               first_fa->fa_last_dflt = order;
922                 goto out;
923         }
924 -       if (last_idx >= 0)
925 +       if (last_idx >= 0) {
926                 fib_result_assign(res, last_resort);
927 -       tb->tb_default = last_idx;
928 +               read_lock_bh(&fib_nhflags_lock);
929 +               last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT;
930 +               read_unlock_bh(&fib_nhflags_lock);
931 +               first_fa->fa_last_dflt = last_idx;
932 +       }
933  out:
934         rcu_read_unlock();
935  }
936 diff -urp v2.6.28/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c
937 --- v2.6.28/linux/net/ipv4/netfilter/ipt_MASQUERADE.c   2008-12-25 10:12:25.000000000 +0200
938 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c   2009-02-06 09:43:23.000000000 +0200
939 @@ -54,7 +54,7 @@ masquerade_tg(struct sk_buff *skb, const
940         enum ip_conntrack_info ctinfo;
941         struct nf_nat_range newrange;
942         const struct nf_nat_multi_range_compat *mr;
943 -       const struct rtable *rt;
944 +       struct rtable *rt;
945         __be32 newsrc;
946  
947         NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
948 @@ -72,13 +72,28 @@ masquerade_tg(struct sk_buff *skb, const
949                 return NF_ACCEPT;
950  
951         mr = par->targinfo;
952 -       rt = skb->rtable;
953 -       newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
954 -       if (!newsrc) {
955 -               printk("MASQUERADE: %s ate my IP address\n", par->out->name);
956 -               return NF_DROP;
957 +
958 +       {
959 +               struct flowi fl = { .nl_u = { .ip4_u =
960 +                                             { .daddr = ip_hdr(skb)->daddr,
961 +                                               .tos = (RT_TOS(ip_hdr(skb)->tos) |
962 +                                                       RTO_CONN),
963 +                                               .gw = skb->rtable->rt_gateway,
964 +                                             } },
965 +                                   .mark = skb->mark,
966 +                                   .oif = par->out->ifindex };
967 +               if (ip_route_output_key(dev_net(par->out), &rt, &fl) != 0) {
968 +                       /* Funky routing can do this. */
969 +                       if (net_ratelimit())
970 +                               printk("MASQUERADE:"
971 +                                      " No route: Rusty's brain broke!\n");
972 +                       return NF_DROP;
973 +               }
974         }
975  
976 +       newsrc = rt->rt_src;
977 +       ip_rt_put(rt);
978 +
979         write_lock_bh(&masq_lock);
980         nat->masq_index = par->out->ifindex;
981         write_unlock_bh(&masq_lock);
982 diff -urp v2.6.28/linux/net/ipv4/netfilter/nf_nat_core.c linux/net/ipv4/netfilter/nf_nat_core.c
983 --- v2.6.28/linux/net/ipv4/netfilter/nf_nat_core.c      2008-12-25 10:12:25.000000000 +0200
984 +++ linux/net/ipv4/netfilter/nf_nat_core.c      2009-02-06 09:43:23.000000000 +0200
985 @@ -710,6 +710,52 @@ static struct pernet_operations nf_nat_n
986         .exit = nf_nat_net_exit,
987  };
988  
989 +unsigned int
990 +ip_nat_route_input(unsigned int hooknum,
991 +               struct sk_buff *skb,
992 +               const struct net_device *in,
993 +               const struct net_device *out,
994 +               int (*okfn)(struct sk_buff *))
995 +{
996 +       struct iphdr *iph;
997 +       struct nf_conn *conn;
998 +       enum ip_conntrack_info ctinfo;
999 +       enum ip_conntrack_dir dir;
1000 +       unsigned long statusbit;
1001 +       __be32 saddr;
1002 +       /* Hook body: route the skb now, with a conntrack address as lsrc. */
1003 +       if (!(conn = nf_ct_get(skb, &ctinfo)))
1004 +               return NF_ACCEPT;
1005 +       /* Go on only if status has a NAT_DONE bit and this direction's bit. */
1006 +       if (!(conn->status & IPS_NAT_DONE_MASK))
1007 +               return NF_ACCEPT;
1008 +       dir = CTINFO2DIR(ctinfo);
1009 +       statusbit = IPS_SRC_NAT;
1010 +       if (dir == IP_CT_DIR_REPLY)
1011 +               statusbit ^= IPS_NAT_MASK;
1012 +       if (!(conn->status & statusbit))
1013 +               return NF_ACCEPT;
1014 +       /* An skb that already carries a dst is left as it is. */
1015 +       if (skb->dst)
1016 +               return NF_ACCEPT;
1017 +       /* Shorter than an IP header: do not read addresses from it. */
1018 +       if (skb->len < sizeof(struct iphdr))
1019 +               return NF_ACCEPT;
1020 +
1021 +       /* use daddr in other direction as masquerade address (lsrc) */
1022 +       iph = ip_hdr(skb);
1023 +       saddr = conn->tuplehash[!dir].tuple.dst.u3.ip;
1024 +       if (saddr == iph->saddr)
1025 +               return NF_ACCEPT;
1026 +       /* Drop the packet when the lookup returns non-zero. */
1027 +       if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos,
1028 +           skb->dev, saddr))
1029 +               return NF_DROP;
1030 +
1031 +       return NF_ACCEPT;
1032 +}
1033 +EXPORT_SYMBOL_GPL(ip_nat_route_input);
1034 +
1035  static int __init nf_nat_init(void)
1036  {
1037         size_t i;
1038 diff -urp v2.6.28/linux/net/ipv4/netfilter/nf_nat_standalone.c linux/net/ipv4/netfilter/nf_nat_standalone.c
1039 --- v2.6.28/linux/net/ipv4/netfilter/nf_nat_standalone.c        2008-07-14 09:58:50.000000000 +0300
1040 +++ linux/net/ipv4/netfilter/nf_nat_standalone.c        2009-02-06 09:43:23.000000000 +0200
1041 @@ -256,6 +256,14 @@ static struct nf_hook_ops nf_nat_ops[] _
1042                 .hooknum        = NF_INET_PRE_ROUTING,
1043                 .priority       = NF_IP_PRI_NAT_DST,
1044         },
1045 +       /* Before routing, route before mangling */
1046 +       {
1047 +               .hook           = ip_nat_route_input,
1048 +               .owner          = THIS_MODULE,
1049 +               .pf             = PF_INET,
1050 +               .hooknum        = NF_INET_PRE_ROUTING,
1051 +               .priority       = NF_IP_PRI_LAST-1,
1052 +       },
1053         /* After packet filtering, change source */
1054         {
1055                 .hook           = nf_nat_out,
1056 diff -urp v2.6.28/linux/net/ipv4/route.c linux/net/ipv4/route.c
1057 --- v2.6.28/linux/net/ipv4/route.c      2008-12-25 10:12:25.000000000 +0200
1058 +++ linux/net/ipv4/route.c      2009-02-06 09:43:43.000000000 +0200
1059 @@ -679,6 +679,7 @@ static inline int compare_keys(struct fl
1060         return ((__force u32)((fl1->nl_u.ip4_u.daddr ^ fl2->nl_u.ip4_u.daddr) |
1061                 (fl1->nl_u.ip4_u.saddr ^ fl2->nl_u.ip4_u.saddr)) |
1062                 (fl1->mark ^ fl2->mark) |
1063 +               ((__force u32)(fl1->nl_u.ip4_u.lsrc ^ fl2->nl_u.ip4_u.lsrc)) |
1064                 (*(u16 *)&fl1->nl_u.ip4_u.tos ^
1065                  *(u16 *)&fl2->nl_u.ip4_u.tos) |
1066                 (fl1->oif ^ fl2->oif) |
1067 @@ -1286,6 +1287,7 @@ void ip_rt_redirect(__be32 old_gw, __be3
1068  
1069                                 /* Gateway is different ... */
1070                                 rt->rt_gateway          = new_gw;
1071 +                               if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw;
1072  
1073                                 /* Redirect received -> path was valid */
1074                                 dst_confirm(&rth->u.dst);
1075 @@ -1735,6 +1737,7 @@ static int ip_route_input_mc(struct sk_b
1076         rth->fl.fl4_tos = tos;
1077         rth->fl.mark    = skb->mark;
1078         rth->fl.fl4_src = saddr;
1079 +       rth->fl.fl4_lsrc = 0;
1080         rth->rt_src     = saddr;
1081  #ifdef CONFIG_NET_CLS_ROUTE
1082         rth->u.dst.tclassid = itag;
1083 @@ -1745,6 +1748,7 @@ static int ip_route_input_mc(struct sk_b
1084         dev_hold(rth->u.dst.dev);
1085         rth->idev       = in_dev_get(rth->u.dst.dev);
1086         rth->fl.oif     = 0;
1087 +       rth->fl.fl4_gw  = 0;
1088         rth->rt_gateway = daddr;
1089         rth->rt_spec_dst= spec_dst;
1090         rth->rt_genid   = rt_genid(dev_net(dev));
1091 @@ -1810,7 +1814,7 @@ static int __mkroute_input(struct sk_buf
1092                            struct fib_result *res,
1093                            struct in_device *in_dev,
1094                            __be32 daddr, __be32 saddr, u32 tos,
1095 -                          struct rtable **result)
1096 +                          __be32 lsrc, struct rtable **result)
1097  {
1098  
1099         struct rtable *rth;
1100 @@ -1844,6 +1848,7 @@ static int __mkroute_input(struct sk_buf
1101                 flags |= RTCF_DIRECTSRC;
1102  
1103         if (out_dev == in_dev && err &&
1104 +           !lsrc &&
1105             (IN_DEV_SHARED_MEDIA(out_dev) ||
1106              inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res))))
1107                 flags |= RTCF_DOREDIRECT;
1108 @@ -1877,6 +1882,7 @@ static int __mkroute_input(struct sk_buf
1109         rth->fl.mark    = skb->mark;
1110         rth->fl.fl4_src = saddr;
1111         rth->rt_src     = saddr;
1112 +       rth->fl.fl4_lsrc        = lsrc;
1113         rth->rt_gateway = daddr;
1114         rth->rt_iif     =
1115                 rth->fl.iif     = in_dev->dev->ifindex;
1116 @@ -1884,6 +1890,7 @@ static int __mkroute_input(struct sk_buf
1117         dev_hold(rth->u.dst.dev);
1118         rth->idev       = in_dev_get(rth->u.dst.dev);
1119         rth->fl.oif     = 0;
1120 +       rth->fl.fl4_gw  = 0;
1121         rth->rt_spec_dst= spec_dst;
1122  
1123         rth->u.dst.input = ip_forward;
1124 @@ -1904,21 +1911,23 @@ static int __mkroute_input(struct sk_buf
1125  
1126  static int ip_mkroute_input(struct sk_buff *skb,
1127                             struct fib_result *res,
1128 +                           struct net *net,
1129                             const struct flowi *fl,
1130                             struct in_device *in_dev,
1131 -                           __be32 daddr, __be32 saddr, u32 tos)
1132 +                           __be32 daddr, __be32 saddr, u32 tos, __be32 lsrc)
1133  {
1134         struct rtable* rth = NULL;
1135         int err;
1136         unsigned hash;
1137  
1138 +       fib_select_default(net, fl, res);
1139  #ifdef CONFIG_IP_ROUTE_MULTIPATH
1140 -       if (res->fi && res->fi->fib_nhs > 1 && fl->oif == 0)
1141 +       if (res->fi && res->fi->fib_nhs > 1)
1142                 fib_select_multipath(fl, res);
1143  #endif
1144  
1145         /* create a routing cache entry */
1146 -       err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, &rth);
1147 +       err = __mkroute_input(skb, res, in_dev, daddr, saddr, tos, lsrc, &rth);
1148         if (err)
1149                 return err;
1150  
1151 @@ -1939,18 +1948,19 @@ static int ip_mkroute_input(struct sk_bu
1152   */
1153  
1154  static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1155 -                              u8 tos, struct net_device *dev)
1156 +                              u8 tos, struct net_device *dev, __be32 lsrc)
1157  {
1158         struct fib_result res;
1159         struct in_device *in_dev = in_dev_get(dev);
1160         struct flowi fl = { .nl_u = { .ip4_u =
1161                                       { .daddr = daddr,
1162 -                                       .saddr = saddr,
1163 +                                       .saddr = lsrc? : saddr,
1164                                         .tos = tos,
1165                                         .scope = RT_SCOPE_UNIVERSE,
1166                                       } },
1167                             .mark = skb->mark,
1168 -                           .iif = dev->ifindex };
1169 +                           .iif = lsrc?
1170 +                               dev_net(dev)->loopback_dev->ifindex : dev->ifindex };
1171         unsigned        flags = 0;
1172         u32             itag = 0;
1173         struct rtable * rth;
1174 @@ -1986,6 +1996,12 @@ static int ip_route_input_slow(struct sk
1175             ipv4_is_loopback(daddr))
1176                 goto martian_destination;
1177  
1178 +       if (lsrc) {
1179 +               if (ipv4_is_multicast(lsrc) || ipv4_is_lbcast(lsrc) ||
1180 +                   ipv4_is_zeronet(lsrc) || ipv4_is_loopback(lsrc))
1181 +                       goto e_inval;
1182 +       }
1183 +
1184         /*
1185          *      Now we are ready to route packet.
1186          */
1187 @@ -1995,6 +2011,8 @@ static int ip_route_input_slow(struct sk
1188                 goto no_route;
1189         }
1190         free_res = 1;
1191 +       fl.iif = dev->ifindex;
1192 +       fl.fl4_src = saddr;
1193  
1194         RT_CACHE_STAT_INC(in_slow_tot);
1195  
1196 @@ -2019,7 +2037,7 @@ static int ip_route_input_slow(struct sk
1197         if (res.type != RTN_UNICAST)
1198                 goto martian_destination;
1199  
1200 -       err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos);
1201 +       err = ip_mkroute_input(skb, &res, net, &fl, in_dev, daddr, saddr, tos, lsrc);
1202  done:
1203         in_dev_put(in_dev);
1204         if (free_res)
1205 @@ -2029,6 +2047,8 @@ out:      return err;
1206  brd_input:
1207         if (skb->protocol != htons(ETH_P_IP))
1208                 goto e_inval;
1209 +       if (lsrc)
1210 +               goto e_inval;
1211  
1212         if (ipv4_is_zeronet(saddr))
1213                 spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
1214 @@ -2070,6 +2090,7 @@ local_input:
1215         rth->u.dst.dev  = net->loopback_dev;
1216         dev_hold(rth->u.dst.dev);
1217         rth->idev       = in_dev_get(rth->u.dst.dev);
1218 +       rth->fl.fl4_gw  = 0;
1219         rth->rt_gateway = daddr;
1220         rth->rt_spec_dst= spec_dst;
1221         rth->u.dst.input= ip_local_deliver;
1222 @@ -2121,8 +2142,9 @@ martian_source:
1223         goto e_inval;
1224  }
1225  
1226 -int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1227 -                  u8 tos, struct net_device *dev)
1228 +static inline int
1229 +ip_route_input_cached(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1230 +                  u8 tos, struct net_device *dev, __be32 lsrc)
1231  {
1232         struct rtable * rth;
1233         unsigned        hash;
1234 @@ -2139,6 +2161,7 @@ int ip_route_input(struct sk_buff *skb, 
1235                 if (((rth->fl.fl4_dst ^ daddr) |
1236                      (rth->fl.fl4_src ^ saddr) |
1237                      (rth->fl.iif ^ iif) |
1238 +                    (rth->fl.fl4_lsrc ^ lsrc) |
1239                      rth->fl.oif |
1240                      (rth->fl.fl4_tos ^ tos)) == 0 &&
1241                     rth->fl.mark == skb->mark &&
1242 @@ -2186,7 +2209,19 @@ int ip_route_input(struct sk_buff *skb, 
1243                 rcu_read_unlock();
1244                 return -EINVAL;
1245         }
1246 -       return ip_route_input_slow(skb, daddr, saddr, tos, dev);
1247 +       return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc);
1248 +}
1249 +
1250 +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1251 +                  u8 tos, struct net_device *dev)
1252 +{
1253 +       return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0);
1254 +}
1255 +
1256 +int ip_route_input_lookup(struct sk_buff *skb, __be32 daddr, __be32 saddr,
1257 +                         u8 tos, struct net_device *dev, __be32 lsrc)
1258 +{
1259 +       return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc);
1260  }
1261  
1262  static int __mkroute_output(struct rtable **result,
1263 @@ -2258,6 +2293,7 @@ static int __mkroute_output(struct rtabl
1264         rth->fl.fl4_tos = tos;
1265         rth->fl.fl4_src = oldflp->fl4_src;
1266         rth->fl.oif     = oldflp->oif;
1267 +       rth->fl.fl4_gw  = oldflp->fl4_gw;
1268         rth->fl.mark    = oldflp->mark;
1269         rth->rt_dst     = fl->fl4_dst;
1270         rth->rt_src     = fl->fl4_src;
1271 @@ -2339,6 +2375,7 @@ static int ip_route_output_slow(struct n
1272         struct flowi fl = { .nl_u = { .ip4_u =
1273                                       { .daddr = oldflp->fl4_dst,
1274                                         .saddr = oldflp->fl4_src,
1275 +                                       .gw = oldflp->fl4_gw,
1276                                         .tos = tos & IPTOS_RT_MASK,
1277                                         .scope = ((tos & RTO_ONLINK) ?
1278                                                   RT_SCOPE_LINK :
1279 @@ -2450,6 +2487,7 @@ static int ip_route_output_slow(struct n
1280                 dev_out = net->loopback_dev;
1281                 dev_hold(dev_out);
1282                 fl.oif = net->loopback_dev->ifindex;
1283 +               fl.fl4_gw = 0;
1284                 res.type = RTN_LOCAL;
1285                 flags |= RTCF_LOCAL;
1286                 goto make_route;
1287 @@ -2457,7 +2495,7 @@ static int ip_route_output_slow(struct n
1288  
1289         if (fib_lookup(net, &fl, &res)) {
1290                 res.fi = NULL;
1291 -               if (oldflp->oif) {
1292 +               if (oldflp->oif && dev_out->flags & IFF_UP) {
1293                         /* Apparently, routing tables are wrong. Assume,
1294                            that the destination is on link.
1295  
1296 @@ -2497,6 +2535,7 @@ static int ip_route_output_slow(struct n
1297                 dev_out = net->loopback_dev;
1298                 dev_hold(dev_out);
1299                 fl.oif = dev_out->ifindex;
1300 +               fl.fl4_gw = 0;
1301                 if (res.fi)
1302                         fib_info_put(res.fi);
1303                 res.fi = NULL;
1304 @@ -2504,13 +2543,12 @@ static int ip_route_output_slow(struct n
1305                 goto make_route;
1306         }
1307  
1308 +       if (res.type == RTN_UNICAST)
1309 +               fib_select_default(net, &fl, &res);
1310  #ifdef CONFIG_IP_ROUTE_MULTIPATH
1311 -       if (res.fi->fib_nhs > 1 && fl.oif == 0)
1312 +       if (res.fi->fib_nhs > 1)
1313                 fib_select_multipath(&fl, &res);
1314 -       else
1315  #endif
1316 -       if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
1317 -               fib_select_default(net, &fl, &res);
1318  
1319         if (!fl.fl4_src)
1320                 fl.fl4_src = FIB_RES_PREFSRC(res);
1321 @@ -2548,6 +2586,7 @@ int __ip_route_output_key(struct net *ne
1322                     rth->fl.fl4_src == flp->fl4_src &&
1323                     rth->fl.iif == 0 &&
1324                     rth->fl.oif == flp->oif &&
1325 +                   rth->fl.fl4_gw == flp->fl4_gw &&
1326                     rth->fl.mark == flp->mark &&
1327                     !((rth->fl.fl4_tos ^ flp->fl4_tos) &
1328                             (IPTOS_RT_MASK | RTO_ONLINK)) &&
1329 @@ -3322,3 +3361,4 @@ void __init ip_static_sysctl_init(void)
1330  EXPORT_SYMBOL(__ip_select_ident);
1331  EXPORT_SYMBOL(ip_route_input);
1332  EXPORT_SYMBOL(ip_route_output_key);
1333 +EXPORT_SYMBOL(ip_route_input_lookup);
This page took 0.196637 seconds and 3 git commands to generate.