diff -ur v2.6.7-before-nf_reroute/linux/include/linux/netfilter_ipv4/ip_nat.h linux/include/linux/netfilter_ipv4/ip_nat.h --- v2.6.7-before-nf_reroute/linux/include/linux/netfilter_ipv4/ip_nat.h 2004-03-11 23:48:04.000000000 +0200 +++ linux/include/linux/netfilter_ipv4/ip_nat.h 2004-06-17 01:02:46.811977384 +0300 @@ -121,5 +121,13 @@ extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck); + +/* Call input routing for SNAT-ed traffic */ +extern unsigned int ip_nat_route_input(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + #endif /*__KERNEL__*/ #endif diff -ur v2.6.7-before-nf_reroute/linux/include/net/flow.h linux/include/net/flow.h --- v2.6.7-before-nf_reroute/linux/include/net/flow.h 2004-04-04 09:43:36.000000000 +0300 +++ linux/include/net/flow.h 2004-06-17 01:02:46.812977232 +0300 @@ -19,6 +19,8 @@ __u32 daddr; __u32 saddr; __u32 fwmark; + __u32 lsrc; + __u32 gw; __u8 tos; __u8 scope; } ip4_u; @@ -46,6 +48,8 @@ #define fl4_dst nl_u.ip4_u.daddr #define fl4_src nl_u.ip4_u.saddr #define fl4_fwmark nl_u.ip4_u.fwmark +#define fl4_lsrc nl_u.ip4_u.lsrc +#define fl4_gw nl_u.ip4_u.gw #define fl4_tos nl_u.ip4_u.tos #define fl4_scope nl_u.ip4_u.scope diff -ur v2.6.7-before-nf_reroute/linux/include/net/route.h linux/include/net/route.h --- v2.6.7-before-nf_reroute/linux/include/net/route.h 2004-06-16 23:54:05.000000000 +0300 +++ linux/include/net/route.h 2004-06-17 01:02:46.812977232 +0300 @@ -124,6 +124,7 @@ extern int ip_route_output_key(struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin); +extern int ip_route_input_lookup(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin, u32 lsrc); extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern void ip_rt_send_redirect(struct sk_buff *skb); diff -ur v2.6.7-before-nf_reroute/linux/net/ipv4/fib_hash.c linux/net/ipv4/fib_hash.c --- v2.6.7-before-nf_reroute/linux/net/ipv4/fib_hash.c 2004-06-17 01:01:41.000000000 +0300 +++ linux/net/ipv4/fib_hash.c 2004-06-17 01:02:46.813977080 +0300 @@ -352,6 +352,9 @@ for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) { if (flp->oif && flp->oif != nh->nh_oif) continue; + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK) + continue; if (nh->nh_flags & RTNH_F_DEAD) continue; diff -ur v2.6.7-before-nf_reroute/linux/net/ipv4/fib_semantics.c linux/net/ipv4/fib_semantics.c --- v2.6.7-before-nf_reroute/linux/net/ipv4/fib_semantics.c 2004-06-17 01:01:41.000000000 +0300 +++ linux/net/ipv4/fib_semantics.c 2004-06-17 01:02:46.814976928 +0300 @@ -674,8 +674,12 @@ for_nexthops(fi) { if (nh->nh_flags&RTNH_F_DEAD) continue; - if (!flp->oif || flp->oif == nh->nh_oif) - break; + if (flp->oif && flp->oif != nh->nh_oif) + continue; + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + continue; + break; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (nhsel < fi->fib_nhs) { @@ -1068,6 +1072,9 @@ change_nexthops(fi) { if (flp->oif != nh->nh_oif) continue; + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + continue; if (!(nh->nh_flags&RTNH_F_BADSTATE)) { if (nh->nh_power > w) { w = nh->nh_power; @@ -1126,11 +1133,14 @@ for_nexthops(fi) { if (!(nh->nh_flags&RTNH_F_DEAD)) { - if (!flp->oif || flp->oif == nh->nh_oif) { - spin_unlock_bh(&fib_multipath_lock); - res->nh_sel = nhsel; - return; - } + if (flp->oif && flp->oif != nh->nh_oif) + continue; + if (flp->fl4_gw && flp->fl4_gw != nh->nh_gw && + nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + continue; + spin_unlock_bh(&fib_multipath_lock); + res->nh_sel = nhsel; + return; } } endfor_nexthops(fi); diff -ur v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ip_fw_compat_masq.c linux/net/ipv4/netfilter/ip_fw_compat_masq.c --- v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ip_fw_compat_masq.c 2004-04-04 09:43:37.000000000 +0300 +++ linux/net/ipv4/netfilter/ip_fw_compat_masq.c 2004-06-17 01:02:46.814976928 +0300 @@ -44,15 +44,20 @@ unsigned int do_masquerade(struct sk_buff **pskb, const struct net_device *dev) { + struct iphdr *iph = (*pskb)->nh.iph; struct ip_nat_info *info; enum ip_conntrack_info ctinfo; struct ip_conntrack *ct; unsigned int ret; + struct rtable *rt, *skb_rt; + struct net_device *skb_dev; + __u32 saddr; + int new; /* Sorry, only ICMP, TCP and UDP. */ - if ((*pskb)->nh.iph->protocol != IPPROTO_ICMP - && (*pskb)->nh.iph->protocol != IPPROTO_TCP - && (*pskb)->nh.iph->protocol != IPPROTO_UDP) + if (iph->protocol != IPPROTO_ICMP + && iph->protocol != IPPROTO_TCP + && iph->protocol != IPPROTO_UDP) return NF_DROP; /* Feed it to connection tracking; in fact we're in NF_IP_FORWARD, @@ -71,23 +76,30 @@ } info = &ct->nat.info; + iph = (*pskb)->nh.iph; + saddr = iph->saddr; + new = 0; WRITE_LOCK(&ip_nat_lock); /* Setup the masquerade, if not already */ if (!info->initialized) { u_int32_t newsrc; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = (*pskb)->nh.iph->daddr } } }; - struct rtable *rt; struct ip_nat_multi_range range; + skb_rt = (struct rtable *) (*pskb)->dst; + skb_dev = skb_rt->u.dst.dev; /* Pass 0 instead of saddr, since it's going to be changed anyway. */ + fl.fl4_tos = RT_TOS(iph->tos); + fl.fl4_gw = skb_dev? skb_rt->rt_gateway : 0; + fl.oif = skb_dev? skb_dev->ifindex : 0; if (ip_route_output_key(&rt, &fl) != 0) { + WRITE_UNLOCK(&ip_nat_lock); DEBUGP("ipnat_rule_masquerade: Can't reroute.\n"); return NF_DROP; } - newsrc = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, - RT_SCOPE_UNIVERSE); + newsrc = rt->rt_src; ip_rt_put(rt); range = ((struct ip_nat_multi_range) { 1, @@ -100,11 +112,36 @@ WRITE_UNLOCK(&ip_nat_lock); return ret; } + new = 1; } else DEBUGP("Masquerading already done on this conn.\n"); WRITE_UNLOCK(&ip_nat_lock); - return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb); + ret = do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb); + if (ret != NF_ACCEPT || saddr == (*pskb)->nh.iph->saddr || new) + return ret; + + iph = (*pskb)->nh.iph; + { + struct flowi fl = { .nl_u = { .ip4_u = + { .saddr = iph->saddr, + .daddr = iph->daddr, + .tos = RT_TOS(iph->tos) } } }; + if (ip_route_output_key(&rt, &fl) != 0) + return NF_DROP; + } + skb_rt = (struct rtable *) (*pskb)->dst; + skb_dev = skb_rt->u.dst.dev; + if (skb_dev != rt->u.dst.dev || rt->rt_gateway != skb_rt->rt_gateway) { + if (skb_dev != rt->u.dst.dev) { + /* TODO: check the new mtu and reply FRAG_NEEDED */ + } + dst_release((*pskb)->dst); + (*pskb)->dst = &rt->u.dst; + } else { + ip_rt_put(rt); + } + return NF_ACCEPT; } void diff -ur v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ip_nat_core.c linux/net/ipv4/netfilter/ip_nat_core.c --- v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ip_nat_core.c 2004-05-11 02:09:54.000000000 +0300 +++ linux/net/ipv4/netfilter/ip_nat_core.c 2004-06-17 01:02:46.815976776 +0300 @@ -987,6 +987,60 @@ return 0; } +unsigned int +ip_nat_route_input(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct iphdr *iph; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_nat_info *info; + enum ip_conntrack_dir dir; + __u32 saddr; + int i; + + if (!(ct = ip_conntrack_get(skb, &ctinfo))) + return NF_ACCEPT; + + info = &ct->nat.info; + if (!info->initialized) + return NF_ACCEPT; + + if (skb->dst) + return NF_ACCEPT; + + if (skb->len < sizeof(struct iphdr)) + return NF_ACCEPT; + + iph = skb->nh.iph; + saddr = iph->saddr; + hooknum = NF_IP_POST_ROUTING; + dir = CTINFO2DIR(ctinfo); + + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + if (info->manips[i].direction == dir + && info->manips[i].hooknum == hooknum + && info->manips[i].maniptype == IP_NAT_MANIP_SRC) { + saddr = info->manips[i].manip.ip; + } + } + READ_UNLOCK(&ip_nat_lock); + + if (saddr == iph->saddr) + return NF_ACCEPT; + + if (ip_route_input_lookup(skb, iph->daddr, iph->saddr, iph->tos, + skb->dev, saddr)) + return NF_DROP; + + return NF_ACCEPT; +} + int __init ip_nat_init(void) { size_t i; diff -ur v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ip_nat_standalone.c linux/net/ipv4/netfilter/ip_nat_standalone.c --- v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ip_nat_standalone.c 2004-06-16 23:54:06.000000000 +0300 +++ linux/net/ipv4/netfilter/ip_nat_standalone.c 2004-06-17 01:02:46.816976624 +0300 @@ -247,6 +247,14 @@ .priority = NF_IP_PRI_NAT_DST, }; +/* Before routing, route before mangling */ +static struct nf_hook_ops ip_nat_inr_ops = { + .hook = ip_nat_route_input, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_LAST-1, +}; + /* After packet filtering, change source */ static struct nf_hook_ops ip_nat_out_ops = { .hook = ip_nat_out, @@ -331,10 +339,15 @@ printk("ip_nat_init: can't register in hook.\n"); goto cleanup_nat; } + ret = nf_register_hook(&ip_nat_inr_ops); + if (ret < 0) { + printk("ip_nat_init: can't register inr hook.\n"); + goto cleanup_inops; + } ret = nf_register_hook(&ip_nat_out_ops); if (ret < 0) { printk("ip_nat_init: can't register out hook.\n"); - goto cleanup_inops; + goto cleanup_inrops; } #ifdef CONFIG_IP_NF_NAT_LOCAL ret = nf_register_hook(&ip_nat_local_out_ops); @@ -358,6 +371,8 @@ cleanup_outops: #endif nf_unregister_hook(&ip_nat_out_ops); + cleanup_inrops: + nf_unregister_hook(&ip_nat_inr_ops); cleanup_inops: nf_unregister_hook(&ip_nat_in_ops); cleanup_nat: diff -ur v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ipt_MASQUERADE.c linux/net/ipv4/netfilter/ipt_MASQUERADE.c --- v2.6.7-before-nf_reroute/linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2004-05-11 02:09:54.000000000 +0300 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c 2004-06-17 01:02:46.816976624 +0300 @@ -101,10 +101,12 @@ { .daddr = (*pskb)->nh.iph->daddr, .tos = (RT_TOS((*pskb)->nh.iph->tos) | RTO_CONN), + .gw = ((struct rtable *) (*pskb)->dst)->rt_gateway, #ifdef CONFIG_IP_ROUTE_FWMARK .fwmark = (*pskb)->nfmark #endif - } } }; + } }, + .oif = out->ifindex }; if (ip_route_output_key(&rt, &fl) != 0) { /* Funky routing can do this. */ if (net_ratelimit()) @@ -112,13 +114,6 @@ " No route: Rusty's brain broke!\n"); return NF_DROP; } - if (rt->u.dst.dev != out) { - if (net_ratelimit()) - printk("MASQUERADE:" - " Route sent us somewhere else.\n"); - ip_rt_put(rt); - return NF_DROP; - } } newsrc = rt->rt_src; diff -ur v2.6.7-before-nf_reroute/linux/net/ipv4/route.c linux/net/ipv4/route.c --- v2.6.7-before-nf_reroute/linux/net/ipv4/route.c 2004-06-17 01:01:41.000000000 +0300 +++ linux/net/ipv4/route.c 2004-06-17 01:03:04.388305376 +0300 @@ -1055,6 +1055,7 @@ /* Gateway is different ... */ rt->rt_gateway = new_gw; + if (rt->fl.fl4_gw) rt->fl.fl4_gw = new_gw; /* Redirect received -> path was valid */ dst_confirm(&rth->u.dst); @@ -1496,6 +1497,7 @@ rth->fl.fl4_fwmark= skb->nfmark; #endif rth->fl.fl4_src = saddr; + rth->fl.fl4_lsrc = 0; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_NAT rth->rt_dst_map = daddr; @@ -1510,6 +1512,7 @@ dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; + rth->fl.fl4_gw = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_type = RTN_MULTICAST; @@ -1549,21 +1552,21 @@ */ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, - u8 tos, struct net_device *dev) + u8 tos, struct net_device *dev, u32 lsrc) { struct fib_result res; struct in_device *in_dev = in_dev_get(dev); struct in_device *out_dev = NULL; struct flowi fl = { .nl_u = { .ip4_u = { .daddr = daddr, - .saddr = saddr, + .saddr = lsrc? : saddr, .tos = tos, .scope = RT_SCOPE_UNIVERSE, #ifdef CONFIG_IP_ROUTE_FWMARK .fwmark = skb->nfmark #endif } }, - .iif = dev->ifindex }; + .iif = lsrc? loopback_dev.ifindex : dev->ifindex }; unsigned flags = 0; u32 itag = 0; struct rtable * rth; @@ -1577,7 +1580,7 @@ if (!in_dev) goto out; - hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos); + hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos); /* Check for the most weird martians, which can be not detected by fib_lookup. @@ -1598,6 +1601,12 @@ if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr)) goto martian_destination; + if (lsrc) { + if (MULTICAST(lsrc) || BADCLASS(lsrc) || + ZERONET(lsrc) || LOOPBACK(lsrc)) + goto e_inval; + } + /* * Now we are ready to route packet. */ @@ -1607,6 +1616,10 @@ goto no_route; } free_res = 1; + if (lsrc && res.type != RTN_UNICAST && res.type != RTN_NAT) + goto e_inval; + fl.iif = dev->ifindex; + fl.fl4_src = saddr; RT_CACHE_STAT_INC(in_slow_tot); @@ -1617,7 +1630,7 @@ if (1) { u32 src_map = saddr; - if (res.r) + if (res.r && !lsrc) src_map = fib_rules_policy(saddr, &res, &flags); if (res.type == RTN_NAT) { @@ -1678,6 +1691,7 @@ flags |= RTCF_DIRECTSRC; if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) && + !lsrc && (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res)))) flags |= RTCF_DOREDIRECT; @@ -1708,6 +1722,7 @@ #endif rth->fl.fl4_src = saddr; rth->rt_src = saddr; + rth->fl.fl4_lsrc = lsrc; rth->rt_gateway = daddr; #ifdef CONFIG_IP_ROUTE_NAT rth->rt_src_map = fl.fl4_src; @@ -1721,6 +1736,7 @@ dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); rth->fl.oif = 0; + rth->fl.fl4_gw = 0; rth->rt_spec_dst= spec_dst; rth->u.dst.input = ip_forward; @@ -1731,7 +1747,8 @@ rth->rt_flags = flags; #ifdef CONFIG_NET_FASTROUTE - if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) { + if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT)) && + !lsrc) { struct net_device *odev = rth->u.dst.dev; if (odev != dev && dev->accept_fastpath && @@ -1754,6 +1771,8 @@ brd_input: if (skb->protocol != htons(ETH_P_IP)) goto e_inval; + if (lsrc) + goto e_inval; if (ZERONET(saddr)) spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -1800,6 +1819,7 @@ rth->u.dst.dev = &loopback_dev; dev_hold(rth->u.dst.dev); rth->idev = in_dev_get(rth->u.dst.dev); + rth->fl.fl4_gw = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->u.dst.input= ip_local_deliver; @@ -1865,8 +1885,9 @@ goto e_inval; } -int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, - u8 tos, struct net_device *dev) +static inline int +ip_route_input_cached(struct sk_buff *skb, u32 daddr, u32 saddr, + u8 tos, struct net_device *dev, u32 lsrc) { struct rtable * rth; unsigned hash; @@ -1881,6 +1902,7 @@ if (rth->fl.fl4_dst == daddr && rth->fl.fl4_src == saddr && rth->fl.iif == iif && + rth->fl.fl4_lsrc == lsrc && rth->fl.oif == 0 && #ifdef CONFIG_IP_ROUTE_FWMARK rth->fl.fl4_fwmark == skb->nfmark && @@ -1929,9 +1951,21 @@ read_unlock(&inetdev_lock); return -EINVAL; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev); + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc); } +int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, + u8 tos, struct net_device *dev) +{ + return ip_route_input_cached(skb, daddr, saddr, tos, dev, 0); +} + +int ip_route_input_lookup(struct sk_buff *skb, u32 daddr, u32 saddr, + u8 tos, struct net_device *dev, u32 lsrc) +{ + return ip_route_input_cached(skb, daddr, saddr, tos, dev, lsrc); +} + /* * Major route resolver routine. */ @@ -1942,6 +1976,7 @@ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = oldflp->fl4_dst, .saddr = oldflp->fl4_src, + .gw = oldflp->fl4_gw, .tos = tos & IPTOS_RT_MASK, .scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : @@ -2045,6 +2080,7 @@ dev_out = &loopback_dev; dev_hold(dev_out); fl.oif = loopback_dev.ifindex; + fl.fl4_gw = 0; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -2095,6 +2131,7 @@ dev_out = &loopback_dev; dev_hold(dev_out); fl.oif = dev_out->ifindex; + fl.fl4_gw = 0; if (res.fi) fib_info_put(res.fi); res.fi = NULL; @@ -2170,6 +2207,7 @@ rth->fl.fl4_tos = tos; rth->fl.fl4_src = oldflp->fl4_src; rth->fl.oif = oldflp->oif; + rth->fl.fl4_gw = oldflp->fl4_gw; #ifdef CONFIG_IP_ROUTE_FWMARK rth->fl.fl4_fwmark= oldflp->fl4_fwmark; #endif @@ -2249,6 +2287,7 @@ rth->fl.fl4_src == flp->fl4_src && rth->fl.iif == 0 && rth->fl.oif == flp->oif && + rth->fl.fl4_gw == flp->fl4_gw && #ifdef CONFIG_IP_ROUTE_FWMARK rth->fl.fl4_fwmark == flp->fl4_fwmark && #endif @@ -2862,3 +2901,4 @@ EXPORT_SYMBOL(__ip_select_ident); EXPORT_SYMBOL(ip_route_input); EXPORT_SYMBOL(ip_route_output_key); +EXPORT_SYMBOL(ip_route_input_lookup);