From f01a1b1a0cb61eaadebd9d910286c5f382a4928c Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:46 +0200 Subject: [PATCH] Loosen source address check on IPv4 output ip_route_output() contains a check to make sure that no flows with non-local source IP addresses are routed. This obviously makes using such addresses impossible. This patch introduces a flowi flag which makes omitting this check possible. The new flag provides a way of handling transparent and non-transparent connections differently. Signed-off-by: Julian Anastasov Signed-off-by: KOVACS Krisztian Acked-by: Patrick McHardy --- include/net/flow.h | 1 + net/ipv4/route.c | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/include/net/flow.h b/include/net/flow.h index ad16e00..b45a5e4 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -48,6 +48,7 @@ struct flowi { __u8 proto; __u8 flags; +#define FLOWI_FLAG_ANYSRC 0x01 union { struct { __be16 sport; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce25a13..c0ed024 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2307,11 +2307,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, ipv4_is_zeronet(oldflp->fl4_src)) goto out; - /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ - dev_out = ip_dev_find(net, oldflp->fl4_src); - if (dev_out == NULL) - goto out; - /* I removed check for oif == dev_out->oif here. It was wrong for two reasons: 1. ip_dev_find(net, saddr) can return wrong iface, if saddr @@ -2323,6 +2318,11 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, if (oldflp->oif == 0 && (ipv4_is_multicast(oldflp->fl4_dst) || oldflp->fl4_dst == htonl(0xFFFFFFFF))) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; + /* Special hack: user can direct multicasts and limited broadcast via necessary interface without fiddling with IP_MULTICAST_IF or IP_PKTINFO. @@ -2341,9 +2341,15 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, fl.oif = dev_out->ifindex; goto make_route; } - if (dev_out) + + if (!(oldflp->flags & FLOWI_FLAG_ANYSRC)) { + /* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */ + dev_out = ip_dev_find(net, oldflp->fl4_src); + if (dev_out == NULL) + goto out; dev_put(dev_out); - dev_out = NULL; + dev_out = NULL; + } } -- 1.5.2.5 From cc2f0afbffaad66b7794e4b4e5d50609f3571fbd Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:48 +0200 Subject: [PATCH] Implement IP_TRANSPARENT socket option This patch introduces the IP_TRANSPARENT socket option: enabling that will make the IPv4 routing omit the non-local source address check on output. Setting IP_TRANSPARENT requires NET_ADMIN capability. Signed-off-by: KOVACS Krisztian Acked-by: Patrick McHardy --- include/linux/in.h | 1 + include/net/inet_sock.h | 3 ++- include/net/inet_timewait_sock.h | 3 ++- net/ipv4/inet_timewait_sock.c | 1 + net/ipv4/ip_sockglue.c | 12 +++++++++++- 5 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/linux/in.h b/include/linux/in.h index 4065313..db458be 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -75,6 +75,7 @@ struct in_addr { #define IP_IPSEC_POLICY 16 #define IP_XFRM_POLICY 17 #define IP_PASSSEC 18 +#define IP_TRANSPARENT 19 /* BSD compatibility */ #define IP_RECVRETOPTS IP_RETOPTS diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a42cd63..2fafaab 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -128,7 +128,8 @@ struct inet_sock { is_icsk:1, freebind:1, hdrincl:1, - mc_loop:1; + mc_loop:1, + transparent:1; int mc_index; __be32 mc_addr; struct ip_mc_socklist *mc_list; diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 95c660c..8d983a2 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -128,7 +128,8 @@ struct inet_timewait_sock { __be16 tw_dport; __u16 tw_num; /* And these are ours. */ - __u8 tw_ipv6only:1; + __u8 tw_ipv6only:1, + tw_transparent:1; /* 15 bits hole, try to pack */ __u16 tw_ipv6_offset; unsigned long tw_ttd; diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index ce16e9a..af16fd4 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -124,6 +124,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int stat tw->tw_reuse = sk->sk_reuse; tw->tw_hash = sk->sk_hash; tw->tw_ipv6only = 0; + tw->tw_transparent = inet->transparent; tw->tw_prot = sk->sk_prot_creator; twsk_net_set(tw, hold_net(sock_net(sk))); atomic_set(&tw->tw_refcnt, 1); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d8adfd4..a05bec6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -420,7 +420,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<= sizeof(int)) { @@ -879,6 +879,16 @@ static int do_ip_setsockopt(struct sock *sk, int level, err = xfrm_user_policy(sk, optname, optval, optlen); break; + case IP_TRANSPARENT: + if (!capable(CAP_NET_ADMIN)) { + err = -EPERM; + break; + } + if (optlen < 1) + goto e_inval; + inet->transparent = !!val; + break; + default: err = -ENOPROTOOPT; break; -- 1.5.2.5 From b0f9031d33741ccd7745112d425d465035444859 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:48 +0200 Subject: [PATCH] Allow binding to non-local addresses if IP_TRANSPARENT is set MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit Setting IP_TRANSPARENT is not really useful without allowing non-local binds for the socket. To make user-space code simpler we allow these binds even if IP_TRANSPARENT is set but IP_FREEBIND is not. Signed-off-by: Tóth László Attila Acked-by: Patrick McHardy --- net/ipv4/af_inet.c | 2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index f2b5270..d2d1001 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -475,7 +475,7 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) */ err = -EADDRNOTAVAIL; if (!sysctl_ip_nonlocal_bind && - !inet->freebind && + !(inet->freebind || inet->transparent) && nsa.saddr != htonl(INADDR_ANY) && chk_addr_ret != RTN_LOCAL && chk_addr_ret != RTN_MULTICAST && -- 1.5.2.5 From 2b3cf4f3e3aa34ff42d17b202b945db2c5c563b2 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:49 +0200 Subject: [PATCH] Make inet_sock.h independent of route.h inet_iif() in inet_sock.h requires route.h. Since users of inet_iif() usually require other route.h functionality anyway this patch moves inet_iif() to route.h. Signed-off-by: KOVACS Krisztian --- include/net/inet_sock.h | 7 ------- VSERVER patch removed this! include/net/route.h | 5 +++++ net/ipv4/netfilter/nf_nat_helper.c | 1 + net/ipv4/syncookies.c | 1 + net/ipv6/af_inet6.c | 1 + 5 files changed, 8 insertions(+), 7 deletions(-) VSERVER patch has own (almost identical) implementation of this. -- 1.5.2.5 From 03bf9d04e8a6d0dc994363c8133ff494226701ef Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:50 +0200 Subject: [PATCH] Conditionally enable transparent flow flag when connecting Set FLOWI_FLAG_ANYSRC in flowi->flags if the socket has the transparent socket option set. This way we selectively enable certain connections with non-local source addresses to be routed. Signed-off-by: KOVACS Krisztian --- include/net/route.h | 6 +++++- 1 files changed, 5 insertions(+), 1 deletions(-) diff --git a/include/net/route.h b/include/net/route.h index 13e464f..2928618 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -27,7 +27,7 @@ #include #include #include -#include +#include // #include #include #include @@ -175,6 +175,9 @@ struct net *net = sock_net(sk); struct nx_info *nx_info = current->nx_info; + if (inet_sk(sk)->transparent) + fl.flags |= FLOWI_FLAG_ANYSRC; + if (sk) nx_info = sk->sk_nx_info; -- 1.5.2.5 From 181680c6c0df4335bd8eeeafe06ea12e6918c3b7 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:50 +0200 Subject: [PATCH] Handle TCP SYN+ACK/ACK/RST transparency The TCP stack sends out SYN+ACK/ACK/RST reply packets in response to incoming packets. The non-local source address check on output bites us again, as replies for transparently redirected traffic won't have a chance to leave the node. This patch selectively sets the FLOWI_FLAG_ANYSRC flag when doing the route lookup for those replies. Transparent replies are enabled if the listening socket has the transparent socket flag set. Signed-off-by: KOVACS Krisztian --- include/net/inet_sock.h | 8 +++++++- include/net/ip.h | 9 +++++++++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 4 +++- net/ipv4/syncookies.c | 1 + net/ipv4/tcp_ipv4.c | 11 ++++++++--- 6 files changed, 29 insertions(+), 5 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 5ca683e..013e41d 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -71,7 +71,8 @@ struct inet_request_sock { sack_ok : 1, wscale_ok : 1, ecn_ok : 1, - acked : 1; + acked : 1, + no_srccheck: 1; struct ip_options *opt; }; @@ -191,4 +192,9 @@ static inline int inet_sk_ehashfn(const struct sock *sk) return inet_ehashfn(laddr, lport, faddr, fport); } +static inline __u8 inet_sk_flowi_flags(const struct sock *sk) +{ + return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0; +} + #endif /* _INET_SOCK_H */ diff --git a/include/net/ip.h b/include/net/ip.h index 6d7bcd5..c611608 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -29,6 +29,7 @@ #include #include +#include struct sock; @@ -140,12 +141,20 @@ static inline void ip_tr_mc_map(__be32 addr, char *buf) struct ip_reply_arg { struct kvec iov[1]; + int flags; __wsum csum; int csumoffset; /* u16 offset of csum in iov[0].iov_base */ /* -1 if not needed */ int bound_dev_if; }; +#define IP_REPLY_ARG_NOSRCCHECK 1 + +static inline __u8 ip_reply_arg_flowi_flags(const struct ip_reply_arg *arg) +{ + return (arg->flags & IP_REPLY_ARG_NOSRCCHECK) ? FLOWI_FLAG_ANYSRC : 0; +} + void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg, unsigned int len); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 828ea21..6d70d51 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -333,6 +333,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 0834926..9ac5270 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -342,6 +342,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) .saddr = inet->saddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = inet->sport, .dport = inet->dport } } }; @@ -1380,7 +1381,8 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar .uli_u = { .ports = { .sport = tcp_hdr(skb)->dest, .dport = tcp_hdr(skb)->source } }, - .proto = sk->sk_protocol }; + .proto = sk->sk_protocol, + .flags = ip_reply_arg_flowi_flags(arg) }; security_skb_classify_flow(skb, &fl); if (ip_route_output_key(sock_net(sk), &rt, &fl)) return; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 19a1037..d5fa8c7 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -340,6 +340,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .saddr = ireq->loc_addr, .tos = RT_CONN_FLAGS(sk) } }, .proto = IPPROTO_TCP, + .flags = inet_sk_flowi_flags(sk), .uli_u = { .ports = { .sport = th->dest, .dport = th->source } } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0e9bc12..58873ad 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -594,6 +594,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) ip_hdr(skb)->saddr, /* XXX */ sizeof(struct tcphdr), IPPROTO_TCP, 0); arg.csumoffset = offsetof(struct tcphdr, check) / 2; + arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; ip_send_reply(skb->dst->dev->nd_net->ipv4.tcp_sock, skb, &arg, arg.iov[0].iov_len); @@ -608,7 +609,7 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 ts) + u32 win, u32 ts, int reply_flags) { struct tcphdr *th = tcp_hdr(skb); struct { @@ -684,6 +685,7 @@ static void tcp_v4_send_ack(struct tcp_timewait_sock *twsk, arg.iov[0].iov_len); } #endif + arg.flags = reply_flags; arg.csum = csum_tcpudp_nofold(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, /* XXX */ arg.iov[0].iov_len, IPPROTO_TCP, 0); @@ -704,7 +706,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v4_send_ack(tcptw, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, - tcptw->tw_ts_recent); + tcptw->tw_ts_recent, + tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0); inet_twsk_put(tw); } @@ -714,7 +717,8 @@ static void tcp_v4_reqsk_send_ack(struct sk_buff *skb, { tcp_v4_send_ack(NULL, skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, - req->ts_recent); + req->ts_recent, + inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0); } /* @@ -1321,6 +1325,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; + ireq->no_srccheck = inet_sk(sk)->transparent; ireq->opt = tcp_v4_save_options(sk, skb); if (!want_cookie) TCP_ECN_create_request(req, tcp_hdr(skb)); -- 1.5.2.5 From f8be7a55d2a449d188f033d7914d2af007054fbc Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:50 +0200 Subject: [PATCH] Make Netfilter's ip_route_me_harder() non-local address compatible Netfilter's ip_route_me_harder() tries to re-route packets either generated or re-routed by Netfilter. This patch changes ip_route_me_harder() to handle packets from non-locally-bound sockets with IP_TRANSPARENT set as local and to set the appropriate flowi flags when re-doing the routing lookup. Signed-off-by: KOVACS Krisztian --- net/ipv4/netfilter.c | 3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index f8edacd..01671ad 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -20,6 +20,8 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) unsigned int type; type = inet_addr_type(&init_net, iph->saddr); + if (skb->sk && inet_sk(skb->sk)->transparent) + type = RTN_LOCAL; if (addr_type == RTN_UNSPEC) addr_type = type; @@ -33,6 +35,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type) fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); fl.oif = skb->sk ? skb->sk->sk_bound_dev_if : 0; fl.mark = skb->mark; + fl.flags = skb->sk ? inet_sk_flowi_flags(skb->sk) : 0; if (ip_route_output_key(&init_net, &rt, &fl) != 0) return -1; -- 1.5.2.5 From 7797a39b59dae015e06bd4860affbdb250c312b7 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:50 +0200 Subject: [PATCH] Port redirection support for TCP Current TCP code relies on the local port of the listening socket being the same as the destination address of the incoming connection. Port redirection used by many transparent proxying techniques obviously breaks this, so we have to store the original destination port address. This patch extends struct inet_request_sock and stores the incoming destination port value there. It also modifies the handshake code to use that value as the source port when sending reply packets. Signed-off-by: KOVACS Krisztian --- include/net/inet_sock.h | 2 +- include/net/tcp.h | 1 + net/ipv4/inet_connection_sock.c | 2 ++ net/ipv4/syncookies.c | 1 + net/ipv4/tcp_output.c | 2 +- 5 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 013e41d..42b5d36 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -60,8 +60,8 @@ struct inet_request_sock { struct request_sock req; #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) u16 inet6_rsk_offset; - /* 2 bytes hole, try to pack */ #endif + __be16 loc_port; __be32 loc_addr; __be32 rmt_addr; __be16 rmt_port; diff --git a/include/net/tcp.h b/include/net/tcp.h index 633147c..3c6a549 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -975,6 +975,7 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->acked = 0; ireq->ecn_ok = 0; ireq->rmt_port = tcp_hdr(skb)->source; + ireq->loc_port = tcp_hdr(skb)->dest; } extern void tcp_enter_memory_pressure(void); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 6d70d51..33dc826 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -508,6 +508,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, newicsk->icsk_bind_hash = NULL; inet_sk(newsk)->dport = inet_rsk(req)->rmt_port; + inet_sk(newsk)->num = ntohs(inet_rsk(req)->loc_port); + inet_sk(newsk)->sport = inet_rsk(req)->loc_port; newsk->sk_write_space = sk_stream_write_space; newicsk->icsk_retransmits = 0; diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index d5fa8c7..676c550 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -299,6 +299,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = cookie; req->mss = mss; + ireq->loc_port = th->dest; ireq->rmt_port = th->source; ireq->loc_addr = ip_hdr(skb)->daddr; ireq->rmt_addr = ip_hdr(skb)->saddr; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index debf235..2b1d34d 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2211,7 +2211,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, th->syn = 1; th->ack = 1; TCP_ECN_make_synack(req, th); - th->source = inet_sk(sk)->sport; + th->source = ireq->loc_port; th->dest = ireq->rmt_port; /* Setting of flags are superfluous here for callers (and ECE is * not even correctly set) -- 1.5.2.5 From 4feb1a188b6bd0ff8cab5c8d817fbfc7d7788802 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:51 +0200 Subject: [PATCH] Export UDP socket lookup function The iptables tproxy code has to be able to do UDP socket hash lookups, so we have to provide an exported lookup function for this purpose. Signed-off-by: KOVACS Krisztian --- include/net/udp.h | 4 ++++ net/ipv4/udp.c | 7 +++++++ 2 files changed, 11 insertions(+), 0 deletions(-) diff --git a/include/net/udp.h b/include/net/udp.h index 3e55a99..b88a196 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -147,6 +147,10 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen, int (*push_pending_frames)(struct sock *)); +extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, + int dif); + DECLARE_SNMP_STAT(struct udp_mib, udp_statistics); DECLARE_SNMP_STAT(struct udp_mib, udp_stats_in6); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1f535e3..672d0d4 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -307,6 +307,13 @@ static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, return result; } +struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, + __be32 daddr, __be16 dport, int dif) +{ + return __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, udp_hash); +} +EXPORT_SYMBOL_GPL(udp4_lib_lookup); + static inline struct sock *udp_v4_mcast_next(struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, -- 1.5.2.5 From d49cee38069417f3cb83ca67baa65a2034d2d2ce Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:51 +0200 Subject: [PATCH] Split Netfilter IPv4 defragmentation into a separate module Netfilter connection tracking requires all IPv4 packets to be defragmented. Both the socket match and the TPROXY target depend on this functionality, so this patch separates the Netfilter IPv4 defrag hooks into a separate module. Signed-off-by: KOVACS Krisztian --- include/net/netfilter/ipv4/nf_defrag_ipv4.h | 6 ++ net/ipv4/netfilter/Kconfig | 5 + net/ipv4/netfilter/Makefile | 3 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 56 +------------- net/ipv4/netfilter/nf_defrag_ipv4.c | 96 ++++++++++++++++++++++++ 5 files changed, 113 insertions(+), 53 deletions(-) diff --git a/include/net/netfilter/ipv4/nf_defrag_ipv4.h b/include/net/netfilter/ipv4/nf_defrag_ipv4.h new file mode 100644 index 0000000..6b00ea3 --- /dev/null +++ b/include/net/netfilter/ipv4/nf_defrag_ipv4.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV4_H +#define _NF_DEFRAG_IPV4_H + +extern void nf_defrag_ipv4_enable(void); + +#endif /* _NF_DEFRAG_IPV4_H */ diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 2767841..3520c01 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,10 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 tristate "IPv4 connection tracking support (required for NAT)" depends on NF_CONNTRACK default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index d9b92fb..782b1e7 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -18,6 +18,9 @@ obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o obj-$(CONFIG_NF_NAT) += nf_nat.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o + # NAT helpers (nf_conntrack) obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index cacb9cb..9da59f3 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,3 +1,4 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team * @@ -23,6 +24,7 @@ #include #include #include +#include #include int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb, @@ -63,23 +65,6 @@ NIPQUAD(tuple->dst.u3.ip)); } -/* Returns new sk_buff, or NULL */ -static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - int err; - - skb_orphan(skb); - - local_bh_disable(); - err = ip_defrag(skb, user); - local_bh_enable(); - - if (!err) - ip_send_check(ip_hdr(skb)); - - return err; -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -144,28 +129,6 @@ return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if (skb->nfct) - return NF_ACCEPT; - - /* Gather fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (nf_ct_ipv4_gather_frags(skb, - hooknum == NF_INET_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT)) - return NF_STOLEN; - } - return NF_ACCEPT; -} - static unsigned int ipv4_conntrack_in(unsigned int hooknum, struct sk_buff *skb, const struct net_device *in, @@ -195,13 +158,6 @@ make it the first hook. */ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_in, .owner = THIS_MODULE, .pf = PF_INET, @@ -209,13 +165,6 @@ .priority = NF_IP_PRI_CONNTRACK, }, { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, - }, - { .hook = ipv4_conntrack_local, .owner = THIS_MODULE, .pf = PF_INET, @@ -422,6 +371,7 @@ int ret = 0; need_conntrack(); + nf_defrag_ipv4_enable(); ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 0000000..aa2c50a --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,96 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* Returns new sk_buff, or NULL */ +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) + ip_send_check(ip_hdr(skb)); + + return err; +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct) + return NF_ACCEPT; +#endif + + /* Gather fragments. */ + if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (nf_ct_ipv4_gather_frags(skb, + hooknum == NF_INET_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); -- 1.5.2.5 From 40b01b3ada99906bb5818e15cddfe49344d09fd9 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:51 +0200 Subject: [PATCH] iptables tproxy core The iptables tproxy core is a module that contains the common routines used by various tproxy related modules (TPROXY target and socket match) Signed-off-by: KOVACS Krisztian --- include/net/netfilter/nf_tproxy_core.h | 32 +++++++++++ net/netfilter/Kconfig | 15 +++++ net/netfilter/Makefile | 3 + net/netfilter/nf_tproxy_core.c | 96 ++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+), 0 deletions(-) diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h new file mode 100644 index 0000000..208b46f --- /dev/null +++ b/include/net/netfilter/nf_tproxy_core.h @@ -0,0 +1,32 @@ +#ifndef _NF_TPROXY_CORE_H +#define _NF_TPROXY_CORE_H + +#include +#include +#include +#include +#include +#include + +/* look up and get a reference to a matching socket */ +extern struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening); + +static inline void +nf_tproxy_put_sock(struct sock *sk) +{ + /* TIME_WAIT inet sockets have to be handled differently */ + if ((sk->sk_protocol == IPPROTO_TCP) && (sk->sk_state == TCP_TIME_WAIT)) + inet_twsk_put(inet_twsk(sk)); + else + sock_put(sk); +} + +/* assign a socket to the skb -- consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk); + +#endif diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index c1fc0f1..de42858 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -277,6 +277,21 @@ config NF_CT_NETLINK help This option enables support for a netlink-based userspace interface +# transparent proxy support +config NETFILTER_TPROXY + tristate "Transparent proxying support (EXPERIMENTAL)" + depends on EXPERIMENTAL + depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED + help + This option enables transparent proxying support, that is, + support for handling non-locally bound IPv4 TCP and UDP sockets. + For it to work you will have to configure certain iptables rules + and use policy routing. For more information on how to set it up + see Documentation/networking/tproxy.txt. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XTABLES tristate "Netfilter Xtables support (required for ip_tables)" default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 5c4b183..d644e82 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -34,6 +34,9 @@ obj-$(CONFIG_NF_CONNTRACK_SANE) += nf_conntrack_sane.o obj-$(CONFIG_NF_CONNTRACK_SIP) += nf_conntrack_sip.o obj-$(CONFIG_NF_CONNTRACK_TFTP) += nf_conntrack_tftp.o +# transparent proxy support +obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o + # generic X tables obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c new file mode 100644 index 0000000..8a6075d --- /dev/null +++ b/net/netfilter/nf_tproxy_core.c @@ -0,0 +1,96 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include + +#include +#include +#include +#include +#include + +struct sock * +nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, + const __be32 saddr, const __be32 daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, bool listening_only) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + if (listening_only) + sk = __inet_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + else + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), sk); + + return sk; +} +EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); + +static void +nf_tproxy_destructor(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + skb->sk = NULL; + skb->destructor = NULL; + + if (sk) + nf_tproxy_put_sock(sk); +} + +/* consumes sk */ +int +nf_tproxy_assign_sock(struct sk_buff *skb, struct sock *sk) +{ + if (inet_sk(sk)->transparent) { + skb->sk = sk; + skb->destructor = nf_tproxy_destructor; + return 1; + } else + nf_tproxy_put_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(nf_tproxy_assign_sock); + +static int __init nf_tproxy_init(void) +{ + pr_info("NF_TPROXY: Transparent proxy support initialized, version 4.1.0\n"); + pr_info("NF_TPROXY: Copyright (c) 2006-2007 BalaBit IT Ltd.\n"); + return 0; +} + +module_init(nf_tproxy_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Transparent proxy support core routines"); -- 1.5.2.5 From 7962977079ac28db798ab5b11ee31df6566fb472 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:52 +0200 Subject: [PATCH] iptables socket match Add iptables 'socket' match, which matches packets for which a TCP/UDP socket lookup succeeds. Signed-off-by: Jan Engelhardt Signed-off-by: KOVACS Krisztian --- net/netfilter/Kconfig | 15 +++++ net/netfilter/Makefile | 1 + net/netfilter/xt_socket.c | 133 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 0 deletions(-) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index de42858..ce21278 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -748,6 +748,21 @@ config NETFILTER_XT_MATCH_SCTP If you want to compile it as a module, say M here and read . If unsure, say `N'. +config NETFILTER_XT_MATCH_SOCKET + tristate '"socket" match support (EXPERIMENTAL)' + depends on EXPERIMENTAL + depends on NETFILTER_TPROXY + depends on NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + select NF_DEFRAG_IPV4 + help + This option adds a `socket' match, which can be used to match + packets for which a TCP or UDP socket lookup finds a valid socket. + It can be used in combination with the MARK target and policy + routing to implement full featured non-locally bound sockets. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_STATE tristate '"state" match support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index d644e82..6d2eee6 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -80,6 +80,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_QUOTA) += xt_quota.o obj-$(CONFIG_NETFILTER_XT_MATCH_RATEEST) += xt_rateest.o obj-$(CONFIG_NETFILTER_XT_MATCH_REALM) += xt_realm.o obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c new file mode 100644 index 0000000..fa33358 --- /dev/null +++ b/net/netfilter/xt_socket.c @@ -0,0 +1,133 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (C) 2007 BalaBit IT Ltd. + * Author: Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#define XT_SOCKET_HAVE_CONNTRACK 1 +#include +#endif + +static bool +socket_mt(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + bool *hotdrop) +{ + const struct iphdr *iph = ip_hdr(skb); + struct udphdr _hdr, *hp; + struct sock *sk; + __be32 daddr; + __be16 dport; +#ifdef XT_SOCKET_HAVE_CONNTRACK + struct nf_conn const *ct; + enum ip_conntrack_info ctinfo; +#endif + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) + return false; + + daddr = iph->daddr; + dport = hp->dest; + +#ifdef XT_SOCKET_HAVE_CONNTRACK + /* Do the lookup with the original socket address in case this is a + * reply packet of an established SNAT-ted connection. */ + ct = nf_ct_get(skb, &ctinfo); + if (ct && (ct != &nf_conntrack_untracked) && + (ctinfo == IP_CT_IS_REPLY + IP_CT_ESTABLISHED) && + (ct->status & IPS_SRC_NAT_DONE)) { + daddr = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip; + dport = (iph->protocol == IPPROTO_TCP) ? + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.tcp.port : + ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; + } +#endif + + sk = nf_tproxy_get_sock_v4(skb->dev->nd_net, iph->protocol, + iph->saddr, daddr, + hp->source, dport, in, false); + if (sk != NULL) { + bool wildcard = (inet_sk(sk)->rcv_saddr == 0); + + nf_tproxy_put_sock(sk); + if (wildcard) + sk = NULL; + } + + pr_debug("socket match: proto %u %08x:%u -> %08x:%u (orig %08x:%u) sock %p\n", + iph->protocol, ntohl(iph->saddr), ntohs(hp->source), + ntohl(daddr), ntohs(dport), + ntohl(iph->daddr), ntohs(hp->dest), sk); + + return (sk != NULL); +} + +static bool +socket_mt_check(const char *tablename, + const void *entry, + const struct xt_match *match, + void *matchinfo, + unsigned int hook_mask) +{ + const struct ipt_ip *i = entry; + + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) + && !(i->invflags & IPT_INV_PROTO)) + return true; + + pr_info("xt_socket: Can be used only in combination with " + "either -p tcp or -p udp\n"); + return false; +} + +static struct xt_match socket_mt_reg __read_mostly = { + .name = "socket", + .family = AF_INET, + .match = socket_mt, + .checkentry = socket_mt_check, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, +}; + +static int __init socket_mt_init(void) +{ + nf_defrag_ipv4_enable(); + return xt_register_match(&socket_mt_reg); +} + +static void __exit socket_mt_exit(void) +{ + xt_unregister_match(&socket_mt_reg); +} + +module_init(socket_mt_init); +module_exit(socket_mt_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("x_tables socket match module"); +MODULE_ALIAS("ipt_socket"); -- 1.5.2.5 From cb15c50f3d6419fec98bc309b5e4ea31b665cb8d Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:52 +0200 Subject: [PATCH] iptables TPROXY target The TPROXY target implements redirection of non-local TCP/UDP traffic to local sockets. Additionally, it's possible to manipulate the packet mark if and only if a socket has been found. (We need this because we cannot use multiple targets in the same iptables rule.) Signed-off-by: Jan Engelhardt Signed-off-by: KOVACS Krisztian --- include/linux/netfilter/xt_TPROXY.h | 14 ++++ net/netfilter/Kconfig | 15 +++++ net/netfilter/Makefile | 1 + net/netfilter/xt_TPROXY.c | 112 +++++++++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 0 deletions(-) diff --git a/include/linux/netfilter/xt_TPROXY.h b/include/linux/netfilter/xt_TPROXY.h new file mode 100644 index 0000000..152e8f9 --- /dev/null +++ b/include/linux/netfilter/xt_TPROXY.h @@ -0,0 +1,14 @@ +#ifndef _XT_TPROXY_H_target +#define _XT_TPROXY_H_target + +/* TPROXY target is capable of marking the packet to perform + * redirection. We can get rid of that whenever we get support for + * mutliple targets in the same rule. */ +struct xt_tproxy_target_info { + u_int32_t mark_mask; + u_int32_t mark_value; + __be32 laddr; + __be16 lport; +}; + +#endif /* _XT_TPROXY_H_target */ diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ce21278..d36018d 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -411,6 +411,21 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_TPROXY + tristate '"TPROXY" target support (EXPERIMENTAL)' + depends on EXPERIMENTAL + depends on NETFILTER_TPROXY + depends on NETFILTER_XTABLES + depends on NETFILTER_ADVANCED + select NF_DEFRAG_IPV4 + help + This option adds a `TPROXY' target, which is somewhat similar to + REDIRECT. It can only be used in the mangle table and is useful + to redirect traffic to a transparent proxy. It does _not_ depend + on Netfilter connection tracking and NAT, unlike REDIRECT. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_TRACE tristate '"TRACE" target support' depends on NETFILTER_XTABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 6d2eee6..04844d0 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o +obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c new file mode 100644 index 0000000..183f251 --- /dev/null +++ b/net/netfilter/xt_TPROXY.c @@ -0,0 +1,112 @@ +/* + * Transparent proxy support for Linux/iptables + * + * Copyright (c) 2006-2007 BalaBit IT Ltd. + * Author: Balazs Scheidler, Krisztian Kovacs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +static unsigned int +tproxy_tg(struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct xt_tproxy_target_info *tgi = targinfo; + struct udphdr _hdr, *hp; + struct sock *sk; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) + return NF_DROP; + + sk = nf_tproxy_get_sock_v4(skb->dev->nd_net, iph->protocol, + iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, tgi->lport ? tgi->lport : hp->dest, + in, true); + + /* NOTE: assign_sock consumes our sk reference */ + if (sk && nf_tproxy_assign_sock(skb, sk)) { + /* This should be in a separate target, but we don't do multiple + targets on the same rule yet */ + skb->mark = (skb->mark & ~tgi->mark_mask) ^ tgi->mark_value; + + pr_debug("redirecting: proto %u %08x:%u -> %08x:%u, mark: %x\n", + iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), + ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + return NF_ACCEPT; + } + + pr_debug("no socket, dropping: proto %u %08x:%u -> %08x:%u, mark: %x\n", + iph->protocol, ntohl(iph->daddr), ntohs(hp->dest), + ntohl(tgi->laddr), ntohs(tgi->lport), skb->mark); + return NF_DROP; +} + +static bool +tproxy_tg_check(const char *tablename, + const void *entry, + const struct xt_target *target, + void *targetinfo, + unsigned int hook_mask) +{ + const struct ipt_ip *i = entry; + + if ((i->proto == IPPROTO_TCP || i->proto == IPPROTO_UDP) + && !(i->invflags & IPT_INV_PROTO)) + return true; + + pr_info("xt_TPROXY: Can be used only in combination with " + "either -p tcp or -p udp\n"); + return false; +} + +static struct xt_target tproxy_tg_reg __read_mostly = { + .name = "TPROXY", + .family = AF_INET, + .table = "mangle", + .target = tproxy_tg, + .targetsize = sizeof(struct xt_tproxy_target_info), + .checkentry = tproxy_tg_check, + .hooks = 1 << NF_INET_PRE_ROUTING, + .me = THIS_MODULE, +}; + +static int __init tproxy_tg_init(void) +{ + nf_defrag_ipv4_enable(); + return xt_register_target(&tproxy_tg_reg); +} + +static void __exit tproxy_tg_exit(void) +{ + xt_unregister_target(&tproxy_tg_reg); +} + +module_init(tproxy_tg_init); +module_exit(tproxy_tg_exit); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Krisztian Kovacs"); +MODULE_DESCRIPTION("Netfilter transparent proxy (TPROXY) target module."); +MODULE_ALIAS("ipt_TPROXY"); -- 1.5.2.5 From 1ba926a05e2c25585f15e7c9f895557e6a48407f Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:53 +0200 Subject: [PATCH] Don't lookup the socket if there's a socket attached to the skb Use the socket cached in the TPROXY target if it's present. Signed-off-by: KOVACS Krisztian --- net/ipv4/tcp_ipv4.c | 9 +++++++++ 1 files changed, 9 insertions(+), 0 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 58873ad..8937675 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1643,6 +1643,15 @@ TCP_SKB_CB(skb)->flags = iph->tos; TCP_SKB_CB(skb)->sacked = 0; +#if defined(CONFIG_NETFILTER_TPROXY) || defined(CONFIG_NETFILTER_TPROXY_MODULE) + if (unlikely(skb->sk)) { + /* steal reference */ + sk = skb->sk; + skb->destructor = NULL; + skb->sk = NULL; + } else +#endif + sk = __inet_lookup(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); if (!sk) -- 1.5.2.5 From 8b291305fd4891a92118cf358666f25a0e42ca00 Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:53 +0200 Subject: [PATCH] Don't lookup the socket if there's a socket attached to the skb Use the socket cached in the TPROXY target if it's present. Signed-off-by: KOVACS Krisztian --- net/ipv4/udp.c | 16 ++++++++++++++++ 1 files changed, 16 insertions(+), 0 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 672d0d4..c29597c 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -365,6 +365,14 @@ int harderr; int err; +#if defined(CONFIG_NETFILTER_TPROXY) || defined(CONFIG_NETFILTER_TPROXY_MODULE) + if (unlikely(skb->sk)) { + /* steal reference */ + sk = skb->sk; + skb->destructor = NULL; + skb->sk = NULL; + } else +#endif sk = __udp4_lib_lookup(dev_net(skb->dev), iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, udptable); if (sk == NULL) { @@ -1199,6 +1207,14 @@ if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); +#if defined(CONFIG_NETFILTER_TPROXY) || defined(CONFIG_NETFILTER_TPROXY_MODULE) + if (unlikely(skb->sk)) { + /* steal reference */ + sk = skb->sk; + skb->destructor = NULL; + skb->sk = NULL; + } else +#endif sk = __udp4_lib_lookup(dev_net(skb->dev), saddr, uh->source, daddr, uh->dest, inet_iif(skb), udptable); -- 1.5.2.5 From 6e33de49987ecc73930148c88f07f61527929dcc Mon Sep 17 00:00:00 2001 From: KOVACS Krisztian Date: Mon, 28 Apr 2008 14:46:53 +0200 Subject: [PATCH] Add documentation Add basic usage instructions to Documentation/networking. Signed-off-by: KOVACS Krisztian --- Documentation/networking/tproxy.txt | 62 +++++++++++++++++++++++++++++++++++ 1 files changed, 62 insertions(+), 0 deletions(-) diff --git a/Documentation/networking/tproxy.txt b/Documentation/networking/tproxy.txt new file mode 100644 index 0000000..dfcb613 --- /dev/null +++ b/Documentation/networking/tproxy.txt @@ -0,0 +1,62 @@ +Transparent proxy support +========================= + +This feature adds Linux 2.2-like transparent proxy support to current kernels. +To use it, enable NETFILTER_TPROXY, the socket match and the TPROXY target in +your kernel config. You will need policy routing too, so be sure to enable that +as well. + +1. Making non-local sockets work +================================ + +The idea is that you identify packets with destination address matching a local +socket your box, set the packet mark to a certain value, and then match on that +value using policy routing to have those packets delivered locally: + +# iptables -t mangle -N DIVERT +# iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT +# iptables -t mangle -A DIVERT -j MARK --set-mark 1 +# iptables -t mangle -A DIVERT -j ACCEPT + +# ip rule add fwmark 1 lookup 100 +# ip route add local 0.0.0.0/0 dev lo table 100 + +Because of certain restrictions in the IPv4 routing output code you'll have to +modify your application to allow it sending datagrams _from_ non-local IP +addresses. All you have to do is to enable the (SOL_IP, IP_TRANSPARENT) socket +option before calling bind: + +fd = socket(AF_INET, SOCK_STREAM, 0); +/* - 8< -*/ +int value = 1; +setsockopt(fd, SOL_IP, IP_TRANSPARENT, &value, sizeof(value)); +/* - 8< -*/ +name.sin_family = AF_INET; +name.sin_port = htons(0xCAFE); +name.sin_addr.s_addr = htonl(0xDEADBEEF); +bind(fd, &name, sizeof(name)); + +A trivial patch for netcat is available here: +http://people.netfilter.org/hidden/tproxy/netcat-ip_transparent-support.patch + + +2. Redirecting traffic +====================== + +Transparent proxying often involves "intercepting" traffic on a router. This is +usually done with the iptables REDIRECT target, however, there are serious +limitations of that method. One of the major issues is that it actually +modifies the packets to change the destination address -- which might not be +acceptable in certain situations. (Think of proxying UDP for example: you won't +be able to find out the original destination address. Even in case of TCP +getting the original destination address is racy.) + +The 'TPROXY' target provides similar functionality without relying on NAT. Simply +add rules like this to the iptables ruleset above: + +# iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \ + --tproxy-mark 0x1/0x1 --on-port 50080 + +Note that for this to work you'll have to modify the proxy to enable (SOL_IP, +IP_TRANSPARENT) for the listening socket. + -- 1.5.2.5