diff -Nur linux-2.6.0-test9.org/include/linux/netfilter.h linux-2.6.0-test9/include/linux/netfilter.h --- linux-2.6.0-test9.org/include/linux/netfilter.h 2003-10-25 20:43:40.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter.h 2003-11-13 11:01:38.000000000 +0100 @@ -23,6 +23,7 @@ <= 0x2000 is used for protocol-flags. */ #define NFC_UNKNOWN 0x4000 #define NFC_ALTERED 0x8000 +#define NFC_TRACE 0x10000 #ifdef __KERNEL__ #include @@ -99,6 +100,24 @@ extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +typedef void nf_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix); + +/* Function to register/unregister log function. */ +int nf_log_register(int pf, nf_logfn *logfn); +void nf_log_unregister(int pf, nf_logfn *logfn); + +/* Calls the registered backend logging function */ +void nf_log_packet(int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *fmt, ...); + /* Activate hook; either okfn or kfree_skb called, unless a hook returns NF_STOLEN (in which case, it's up to the hook to deal with the consequences). diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.0-test9/include/linux/netfilter_ipv4/ip_conntrack.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ip_conntrack.h 2003-10-25 20:44:44.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter_ipv4/ip_conntrack.h 2003-11-13 11:01:38.000000000 +0100 @@ -268,6 +268,9 @@ extern unsigned int ip_conntrack_htable_size; +/* A fake conntrack entry which never vanishes. */ +extern struct ip_conntrack ip_conntrack_untracked; + /* eg. PROVIDES_CONNTRACK(ftp); */ #define PROVIDES_CONNTRACK(name) \ int needs_ip_conntrack_##name; \ diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ip_tables.h linux-2.6.0-test9/include/linux/netfilter_ipv4/ip_tables.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ip_tables.h 2003-10-25 20:43:46.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter_ipv4/ip_tables.h 2003-11-13 11:01:38.000000000 +0100 @@ -134,6 +134,12 @@ /* Back pointer */ unsigned int comefrom; + /* Name of the chain */ + char *chainname; + + /* Rule number in the chain. */ + u_int32_t rulenum; + /* Packet and byte counters. */ struct ipt_counters counters; diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_ULOG.h linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_ULOG.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_ULOG.h 2003-10-25 20:43:28.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_ULOG.h 2003-11-13 11:01:18.000000000 +0100 @@ -11,6 +11,9 @@ #define NETLINK_NFLOG 5 #endif +#define NFLOG_DEFAULT_NLGROUP 1 +#define NFLOG_DEFAULT_QTHRESHOLD 1 + #define ULOG_MAC_LEN 80 #define ULOG_PREFIX_LEN 32 diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_conntrack.h linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_conntrack.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_conntrack.h 2003-10-25 20:44:16.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_conntrack.h 2003-11-13 11:01:38.000000000 +0100 @@ -10,6 +10,7 @@ #define IPT_CONNTRACK_STATE_SNAT (1 << (IP_CT_NUMBER + 1)) #define IPT_CONNTRACK_STATE_DNAT (1 << (IP_CT_NUMBER + 2)) +#define IPT_CONNTRACK_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 3)) /* flags, invflags: */ #define IPT_CONNTRACK_STATE 0x01 diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_sctp.h linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_sctp.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_sctp.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_sctp.h 2003-11-13 11:01:48.000000000 +0100 @@ -0,0 +1,25 @@ +/* iptables module for matching the SCTP header + * + * (C) 2003 Harald Welte + * + * This software is distributed under GNU GPL v2, 1991 + * + * $Id$ + */ +#ifndef _IPT_SCTP_H +#define _IPT_SCTP_H + +struct ipt_sctp_info { + u_int16_t spts[2]; /* Souce port range */ + u_int16_t dpts[2]; /* Destination port range */ + u_int32_t chunks; /* chunks to be matched */ + u_int32_t chunk_mask; /* chunk mask to be matched */ + u_int8_t invflags; /* Inverse flags */ +}; + +#define IPT_SCTP_INV_SRCPT 0x01 /* Invert the sense of source ports */ +#define IPT_SCTP_INV_DSTPT 0x02 /* Invert the sense of dest ports */ +#define IPT_SCTP_INV_CHUNKS 0x03 /* Invert the sense of chunks */ +#define IPT_SCTP_INV_MASK 0x03 /* All possible flags */ + +#endif /* _IPT_SCTP_H */ diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_state.h linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_state.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv4/ipt_state.h 2003-10-25 20:44:36.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter_ipv4/ipt_state.h 2003-11-13 11:01:38.000000000 +0100 @@ -4,6 +4,8 @@ #define IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1)) #define IPT_STATE_INVALID (1 << 0) +#define IPT_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 1)) + struct ipt_state_info { unsigned int statemask; diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv4.h linux-2.6.0-test9/include/linux/netfilter_ipv4.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv4.h 2003-10-25 20:44:34.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter_ipv4.h 2003-11-13 11:01:38.000000000 +0100 @@ -51,6 +51,8 @@ enum nf_ip_hook_priorities { NF_IP_PRI_FIRST = INT_MIN, + NF_IP_PRI_CONNTRACK_DEFRAG = -400, + NF_IP_PRI_RAW = -300, NF_IP_PRI_CONNTRACK = -200, NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175, NF_IP_PRI_MANGLE = -150, diff -Nur linux-2.6.0-test9.org/include/linux/netfilter_ipv6/ip6_tables.h linux-2.6.0-test9/include/linux/netfilter_ipv6/ip6_tables.h --- linux-2.6.0-test9.org/include/linux/netfilter_ipv6/ip6_tables.h 2003-10-25 20:44:48.000000000 +0200 +++ linux-2.6.0-test9/include/linux/netfilter_ipv6/ip6_tables.h 2003-11-13 11:01:38.000000000 +0100 @@ -140,6 +140,12 @@ /* Back pointer */ unsigned int comefrom; + /* Name of the chain */ + char *chainname; + + /* Rule number in the chain. */ + u_int32_t rulenum; + /* Packet and byte counters. */ struct ip6t_counters counters; diff -Nur linux-2.6.0-test9.org/net/bridge/br_netfilter.c linux-2.6.0-test9/net/bridge/br_netfilter.c --- linux-2.6.0-test9.org/net/bridge/br_netfilter.c 2003-10-25 20:44:49.000000000 +0200 +++ linux-2.6.0-test9/net/bridge/br_netfilter.c 2003-11-13 10:57:06.000000000 +0100 @@ -297,6 +297,8 @@ if (skb->pkt_type == PACKET_OTHERHOST) { skb->pkt_type = PACKET_HOST; nf_bridge->mask |= BRNF_PKT_TYPE; + /* The physdev module checks on this */ + nf_bridge->mask |= BRNF_BRIDGED; } nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; diff -Nur linux-2.6.0-test9.org/net/bridge/br_netfilter.c.orig linux-2.6.0-test9/net/bridge/br_netfilter.c.orig --- linux-2.6.0-test9.org/net/bridge/br_netfilter.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/bridge/br_netfilter.c.orig 2003-10-25 20:44:49.000000000 +0200 @@ -0,0 +1,754 @@ +/* + * Handle firewalling + * Linux ethernet bridge + * + * Authors: + * Lennert Buytenhek + * Bart De Schuymer (maintainer) + * + * Changes: + * Apr 29 2003: physdev module support (bdschuym) + * Jun 19 2003: let arptables see bridged ARP traffic (bdschuym) + * Oct 06 2003: filter encapsulated IP/ARP VLAN traffic on untagged bridge + * (bdschuym) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Lennert dedicates this file to Kerstin Wurdinger. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "br_private.h" + + +#define skb_origaddr(skb) (((struct bridge_skb_cb *) \ + (skb->cb))->daddr.ipv4) +#define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)->nh.iph->daddr) +#define dnat_took_place(skb) (skb_origaddr(skb) != (skb)->nh.iph->daddr) +#define clear_cb(skb) (memset(&skb_origaddr(skb), 0, \ + sizeof(struct bridge_skb_cb))) + +#define has_bridge_parent(device) ((device)->br_port != NULL) +#define bridge_parent(device) ((device)->br_port->br->dev) + +#define IS_VLAN_IP (skb->protocol == __constant_htons(ETH_P_8021Q) && \ + hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_IP)) +#define IS_VLAN_ARP (skb->protocol == __constant_htons(ETH_P_8021Q) && \ + hdr->h_vlan_encapsulated_proto == __constant_htons(ETH_P_ARP)) + +/* We need these fake structures to make netfilter happy -- + * lots of places assume that skb->dst != NULL, which isn't + * all that unreasonable. + * + * Currently, we fill in the PMTU entry because netfilter + * refragmentation needs it, and the rt_flags entry because + * ipt_REJECT needs it. Future netfilter modules might + * require us to fill additional fields. + */ +static struct net_device __fake_net_device = { + .hard_header_len = ETH_HLEN +}; + +static struct rtable __fake_rtable = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .dev = &__fake_net_device, + .path = &__fake_rtable.u.dst, + .metrics = {[RTAX_MTU - 1] = 1500}, + } + }, + + .rt_flags = 0 +}; + + +/* PF_BRIDGE/PRE_ROUTING *********************************************/ +static void __br_dnat_complain(void) +{ + static unsigned long last_complaint; + + if (jiffies - last_complaint >= 5 * HZ) { + printk(KERN_WARNING "Performing cross-bridge DNAT requires IP " + "forwarding to be enabled\n"); + last_complaint = jiffies; + } +} + + +/* This requires some explaining. If DNAT has taken place, + * we will need to fix up the destination Ethernet address, + * and this is a tricky process. + * + * There are two cases to consider: + * 1. The packet was DNAT'ed to a device in the same bridge + * port group as it was received on. We can still bridge + * the packet. + * 2. The packet was DNAT'ed to a different device, either + * a non-bridged device or another bridge port group. + * The packet will need to be routed. + * + * The correct way of distinguishing between these two cases is to + * call ip_route_input() and to look at skb->dst->dev, which is + * changed to the destination device if ip_route_input() succeeds. + * + * Let us first consider the case that ip_route_input() succeeds: + * + * If skb->dst->dev equals the logical bridge device the packet + * came in on, we can consider this bridging. We then call + * skb->dst->output() which will make the packet enter br_nf_local_out() + * not much later. In that function it is assured that the iptables + * FORWARD chain is traversed for the packet. + * + * Otherwise, the packet is considered to be routed and we just + * change the destination MAC address so that the packet will + * later be passed up to the IP stack to be routed. + * + * Let us now consider the case that ip_route_input() fails: + * + * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input() + * will fail, while __ip_route_output_key() will return success. The source + * address for __ip_route_output_key() is set to zero, so __ip_route_output_key + * thinks we're handling a locally generated packet and won't care + * if IP forwarding is allowed. We send a warning message to the users's + * log telling her to put IP forwarding on. + * + * ip_route_input() will also fail if there is no route available. + * In that case we just drop the packet. + * + * --Lennert, 20020411 + * --Bart, 20020416 (updated) + * --Bart, 20021007 (updated) + */ + +static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb) +{ +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug |= (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_FORWARD); +#endif + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + skb->nf_bridge->mask |= BRNF_PKT_TYPE; + } + skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; + + skb->dev = bridge_parent(skb->dev); + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; + } + skb->dst->output(skb); + return 0; +} + +static int br_nf_pre_routing_finish(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct iphdr *iph = skb->nh.iph; + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING); +#endif + + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING; + + if (dnat_took_place(skb)) { + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, + dev)) { + struct rtable *rt; + struct flowi fl = { .nl_u = + { .ip4_u = { .daddr = iph->daddr, .saddr = 0 , + .tos = iph->tos} }, .proto = 0}; + + if (!ip_route_output_key(&rt, &fl)) { + /* Bridged-and-DNAT'ed traffic doesn't + * require ip_forwarding. + */ + if (((struct dst_entry *)rt)->dev == dev) { + skb->dst = (struct dst_entry *)rt; + goto bridged_dnat; + } + __br_dnat_complain(); + dst_release((struct dst_entry *)rt); + } + kfree_skb(skb); + return 0; + } else { + if (skb->dst->dev == dev) { +bridged_dnat: + /* Tell br_nf_local_out this is a + * bridged frame + */ + nf_bridge->mask |= BRNF_BRIDGED_DNAT; + skb->dev = nf_bridge->physindev; + clear_cb(skb); + if (skb->protocol == + __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, + skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge, + 1); + return 0; + } + memcpy(skb->mac.ethernet->h_dest, dev->dev_addr, + ETH_ALEN); + skb->pkt_type = PACKET_HOST; + } + } else { + skb->dst = (struct dst_entry *)&__fake_rtable; + dst_hold(skb->dst); + } + + clear_cb(skb); + skb->dev = nf_bridge->physindev; + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL, + br_handle_frame_finish, 1); + + return 0; +} + +/* Replicate the checks that IPv4 does on packet reception. + * Set skb->dev to the bridge device (i.e. parent of the + * receiving device) to make netfilter happy, the REDIRECT + * target in particular. Save the original destination IP + * address to be able to detect DNAT afterwards. + */ +static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct iphdr *iph; + __u32 len; + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + + if (skb->protocol != __constant_htons(ETH_P_IP)) { + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *) + ((*pskb)->mac.ethernet); + + if (!IS_VLAN_IP) + return NF_ACCEPT; + if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) + goto out; + skb_pull(*pskb, VLAN_HLEN); + (*pskb)->nh.raw += VLAN_HLEN; + } else if ((skb = skb_share_check(*pskb, GFP_ATOMIC)) == NULL) + goto out; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto inhdr_error; + + iph = skb->nh.iph; + if (iph->ihl < 5 || iph->version != 4) + goto inhdr_error; + + if (!pskb_may_pull(skb, 4*iph->ihl)) + goto inhdr_error; + + iph = skb->nh.iph; + if (ip_fast_csum((__u8 *)iph, iph->ihl) != 0) + goto inhdr_error; + + len = ntohs(iph->tot_len); + if (skb->len < len || len < 4*iph->ihl) + goto inhdr_error; + + if (skb->len > len) { + __pskb_trim(skb, len); + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_IP_PRE_ROUTING); +#endif + if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) + return NF_DROP; + + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->mask |= BRNF_PKT_TYPE; + } + + nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING; + nf_bridge->physindev = skb->dev; + skb->dev = bridge_parent(skb->dev); + store_orig_dstaddr(skb); + + NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb->dev, NULL, + br_nf_pre_routing_finish); + + return NF_STOLEN; + +inhdr_error: +// IP_INC_STATS_BH(IpInHdrErrors); +out: + return NF_DROP; +} + + +/* PF_BRIDGE/LOCAL_IN ************************************************/ +/* The packet is locally destined, which requires a real + * dst_entry, so detach the fake one. On the way up, the + * packet would pass through PRE_ROUTING again (which already + * took place when the packet entered the bridge), but we + * register an IPv4 PRE_ROUTING 'sabotage' hook that will + * prevent this from happening. + */ +static unsigned int br_nf_local_in(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + + if (skb->dst == (struct dst_entry *)&__fake_rtable) { + dst_release(skb->dst); + skb->dst = NULL; + } + + return NF_ACCEPT; +} + +/* PF_BRIDGE/FORWARD *************************************************/ +static int br_nf_forward_finish(struct sk_buff *skb) +{ + struct nf_bridge_info *nf_bridge = skb->nf_bridge; + struct net_device *in; + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)(skb->mac.ethernet); + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_FORWARD); +#endif + + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) { + in = nf_bridge->physindev; + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + } else { + in = *((struct net_device **)(skb->cb)); + } + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_FORWARD, skb, in, + skb->dev, br_forward_finish, 1); + return 0; +} + +/* This is the 'purely bridged' case. For IP, we pass the packet to + * netfilter with indev and outdev set to the bridge device, + * but we are still able to filter on the 'real' indev/outdev + * because of the ipt_physdev.c module. For ARP, indev and outdev are the + * bridge ports. + */ +static unsigned int br_nf_forward(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)(skb->mac.ethernet); + + if (skb->protocol != __constant_htons(ETH_P_IP) && + skb->protocol != __constant_htons(ETH_P_ARP)) { + if (!IS_VLAN_IP && !IS_VLAN_ARP) + return NF_ACCEPT; + skb_pull(*pskb, VLAN_HLEN); + (*pskb)->nh.raw += VLAN_HLEN; + } + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_BR_FORWARD); +#endif + if (skb->protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) { + nf_bridge = skb->nf_bridge; + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->mask |= BRNF_PKT_TYPE; + } + + /* The physdev module checks on this */ + nf_bridge->mask |= BRNF_BRIDGED; + nf_bridge->physoutdev = skb->dev; + + NF_HOOK(PF_INET, NF_IP_FORWARD, skb, bridge_parent(in), + bridge_parent(out), br_nf_forward_finish); + } else { + struct net_device **d = (struct net_device **)(skb->cb); + struct arphdr *arp = skb->nh.arph; + + if (arp->ar_pln != 4) { + if (IS_VLAN_ARP) { + skb_push(*pskb, VLAN_HLEN); + (*pskb)->nh.raw -= VLAN_HLEN; + } + return NF_ACCEPT; + } + *d = (struct net_device *)in; + NF_HOOK(NF_ARP, NF_ARP_FORWARD, skb, (struct net_device *)in, + (struct net_device *)out, br_nf_forward_finish); + } + + return NF_STOLEN; +} + + +/* PF_BRIDGE/LOCAL_OUT ***********************************************/ +static int br_nf_local_out_finish(struct sk_buff *skb) +{ +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug &= ~(1 << NF_BR_LOCAL_OUT); +#endif + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + + NF_HOOK_THRESH(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev, + br_forward_finish, NF_BR_PRI_FIRST + 1); + + return 0; +} + + +/* This function sees both locally originated IP packets and forwarded + * IP packets (in both cases the destination device is a bridge + * device). It also sees bridged-and-DNAT'ed packets. + * To be able to filter on the physical bridge devices (with the ipt_physdev.c + * module), we steal packets destined to a bridge device away from the + * PF_INET/FORWARD and PF_INET/OUTPUT hook functions, and give them back later, + * when we have determined the real output device. This is done in here. + * + * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged + * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward() + * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority + * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor + * will be executed. + * Otherwise, if nf_bridge->physindev is NULL, the bridge-nf code never touched + * this packet before, and so the packet was locally originated. We fake + * the PF_INET/LOCAL_OUT hook. + * Finally, if nf_bridge->physindev isn't NULL, then the packet was IP routed, + * so we fake the PF_INET/FORWARD hook. ipv4_sabotage_out() makes sure + * even routed packets that didn't arrive on a bridge interface have their + * nf_bridge->physindev set. + */ + +static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*_okfn)(struct sk_buff *)) +{ + int (*okfn)(struct sk_buff *skb); + struct net_device *realindev; + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)(skb->mac.ethernet); + + if (skb->protocol != __constant_htons(ETH_P_IP) && !IS_VLAN_IP) + return NF_ACCEPT; + + /* Sometimes we get packets with NULL ->dst here (for example, + * running a dhcp client daemon triggers this). + */ + if (skb->dst == NULL) + return NF_ACCEPT; + + nf_bridge = skb->nf_bridge; + nf_bridge->physoutdev = skb->dev; + + realindev = nf_bridge->physindev; + + /* Bridged, take PF_BRIDGE/FORWARD. + * (see big note in front of br_nf_pre_routing_finish) + */ + if (nf_bridge->mask & BRNF_BRIDGED_DNAT) { + okfn = br_forward_finish; + + if (nf_bridge->mask & BRNF_PKT_TYPE) { + skb->pkt_type = PACKET_OTHERHOST; + nf_bridge->mask ^= BRNF_PKT_TYPE; + } + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_push(skb, VLAN_HLEN); + skb->nh.raw -= VLAN_HLEN; + } + + NF_HOOK(PF_BRIDGE, NF_BR_FORWARD, skb, realindev, + skb->dev, okfn); + } else { + struct net_device *realoutdev = bridge_parent(skb->dev); + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* iptables should match -o br0.x */ + if (nf_bridge->netoutdev) + realoutdev = nf_bridge->netoutdev; +#endif + okfn = br_nf_local_out_finish; + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + (*pskb)->nh.raw += VLAN_HLEN; + } + /* IP forwarded traffic has a physindev, locally + * generated traffic hasn't. + */ + if (realindev != NULL) { + if (((nf_bridge->mask & BRNF_DONT_TAKE_PARENT) == 0) && + has_bridge_parent(realindev)) + realindev = bridge_parent(realindev); + NF_HOOK_THRESH(PF_INET, NF_IP_FORWARD, skb, realindev, + realoutdev, okfn, + NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1); + } else { +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_IP_LOCAL_OUT); +#endif + + NF_HOOK_THRESH(PF_INET, NF_IP_LOCAL_OUT, skb, realindev, + realoutdev, okfn, + NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1); + } + } + + return NF_STOLEN; +} + + +/* PF_BRIDGE/POST_ROUTING ********************************************/ +static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge = (*pskb)->nf_bridge; + struct vlan_ethhdr *hdr = (struct vlan_ethhdr *)(skb->mac.ethernet); + struct net_device *realoutdev = bridge_parent(skb->dev); + + /* Be very paranoid. Must be a device driver bug. */ + if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { + printk(KERN_CRIT "br_netfilter: Argh!! br_nf_post_routing: " + "bad mac.raw pointer."); + if (skb->dev != NULL) { + printk("[%s]", skb->dev->name); + if (has_bridge_parent(skb->dev)) + printk("[%s]", bridge_parent(skb->dev)->name); + } + printk(" head:%p, raw:%p\n", skb->head, skb->mac.raw); + return NF_ACCEPT; + } + + if (skb->protocol != __constant_htons(ETH_P_IP) && !IS_VLAN_IP) + return NF_ACCEPT; + + /* Sometimes we get packets with NULL ->dst here (for example, + * running a dhcp client daemon triggers this). + */ + if (skb->dst == NULL) + return NF_ACCEPT; + +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug ^= (1 << NF_IP_POST_ROUTING); +#endif + + /* We assume any code from br_dev_queue_push_xmit onwards doesn't care + * about the value of skb->pkt_type. + */ + if (skb->pkt_type == PACKET_OTHERHOST) { + skb->pkt_type = PACKET_HOST; + nf_bridge->mask |= BRNF_PKT_TYPE; + } + + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb->nh.raw += VLAN_HLEN; + } + + nf_bridge_save_header(skb); + +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + if (nf_bridge->netoutdev) + realoutdev = nf_bridge->netoutdev; +#endif + NF_HOOK(PF_INET, NF_IP_POST_ROUTING, skb, NULL, + realoutdev, br_dev_queue_push_xmit); + + return NF_STOLEN; +} + + +/* IPv4/SABOTAGE *****************************************************/ + +/* Don't hand locally destined packets to PF_INET/PRE_ROUTING + * for the second time. + */ +static unsigned int ipv4_sabotage_in(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if ((*pskb)->nf_bridge && + !((*pskb)->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) { + okfn(*pskb); + return NF_STOLEN; + } + + return NF_ACCEPT; +} + +/* Postpone execution of PF_INET/FORWARD, PF_INET/LOCAL_OUT + * and PF_INET/POST_ROUTING until we have done the forwarding + * decision in the bridge code and have determined skb->physoutdev. + */ +static unsigned int ipv4_sabotage_out(unsigned int hook, struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if ((out->hard_start_xmit == br_dev_xmit && + okfn != br_nf_forward_finish && + okfn != br_nf_local_out_finish && + okfn != br_dev_queue_push_xmit) +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + || ((out->priv_flags & IFF_802_1Q_VLAN) && + VLAN_DEV_INFO(out)->real_dev->hard_start_xmit == br_dev_xmit) +#endif + ) { + struct sk_buff *skb = *pskb; + struct nf_bridge_info *nf_bridge; + + if (!skb->nf_bridge && !nf_bridge_alloc(skb)) + return NF_DROP; + + nf_bridge = skb->nf_bridge; + + /* This frame will arrive on PF_BRIDGE/LOCAL_OUT and we + * will need the indev then. For a brouter, the real indev + * can be a bridge port, so we make sure br_nf_local_out() + * doesn't use the bridge parent of the indev by using + * the BRNF_DONT_TAKE_PARENT mask. + */ + if (hook == NF_IP_FORWARD && nf_bridge->physindev == NULL) { + nf_bridge->mask &= BRNF_DONT_TAKE_PARENT; + nf_bridge->physindev = (struct net_device *)in; + } +#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) + /* the iptables outdev is br0.x, not br0 */ + if (out->priv_flags & IFF_802_1Q_VLAN) + nf_bridge->netoutdev = (struct net_device *)out; +#endif + okfn(skb); + return NF_STOLEN; + } + + return NF_ACCEPT; +} + +/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent + * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input. + * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because + * ip_refrag() can return NF_STOLEN. + */ +static struct nf_hook_ops br_nf_ops[] = { + { .hook = br_nf_pre_routing, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_PRE_ROUTING, + .priority = NF_BR_PRI_BRNF, }, + { .hook = br_nf_local_in, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_LOCAL_IN, + .priority = NF_BR_PRI_BRNF, }, + { .hook = br_nf_forward, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_FORWARD, + .priority = NF_BR_PRI_BRNF, }, + { .hook = br_nf_local_out, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_LOCAL_OUT, + .priority = NF_BR_PRI_FIRST, }, + { .hook = br_nf_post_routing, + .owner = THIS_MODULE, + .pf = PF_BRIDGE, + .hooknum = NF_BR_POST_ROUTING, + .priority = NF_BR_PRI_LAST, }, + { .hook = ipv4_sabotage_in, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_FIRST, }, + { .hook = ipv4_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_FORWARD, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD, }, + { .hook = ipv4_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT, }, + { .hook = ipv4_sabotage_out, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_FIRST, }, +}; + +int br_netfilter_init(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(br_nf_ops); i++) { + int ret; + + if ((ret = nf_register_hook(&br_nf_ops[i])) >= 0) + continue; + + while (i--) + nf_unregister_hook(&br_nf_ops[i]); + + return ret; + } + + printk(KERN_NOTICE "Bridge firewalling registered\n"); + + return 0; +} + +void br_netfilter_fini(void) +{ + int i; + + for (i = ARRAY_SIZE(br_nf_ops) - 1; i >= 0; i--) + nf_unregister_hook(&br_nf_ops[i]); +} diff -Nur linux-2.6.0-test9.org/net/core/netfilter.c linux-2.6.0-test9/net/core/netfilter.c --- linux-2.6.0-test9.org/net/core/netfilter.c 2003-10-25 20:43:34.000000000 +0200 +++ linux-2.6.0-test9/net/core/netfilter.c 2003-11-13 11:01:18.000000000 +0100 @@ -8,8 +8,10 @@ * * February 2000: Modified by James Morris to have 1 queue per protocol. * 15-Mar-2000: Added NF_REPEAT --RR. + * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. */ #include +#include #include #include #include @@ -743,7 +745,70 @@ EXPORT_SYMBOL(skb_ip_make_writable); #endif /*CONFIG_INET*/ +/* Internal logging interface, which relies on the real + LOG target modules */ +#define NF_LOG_PREFIXLEN 128 + +static nf_logfn *nf_logging[NPROTO]; /* = NULL */ +static int reported = 0; +static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED; + +int nf_log_register(int pf, nf_logfn *logfn) +{ + int ret = -EBUSY; + + /* Any setup of logging members must be done before + * substituting pointer. */ + smp_wmb(); + spin_lock(&nf_log_lock); + if (!nf_logging[pf]) { + nf_logging[pf] = logfn; + ret = 0; + } + spin_unlock(&nf_log_lock); + return ret; +} + +void nf_log_unregister(int pf, nf_logfn *logfn) +{ + spin_lock(&nf_log_lock); + if (nf_logging[pf] == logfn) + nf_logging[pf] = NULL; + spin_unlock(&nf_log_lock); + + /* Give time to concurrent readers. */ + synchronize_net(); +} + +void nf_log_packet(int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + nf_logfn *logfn; + + rcu_read_lock(); + logfn = nf_logging[pf]; + if (logfn) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + /* We must read logging before nf_logfn[pf] */ + smp_read_barrier_depends(); + logfn(hooknum, skb, in, out, prefix); + } else if (!reported) { + printk(KERN_WARNING "nf_log_packet: can\'t log yet, " + "no backend logging module loaded in!\n"); + reported++; + } + rcu_read_unlock(); +} + /* This does not belong here, but ipt_REJECT needs it if connection tracking in use: without this, connection may not be in hash table, and hence manufactured ICMP or RST packets will not be associated @@ -763,6 +828,9 @@ EXPORT_SYMBOL(ip_ct_attach); EXPORT_SYMBOL(ip_route_me_harder); EXPORT_SYMBOL(nf_getsockopt); +EXPORT_SYMBOL(nf_log_register); +EXPORT_SYMBOL(nf_log_unregister); +EXPORT_SYMBOL(nf_log_packet); EXPORT_SYMBOL(nf_hook_slow); EXPORT_SYMBOL(nf_hooks); EXPORT_SYMBOL(nf_register_hook); diff -Nur linux-2.6.0-test9.org/net/core/netfilter.c.orig linux-2.6.0-test9/net/core/netfilter.c.orig --- linux-2.6.0-test9.org/net/core/netfilter.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/core/netfilter.c.orig 2003-11-13 11:01:18.000000000 +0100 @@ -0,0 +1,840 @@ +/* netfilter.c: look after the filters for various protocols. + * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. + * + * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any + * way. + * + * Rusty Russell (C)2000 -- This code is GPL. + * + * February 2000: Modified by James Morris to have 1 queue per protocol. + * 15-Mar-2000: Added NF_REPEAT --RR. + * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define __KERNEL_SYSCALLS__ +#include + +/* In this code, we can be waiting indefinitely for userspace to + * service a packet if a hook returns NF_QUEUE. We could keep a count + * of skbuffs queued for userspace, and not deregister a hook unless + * this is zero, but that sucks. Now, we simply check when the + * packets come back: if the hook is gone, the packet is discarded. */ +#ifdef CONFIG_NETFILTER_DEBUG +#define NFDEBUG(format, args...) printk(format , ## args) +#else +#define NFDEBUG(format, args...) +#endif + +/* Sockopts only registered and called from user context, so + net locking would be overkill. Also, [gs]etsockopt calls may + sleep. */ +static DECLARE_MUTEX(nf_sockopt_mutex); + +struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS]; +static LIST_HEAD(nf_sockopts); +static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED; + +/* + * A queue handler may be registered for each protocol. Each is protected by + * long term mutex. The handler must provide an an outfn() to accept packets + * for queueing and must reinject all packets it receives, no matter what. + */ +static struct nf_queue_handler_t { + nf_queue_outfn_t outfn; + void *data; +} queue_handler[NPROTO]; +static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED; + +int nf_register_hook(struct nf_hook_ops *reg) +{ + struct list_head *i; + + spin_lock_bh(&nf_hook_lock); + list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) { + if (reg->priority < ((struct nf_hook_ops *)i)->priority) + break; + } + list_add_rcu(®->list, i->prev); + spin_unlock_bh(&nf_hook_lock); + + synchronize_net(); + return 0; +} + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + spin_lock_bh(&nf_hook_lock); + list_del_rcu(®->list); + spin_unlock_bh(&nf_hook_lock); + + synchronize_net(); +} + +/* Do exclusive ranges overlap? */ +static inline int overlap(int min1, int max1, int min2, int max2) +{ + return max1 > min2 && min1 < max2; +} + +/* Functions to register sockopt ranges (exclusive). */ +int nf_register_sockopt(struct nf_sockopt_ops *reg) +{ + struct list_head *i; + int ret = 0; + + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + + list_for_each(i, &nf_sockopts) { + struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i; + if (ops->pf == reg->pf + && (overlap(ops->set_optmin, ops->set_optmax, + reg->set_optmin, reg->set_optmax) + || overlap(ops->get_optmin, ops->get_optmax, + reg->get_optmin, reg->get_optmax))) { + NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", + ops->set_optmin, ops->set_optmax, + ops->get_optmin, ops->get_optmax, + reg->set_optmin, reg->set_optmax, + reg->get_optmin, reg->get_optmax); + ret = -EBUSY; + goto out; + } + } + + list_add(®->list, &nf_sockopts); +out: + up(&nf_sockopt_mutex); + return ret; +} + +void nf_unregister_sockopt(struct nf_sockopt_ops *reg) +{ + /* No point being interruptible: we're probably in cleanup_module() */ + restart: + down(&nf_sockopt_mutex); + if (reg->use != 0) { + /* To be woken by nf_sockopt call... */ + /* FIXME: Stuart Young's name appears gratuitously. */ + set_current_state(TASK_UNINTERRUPTIBLE); + reg->cleanup_task = current; + up(&nf_sockopt_mutex); + schedule(); + goto restart; + } + list_del(®->list); + up(&nf_sockopt_mutex); +} + +#ifdef CONFIG_NETFILTER_DEBUG +#include +#include +#include + +static void debug_print_hooks_ip(unsigned int nf_debug) +{ + if (nf_debug & (1 << NF_IP_PRE_ROUTING)) { + printk("PRE_ROUTING "); + nf_debug ^= (1 << NF_IP_PRE_ROUTING); + } + if (nf_debug & (1 << NF_IP_LOCAL_IN)) { + printk("LOCAL_IN "); + nf_debug ^= (1 << NF_IP_LOCAL_IN); + } + if (nf_debug & (1 << NF_IP_FORWARD)) { + printk("FORWARD "); + nf_debug ^= (1 << NF_IP_FORWARD); + } + if (nf_debug & (1 << NF_IP_LOCAL_OUT)) { + printk("LOCAL_OUT "); + nf_debug ^= (1 << NF_IP_LOCAL_OUT); + } + if (nf_debug & (1 << NF_IP_POST_ROUTING)) { + printk("POST_ROUTING "); + nf_debug ^= (1 << NF_IP_POST_ROUTING); + } + if (nf_debug) + printk("Crap bits: 0x%04X", nf_debug); + printk("\n"); +} + +void nf_dump_skb(int pf, struct sk_buff *skb) +{ + printk("skb: pf=%i %s dev=%s len=%u\n", + pf, + skb->sk ? "(owned)" : "(unowned)", + skb->dev ? skb->dev->name : "(no dev)", + skb->len); + switch (pf) { + case PF_INET: { + const struct iphdr *ip = skb->nh.iph; + __u32 *opt = (__u32 *) (ip + 1); + int opti; + __u16 src_port = 0, dst_port = 0; + + if (ip->protocol == IPPROTO_TCP + || ip->protocol == IPPROTO_UDP) { + struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl); + src_port = ntohs(tcp->source); + dst_port = ntohs(tcp->dest); + } + + printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu" + " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu", + ip->protocol, NIPQUAD(ip->saddr), + src_port, NIPQUAD(ip->daddr), + dst_port, + ntohs(ip->tot_len), ip->tos, ntohs(ip->id), + ntohs(ip->frag_off), ip->ttl); + + for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++) + printk(" O=0x%8.8X", *opt++); + printk("\n"); + } + } +} + +void nf_debug_ip_local_deliver(struct sk_buff *skb) +{ + /* If it's a loopback packet, it must have come through + * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and + * NF_IP_LOCAL_IN. Otherwise, must have gone through + * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */ + if (!skb->dev) { + printk("ip_local_deliver: skb->dev is NULL.\n"); + } + else if (strcmp(skb->dev->name, "lo") == 0) { + if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING) + | (1 << NF_IP_PRE_ROUTING) + | (1 << NF_IP_LOCAL_IN))) { + printk("ip_local_deliver: bad loopback skb: "); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } + } + else { + if (skb->nf_debug != ((1<nf_debug); + nf_dump_skb(PF_INET, skb); + } + } +} + +void nf_debug_ip_loopback_xmit(struct sk_buff *newskb) +{ + if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) { + printk("ip_dev_loopback_xmit: bad owned skb = %p: ", + newskb); + debug_print_hooks_ip(newskb->nf_debug); + nf_dump_skb(PF_INET, newskb); + } + /* Clear to avoid confusing input check */ + newskb->nf_debug = 0; +} + +void nf_debug_ip_finish_output2(struct sk_buff *skb) +{ + /* If it's owned, it must have gone through the + * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING. + * Otherwise, must have gone through + * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING. + */ + if (skb->sk) { + if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) { + printk("ip_finish_output: bad owned skb = %p: ", skb); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } + } else { + if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING) + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_POST_ROUTING))) { + /* Fragments, entunnelled packets, TCP RSTs + generated by ipt_REJECT will have no + owners, but still may be local */ + if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))){ + printk("ip_finish_output:" + " bad unowned skb = %p: ",skb); + debug_print_hooks_ip(skb->nf_debug); + nf_dump_skb(PF_INET, skb); + } + } + } +} +#endif /*CONFIG_NETFILTER_DEBUG*/ + +/* Call get/setsockopt() */ +static int nf_sockopt(struct sock *sk, int pf, int val, + char *opt, int *len, int get) +{ + struct list_head *i; + struct nf_sockopt_ops *ops; + int ret; + + if (down_interruptible(&nf_sockopt_mutex) != 0) + return -EINTR; + + list_for_each(i, &nf_sockopts) { + ops = (struct nf_sockopt_ops *)i; + if (ops->pf == pf) { + if (get) { + if (val >= ops->get_optmin + && val < ops->get_optmax) { + ops->use++; + up(&nf_sockopt_mutex); + ret = ops->get(sk, val, opt, len); + goto out; + } + } else { + if (val >= ops->set_optmin + && val < ops->set_optmax) { + ops->use++; + up(&nf_sockopt_mutex); + ret = ops->set(sk, val, opt, *len); + goto out; + } + } + } + } + up(&nf_sockopt_mutex); + return -ENOPROTOOPT; + + out: + down(&nf_sockopt_mutex); + ops->use--; + if (ops->cleanup_task) + wake_up_process(ops->cleanup_task); + up(&nf_sockopt_mutex); + return ret; +} + +int nf_setsockopt(struct sock *sk, int pf, int val, char *opt, + int len) +{ + return nf_sockopt(sk, pf, val, opt, &len, 0); +} + +int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len) +{ + return nf_sockopt(sk, pf, val, opt, len, 1); +} + +static unsigned int nf_iterate(struct list_head *head, + struct sk_buff **skb, + int hook, + const struct net_device *indev, + const struct net_device *outdev, + struct list_head **i, + int (*okfn)(struct sk_buff *), + int hook_thresh) +{ + /* + * The caller must not block between calls to this + * function because of risk of continuing from deleted element. + */ + list_for_each_continue_rcu(*i, head) { + struct nf_hook_ops *elem = (struct nf_hook_ops *)*i; + + if (hook_thresh > elem->priority) + continue; + + /* Optimization: we don't need to hold module + reference here, since function can't sleep. --RR */ + switch (elem->hook(hook, skb, indev, outdev, okfn)) { + case NF_QUEUE: + return NF_QUEUE; + + case NF_STOLEN: + return NF_STOLEN; + + case NF_DROP: + return NF_DROP; + + case NF_REPEAT: + *i = (*i)->prev; + break; + +#ifdef CONFIG_NETFILTER_DEBUG + case NF_ACCEPT: + break; + + default: + NFDEBUG("Evil return from %p(%u).\n", + elem->hook, hook); +#endif + } + } + return NF_ACCEPT; +} + +int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data) +{ + int ret; + + write_lock_bh(&queue_handler_lock); + if (queue_handler[pf].outfn) + ret = -EBUSY; + else { + queue_handler[pf].outfn = outfn; + queue_handler[pf].data = data; + ret = 0; + } + write_unlock_bh(&queue_handler_lock); + + return ret; +} + +/* The caller must flush their queue before this */ +int nf_unregister_queue_handler(int pf) +{ + write_lock_bh(&queue_handler_lock); + queue_handler[pf].outfn = NULL; + queue_handler[pf].data = NULL; + write_unlock_bh(&queue_handler_lock); + + return 0; +} + +/* + * Any packet that leaves via this function must come back + * through nf_reinject(). + */ +static int nf_queue(struct sk_buff *skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *)) +{ + int status; + struct nf_info *info; +#ifdef CONFIG_BRIDGE_NETFILTER + struct net_device *physindev = NULL; + struct net_device *physoutdev = NULL; +#endif + + /* QUEUE == DROP if noone is waiting, to be safe. */ + read_lock(&queue_handler_lock); + if (!queue_handler[pf].outfn) { + read_unlock(&queue_handler_lock); + kfree_skb(skb); + return 1; + } + + info = kmalloc(sizeof(*info), GFP_ATOMIC); + if (!info) { + if (net_ratelimit()) + printk(KERN_ERR "OOM queueing packet %p\n", + skb); + read_unlock(&queue_handler_lock); + kfree_skb(skb); + return 1; + } + + *info = (struct nf_info) { + (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn }; + + /* If it's going away, ignore hook. */ + if (!try_module_get(info->elem->owner)) { + read_unlock(&queue_handler_lock); + kfree(info); + return 0; + } + + /* Bump dev refs so they don't vanish while packet is out */ + if (indev) dev_hold(indev); + if (outdev) dev_hold(outdev); + +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) { + physindev = skb->nf_bridge->physindev; + if (physindev) dev_hold(physindev); + physoutdev = skb->nf_bridge->physoutdev; + if (physoutdev) dev_hold(physoutdev); + } +#endif + + status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data); + read_unlock(&queue_handler_lock); + + if (status < 0) { + /* James M doesn't say fuck enough. */ + if (indev) dev_put(indev); + if (outdev) dev_put(outdev); +#ifdef CONFIG_BRIDGE_NETFILTER + if (physindev) dev_put(physindev); + if (physoutdev) dev_put(physoutdev); +#endif + module_put(info->elem->owner); + kfree(info); + kfree_skb(skb); + return 1; + } + return 1; +} + +int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + int hook_thresh) +{ + struct list_head *elem; + unsigned int verdict; + int ret = 0; + + if (skb->ip_summed == CHECKSUM_HW) { + if (outdev == NULL) { + skb->ip_summed = CHECKSUM_NONE; + } else { + skb_checksum_help(skb); + } + } + + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); + +#ifdef CONFIG_NETFILTER_DEBUG + if (skb->nf_debug & (1 << hook)) { + printk("nf_hook: hook %i already set.\n", hook); + nf_dump_skb(pf, skb); + } + skb->nf_debug |= (1 << hook); +#endif + + elem = &nf_hooks[pf][hook]; + next_hook: + verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev, + outdev, &elem, okfn, hook_thresh); + if (verdict == NF_QUEUE) { + NFDEBUG("nf_hook: Verdict = QUEUE.\n"); + if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn)) + goto next_hook; + } + + switch (verdict) { + case NF_ACCEPT: + ret = okfn(skb); + break; + + case NF_DROP: + kfree_skb(skb); + ret = -EPERM; + break; + } + + rcu_read_unlock(); + return ret; +} + +void nf_reinject(struct sk_buff *skb, struct nf_info *info, + unsigned int verdict) +{ + struct list_head *elem = &info->elem->list; + struct list_head *i; + + rcu_read_lock(); + + /* Release those devices we held, or Alexey will kill me. */ + if (info->indev) dev_put(info->indev); + if (info->outdev) dev_put(info->outdev); +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge) { + if (skb->nf_bridge->physindev) + dev_put(skb->nf_bridge->physindev); + if (skb->nf_bridge->physoutdev) + dev_put(skb->nf_bridge->physoutdev); + } +#endif + + /* Drop reference to owner of hook which queued us. */ + module_put(info->elem->owner); + + list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) { + if (i == elem) + break; + } + + if (elem == &nf_hooks[info->pf][info->hook]) { + /* The module which sent it to userspace is gone. */ + NFDEBUG("%s: module disappeared, dropping packet.\n", + __FUNCTION__); + verdict = NF_DROP; + } + + /* Continue traversal iff userspace said ok... */ + if (verdict == NF_REPEAT) { + elem = elem->prev; + verdict = NF_ACCEPT; + } + + if (verdict == NF_ACCEPT) { + next_hook: + verdict = nf_iterate(&nf_hooks[info->pf][info->hook], + &skb, info->hook, + info->indev, info->outdev, &elem, + info->okfn, INT_MIN); + } + + switch (verdict) { + case NF_ACCEPT: + info->okfn(skb); + break; + + case NF_QUEUE: + if (!nf_queue(skb, elem, info->pf, info->hook, + info->indev, info->outdev, info->okfn)) + goto next_hook; + break; + } + rcu_read_unlock(); + + if (verdict == NF_DROP) + kfree_skb(skb); + + kfree(info); + return; +} + +#ifdef CONFIG_INET +/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */ +int ip_route_me_harder(struct sk_buff **pskb) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct rtable *rt; + struct flowi fl = {}; + struct dst_entry *odst; + unsigned int hh_len; + + /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause + * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook. + */ + if (inet_addr_type(iph->saddr) == RTN_LOCAL) { + fl.nl_u.ip4_u.daddr = iph->daddr; + fl.nl_u.ip4_u.saddr = iph->saddr; + fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); + fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0; +#ifdef CONFIG_IP_ROUTE_FWMARK + fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark; +#endif + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + /* Drop old route. */ + dst_release((*pskb)->dst); + (*pskb)->dst = &rt->u.dst; + } else { + /* non-local src, find valid iif to satisfy + * rp-filter when calling ip_route_input. */ + fl.nl_u.ip4_u.daddr = iph->saddr; + if (ip_route_output_key(&rt, &fl) != 0) + return -1; + + odst = (*pskb)->dst; + if (ip_route_input(*pskb, iph->daddr, iph->saddr, + RT_TOS(iph->tos), rt->u.dst.dev) != 0) { + dst_release(&rt->u.dst); + return -1; + } + dst_release(&rt->u.dst); + dst_release(odst); + } + + if ((*pskb)->dst->error) + return -1; + + /* Change in oif may mean change in hh_len. */ + hh_len = (*pskb)->dst->dev->hard_header_len; + if (skb_headroom(*pskb) < hh_len) { + struct sk_buff *nskb; + + nskb = skb_realloc_headroom(*pskb, hh_len); + if (!nskb) + return -1; + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + } + + return 0; +} + +int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len) +{ + struct sk_buff *nskb; + unsigned int iplen; + + if (writable_len > (*pskb)->len) + return 0; + + /* Not exclusive use of packet? Must copy. */ + if (skb_shared(*pskb) || skb_cloned(*pskb)) + goto copy_skb; + + /* Alexey says IP hdr is always modifiable and linear, so ok. */ + if (writable_len <= (*pskb)->nh.iph->ihl*4) + return 1; + + iplen = writable_len - (*pskb)->nh.iph->ihl*4; + + /* DaveM says protocol headers are also modifiable. */ + switch ((*pskb)->nh.iph->protocol) { + case IPPROTO_TCP: { + struct tcphdr hdr; + if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4, + &hdr, sizeof(hdr)) != 0) + goto copy_skb; + if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4) + goto pull_skb; + goto copy_skb; + } + case IPPROTO_UDP: + if (writable_len<=(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr)) + goto pull_skb; + goto copy_skb; + case IPPROTO_ICMP: + if (writable_len + <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr)) + goto pull_skb; + goto copy_skb; + /* Insert other cases here as desired */ + } + +copy_skb: + nskb = skb_copy(*pskb, GFP_ATOMIC); + if (!nskb) + return 0; + BUG_ON(skb_is_nonlinear(nskb)); + + /* Rest of kernel will get very unhappy if we pass it a + suddenly-orphaned skbuff */ + if ((*pskb)->sk) + skb_set_owner_w(nskb, (*pskb)->sk); + kfree_skb(*pskb); + *pskb = nskb; + return 1; + +pull_skb: + return pskb_may_pull(*pskb, writable_len); +} +EXPORT_SYMBOL(skb_ip_make_writable); +#endif /*CONFIG_INET*/ + +/* Internal logging interface, which relies on the real + LOG target modules */ + +#define NF_LOG_PREFIXLEN 128 + +static nf_logfn *nf_logging[NPROTO]; /* = NULL */ +static int reported = 0; +static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED; + +int nf_log_register(int pf, nf_logfn *logfn) +{ + int ret = -EBUSY; + + /* Any setup of logging members must be done before + * substituting pointer. */ + smp_wmb(); + spin_lock(&nf_log_lock); + if (!nf_logging[pf]) { + nf_logging[pf] = logfn; + ret = 0; + } + spin_unlock(&nf_log_lock); + return ret; +} + +void nf_log_unregister(int pf, nf_logfn *logfn) +{ + spin_lock(&nf_log_lock); + if (nf_logging[pf] == logfn) + nf_logging[pf] = NULL; + spin_unlock(&nf_log_lock); + + /* Give time to concurrent readers. */ + synchronize_net(); +} + +void nf_log_packet(int pf, + unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *fmt, ...) +{ + va_list args; + char prefix[NF_LOG_PREFIXLEN]; + nf_logfn *logfn; + + rcu_read_lock(); + logfn = nf_logging[pf]; + if (logfn) { + va_start(args, fmt); + vsnprintf(prefix, sizeof(prefix), fmt, args); + va_end(args); + /* We must read logging before nf_logfn[pf] */ + smp_read_barrier_depends(); + logfn(hooknum, skb, in, out, prefix); + } else if (!reported) { + printk(KERN_WARNING "nf_log_packet: can\'t log yet, " + "no backend logging module loaded in!\n"); + reported++; + } + rcu_read_unlock(); +} + +/* This does not belong here, but ipt_REJECT needs it if connection + tracking in use: without this, connection may not be in hash table, + and hence manufactured ICMP or RST packets will not be associated + with it. */ +void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *); + +void __init netfilter_init(void) +{ + int i, h; + + for (i = 0; i < NPROTO; i++) { + for (h = 0; h < NF_MAX_HOOKS; h++) + INIT_LIST_HEAD(&nf_hooks[i][h]); + } +} + +EXPORT_SYMBOL(ip_ct_attach); +EXPORT_SYMBOL(ip_route_me_harder); +EXPORT_SYMBOL(nf_getsockopt); +EXPORT_SYMBOL(nf_hook_slow); +EXPORT_SYMBOL(nf_hooks); +EXPORT_SYMBOL(nf_register_hook); +EXPORT_SYMBOL(nf_register_queue_handler); +EXPORT_SYMBOL(nf_register_sockopt); +EXPORT_SYMBOL(nf_reinject); +EXPORT_SYMBOL(nf_setsockopt); +EXPORT_SYMBOL(nf_unregister_hook); +EXPORT_SYMBOL(nf_unregister_queue_handler); +EXPORT_SYMBOL(nf_unregister_sockopt); diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/Kconfig linux-2.6.0-test9/net/ipv4/netfilter/Kconfig --- linux-2.6.0-test9.org/net/ipv4/netfilter/Kconfig 2003-11-13 11:07:50.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/Kconfig 2003-11-13 11:01:48.000000000 +0100 @@ -197,6 +197,15 @@ To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_SCTP + tristate "SCTP match support" + depends on IP_NF_IPTABLES + help + This match allows iptables to match on the SCTP header. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + config IP_NF_MATCH_LENGTH tristate "LENGTH match support" depends on IP_NF_IPTABLES @@ -527,6 +536,42 @@ To compile it as a module, choose M here. If unsure, say N. +config IP_NF_RAW + tristate "Raw table" + depends on IP_NF_IPTABLES + help + This option adds a `raw' table to iptables: see the man page for + iptables(8). This table is the very first in the netfilter + framework and hooks in at the PREROUTING and OUTPUT chains. + The TRACE and NOTRACK targets can be used in this table only. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_TRACE + tristate "TRACE target support" + depends on IP_NF_RAW + help + The TRACE target allows packets to be traced as those matches + any subsequent rule in any table/rule. The matched rule and + the packet is logged with the prefix + + TRACE: tablename/chainname/rulenum + + if the ipt_LOG or ipt_ULOG targets are loaded in. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_NOTRACK + tristate "NOTRACK target support" + depends on IP_NF_RAW + help + The NOTRACK target allows a select rule to specify which + packets *not* to enter the conntrack/NAT subsystems + with all the consequences (no ICMP error tracking, + no protocol helpers for the selected packets). + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_ARPTABLES tristate "ARP tables support" diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/Kconfig.orig linux-2.6.0-test9/net/ipv4/netfilter/Kconfig.orig --- linux-2.6.0-test9.org/net/ipv4/netfilter/Kconfig.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/Kconfig.orig 2003-11-13 11:01:38.000000000 +0100 @@ -0,0 +1,606 @@ +# +# IP netfilter configuration +# + +menu "IP: Netfilter Configuration" + depends on INET && NETFILTER + +config IP_NF_CONNTRACK + tristate "Connection tracking (required for masq/NAT)" + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is required to do Masquerading or other kinds of Network + Address Translation (except for Fast NAT). It can also be used to + enhance packet filtering (see `Connection state match support' + below). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_FTP + tristate "FTP protocol support" + depends on IP_NF_CONNTRACK + help + Tracking FTP connections is problematic: special helpers are + required for tracking them, and doing masquerading and other forms + of Network Address Translation on them. + + To compile it as a module, choose M here. If unsure, say Y. + +config IP_NF_IRC + tristate "IRC protocol support" + depends on IP_NF_CONNTRACK + ---help--- + There is a commonly-used extension to IRC called + Direct Client-to-Client Protocol (DCC). This enables users to send + files to each other, and also chat to each other without the need + of a server. DCC Sending is used anywhere you send files over IRC, + and DCC Chat is most commonly used by Eggdrop bots. If you are + using NAT, this extension will enable you to send files and initiate + chats. Note that you do NOT need this extension to get files or + have others initiate chats, or everything else in IRC. + + To compile it as a module, choose M here. If unsure, say Y. + +config IP_NF_TFTP + tristate "TFTP protocol support" + depends on IP_NF_CONNTRACK + help + TFTP connection tracking helper, this is required depending + on how restrictive your ruleset is. + If you are using a tftp client behind -j SNAT or -j MASQUERADING + you will need this. + + To compile it as a module, choose M here. If unsure, say Y. + +config IP_NF_AMANDA + tristate "Amanda backup protocol support" + depends on IP_NF_CONNTRACK + help + If you are running the Amanda backup package + on this machine or machines that will be MASQUERADED through this + machine, then you may want to enable this feature. This allows the + connection tracking and natting code to allow the sub-channels that + Amanda requires for communication of the backup data, messages and + index. + + To compile it as a module, choose M here. If unsure, say Y. + +config IP_NF_QUEUE + tristate "Userspace queueing via NETLINK" + help + Netfilter has the ability to queue packets to user space: the + netlink device can be used to access them using this driver. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_IPTABLES + tristate "IP tables support (required for filtering/masq/NAT)" + help + iptables is a general, extensible packet identification framework. + The packet filtering and full NAT (masquerading, port forwarding, + etc) subsystems now use this: say `Y' or `M' here if you want to use + either of those. + + To compile it as a module, choose M here. If unsure, say N. + +# The simple matches. +config IP_NF_MATCH_LIMIT + tristate "limit match support" + depends on IP_NF_IPTABLES + help + limit matching allows you to control the rate at which a rule can be + matched: mainly useful in combination with the LOG target ("LOG + target support", below) and to avoid some Denial of Service attacks. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_IPRANGE + tristate "IP range match support" + depends on IP_NF_IPTABLES + help + This option makes possible to match IP addresses against IP address + ranges. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_MAC + tristate "MAC address match support" + depends on IP_NF_IPTABLES + help + MAC matching allows you to match packets based on the source + Ethernet address of the packet. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_PKTTYPE + tristate "Packet type match support" + depends on IP_NF_IPTABLES + help + Packet type matching allows you to match a packet by + its "class", eg. BROADCAST, MULTICAST, ... + + Typical usage: + iptables -A INPUT -m pkttype --pkt-type broadcast -j LOG + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_MARK + tristate "netfilter MARK match support" + depends on IP_NF_IPTABLES + help + Netfilter mark matching allows you to match packets based on the + `nfmark' value in the packet. This can be set by the MARK target + (see below). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_MULTIPORT + tristate "Multiple port match support" + depends on IP_NF_IPTABLES + help + Multiport matching allows you to match TCP or UDP packets based on + a series of source or destination ports: normally a rule can only + match a single range of ports. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_TOS + tristate "TOS match support" + depends on IP_NF_IPTABLES + help + TOS matching allows you to match packets based on the Type Of + Service fields of the IP packet. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_RECENT + tristate "recent match support" + depends on IP_NF_IPTABLES + help + This match is used for creating one or many lists of recently + used addresses and then matching against that/those list(s). + + Short options are available by using 'iptables -m recent -h' + Official Website: + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_ECN + tristate "ECN match support" + depends on IP_NF_IPTABLES + help + This option adds a `ECN' match, which allows you to match against + the IPv4 and TCP header ECN fields. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_DSCP + tristate "DSCP match support" + depends on IP_NF_IPTABLES + help + This option adds a `DSCP' match, which allows you to match against + the IPv4 header DSCP field (DSCP codepoint). + + The DSCP codepoint can have any value between 0x0 and 0x4f. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_AH_ESP + tristate "AH/ESP match support" + depends on IP_NF_IPTABLES + help + These two match extensions (`ah' and `esp') allow you to match a + range of SPIs inside AH or ESP headers of IPSec packets. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_LENGTH + tristate "LENGTH match support" + depends on IP_NF_IPTABLES + help + This option allows you to match the length of a packet against a + specific value or range of values. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_TTL + tristate "TTL match support" + depends on IP_NF_IPTABLES + help + This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user + to match packets by their TTL value. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_TCPMSS + tristate "tcpmss match support" + depends on IP_NF_IPTABLES + help + This option adds a `tcpmss' match, which allows you to examine the + MSS value of TCP SYN packets, which control the maximum packet size + for that connection. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_HELPER + tristate "Helper match support" + depends on IP_NF_CONNTRACK && IP_NF_IPTABLES + help + Helper matching allows you to match packets in dynamic connections + tracked by a conntrack-helper, ie. ip_conntrack_ftp + + To compile it as a module, choose M here. If unsure, say Y. + +config IP_NF_MATCH_STATE + tristate "Connection state match support" + depends on IP_NF_CONNTRACK && IP_NF_IPTABLES + help + Connection state matching allows you to match packets based on their + relationship to a tracked connection (ie. previous packets). This + is a powerful tool for packet classification. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_CONNTRACK + tristate "Connection tracking match support" + depends on IP_NF_CONNTRACK && IP_NF_IPTABLES + help + This is a general conntrack match module, a superset of the state match. + + It allows matching on additional conntrack information, which is + useful in complex configurations, such as NAT gateways with multiple + internet links or tunnels. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_OWNER + tristate "Owner match support" + depends on IP_NF_IPTABLES + help + Packet owner matching allows you to match locally-generated packets + based on who created them: the user, group, process or session. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_PHYSDEV + tristate "Physdev match support" + depends on IP_NF_IPTABLES && BRIDGE_NETFILTER + help + Physdev packet matching matches against the physical bridge ports + the IP packet arrived on or will leave by. + + To compile it as a module, choose M here. If unsure, say N. + +# The targets +config IP_NF_FILTER + tristate "Packet filtering" + depends on IP_NF_IPTABLES + help + Packet filtering defines a table `filter', which has a series of + rules for simple packet filtering at local input, forwarding and + local output. See the man page for iptables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_REJECT + tristate "REJECT target support" + depends on IP_NF_FILTER + help + The REJECT target allows a filtering rule to specify that an ICMP + error should be issued in response to an incoming packet, rather + than silently being dropped. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_NAT + tristate "Full NAT" + depends on IP_NF_IPTABLES && IP_NF_CONNTRACK + help + The Full NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in iptables: see the man page for iptables(8). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_NAT_NEEDED + bool + depends on IP_NF_CONNTRACK!=y && IP_NF_IPTABLES!=y && (IP_NF_COMPAT_IPCHAINS!=y && IP_NF_COMPAT_IPFWADM || IP_NF_COMPAT_IPCHAINS) || IP_NF_IPTABLES && IP_NF_CONNTRACK && IP_NF_NAT + default y + +config IP_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + depends on IP_NF_NAT + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on IP_NF_NAT + help + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_NETMAP + tristate "NETMAP target support" + depends on IP_NF_NAT + help + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. It is similar to Fast NAT, except that + Netfilter's connection tracking doesn't work well with Fast NAT. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_SAME + tristate "SAME target support" + depends on IP_NF_NAT + help + This option adds a `SAME' target, which works like the standard SNAT + target, but attempts to give clients the same IP for all connections. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_NAT_LOCAL + bool "NAT of local connections (READ HELP)" + depends on IP_NF_NAT + help + This option enables support for NAT of locally originated connections. + Enable this if you need to use destination NAT on connections + originating from local processes on the nat box itself. + + Please note that you will need a recent version (>= 1.2.6a) + of the iptables userspace program in order to use this feature. + See http://www.iptables.org/ for download instructions. + + If unsure, say 'N'. + +config IP_NF_NAT_SNMP_BASIC + tristate "Basic SNMP-ALG support (EXPERIMENTAL)" + depends on EXPERIMENTAL && IP_NF_NAT + ---help--- + + This module implements an Application Layer Gateway (ALG) for + SNMP payloads. In conjunction with NAT, it allows a network + management system to access multiple private networks with + conflicting addresses. It works by modifying IP addresses + inside SNMP payloads to match IP-layer NAT mapping. + + This is the "basic" form of SNMP-ALG, as described in RFC 2962 + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_NAT_IRC + tristate + depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n + default IP_NF_NAT if IP_NF_IRC=y + default m if IP_NF_IRC=m + +# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y), +# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. Argh. +config IP_NF_NAT_FTP + tristate + depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n + default IP_NF_NAT if IP_NF_FTP=y + default m if IP_NF_FTP=m + +config IP_NF_NAT_TFTP + tristate + depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n + default IP_NF_NAT if IP_NF_TFTP=y + default m if IP_NF_TFTP=m + +config IP_NF_NAT_AMANDA + tristate + depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n + default IP_NF_NAT if IP_NF_AMANDA=y + default m if IP_NF_AMANDA=m + +config IP_NF_MANGLE + tristate "Packet mangling" + depends on IP_NF_IPTABLES + help + This option adds a `mangle' table to iptables: see the man page for + iptables(8). This table is used for various packet alterations + which can effect how the packet is routed. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_TOS + tristate "TOS target support" + depends on IP_NF_MANGLE + help + This option adds a `TOS' target, which allows you to create rules in + the `mangle' table which alter the Type Of Service field of an IP + packet prior to routing. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_ECN + tristate "ECN target support" + depends on IP_NF_MANGLE + ---help--- + This option adds a `ECN' target, which can be used in the iptables mangle + table. + + You can use this target to remove the ECN bits from the IPv4 header of + an IP packet. This is particularly useful, if you need to work around + existing ECN blackholes on the internet, but don't want to disable + ECN support in general. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_DSCP + tristate "DSCP target support" + depends on IP_NF_MANGLE + help + This option adds a `DSCP' match, which allows you to match against + the IPv4 header DSCP field (DSCP codepoint). + + The DSCP codepoint can have any value between 0x0 and 0x4f. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_MARK + tristate "MARK target support" + depends on IP_NF_MANGLE + help + This option adds a `MARK' target, which allows you to create rules + in the `mangle' table which alter the netfilter mark (nfmark) field + associated with the packet prior to routing. This can change + the routing method (see `Use netfilter MARK value as routing + key') and can also be used by other subsystems to change their + behavior. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_CLASSIFY + tristate "CLASSIFY target support" + depends on IP_NF_MANGLE + help + This option adds a `CLASSIFY' target, which enables the user to set + the priority of a packet. Some qdiscs can use this value for + classification, among these are: + + atm, cbq, dsmark, pfifo_fast, htb, prio + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_LOG + tristate "LOG target support" + depends on IP_NF_IPTABLES + help + This option adds a `LOG' target, which allows you to create rules in + any iptables table which records the packet header to the syslog. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_ULOG + tristate "ULOG target support" + depends on IP_NF_IPTABLES + ---help--- + This option adds a `ULOG' target, which allows you to create rules in + any iptables table. The packet is passed to a userspace logging + daemon using netlink multicast sockets; unlike the LOG target + which can only be viewed through syslog. + + The apropriate userspace logging daemon (ulogd) may be obtained from + http://www.gnumonks.org/projects/ulogd + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_TCPMSS + tristate "TCPMSS target support" + depends on IP_NF_IPTABLES + ---help--- + This option adds a `TCPMSS' target, which allows you to alter the + MSS value of TCP SYN packets, to control the maximum size for that + connection (usually limiting it to your outgoing interface's MTU + minus 40). + + This is used to overcome criminally braindead ISPs or servers which + block ICMP Fragmentation Needed packets. The symptoms of this + problem are that everything works fine from your Linux + firewall/router, but machines behind it can never exchange large + packets: + 1) Web browsers connect, then hang with no data received. + 2) Small mail works fine, but large emails hang. + 3) ssh works fine, but scp hangs after initial handshaking. + + Workaround: activate this option and add a rule to your firewall + configuration like: + + iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \ + -j TCPMSS --clamp-mss-to-pmtu + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_RAW + tristate "Raw table" + depends on IP_NF_IPTABLES + help + This option adds a `raw' table to iptables: see the man page for + iptables(8). This table is the very first in the netfilter + framework and hooks in at the PREROUTING and OUTPUT chains. + The TRACE and NOTRACK targets can be used in this table only. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_TRACE + tristate "TRACE target support" + depends on IP_NF_RAW + help + The TRACE target allows packets to be traced as those matches + any subsequent rule in any table/rule. The matched rule and + the packet is logged with the prefix + + TRACE: tablename/chainname/rulenum + + if the ipt_LOG or ipt_ULOG targets are loaded in. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_NOTRACK + tristate "NOTRACK target support" + depends on IP_NF_RAW + help + The NOTRACK target allows a select rule to specify which + packets *not* to enter the conntrack/NAT subsystems + with all the consequences (no ICMP error tracking, + no protocol helpers for the selected packets). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_ARPTABLES + tristate "ARP tables support" + +config IP_NF_ARPFILTER + tristate "ARP packet filtering" + depends on IP_NF_ARPTABLES + +config IP_NF_ARP_MANGLE + tristate "ARP payload mangling" + depends on IP_NF_ARPTABLES + help + Allows altering the ARP packet payload: source and destination + hardware and network addresses. + +# Backwards compatibility modules: only if you don't build in the others. +config IP_NF_COMPAT_IPCHAINS + tristate "ipchains (2.2-style) support" + depends on IP_NF_CONNTRACK!=y && IP_NF_IPTABLES!=y + help + This option places ipchains (with masquerading and redirection + support) back into the kernel, using the new netfilter + infrastructure. It is not recommended for new installations (see + `Packet filtering'). With this enabled, you should be able to use + the ipchains tool exactly as in 2.2 kernels. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_COMPAT_IPFWADM + tristate "ipfwadm (2.0-style) support" + depends on IP_NF_CONNTRACK!=y && IP_NF_IPTABLES!=y && IP_NF_COMPAT_IPCHAINS!=y + help + This option places ipfwadm (with masquerading and redirection + support) back into the kernel, using the new netfilter + infrastructure. It is not recommended for new installations (see + `Packet filtering'). With this enabled, you should be able to use + the ipfwadm tool exactly as in 2.0 kernels. + + To compile it as a module, choose M here. If unsure, say N. + +endmenu + diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/Makefile linux-2.6.0-test9/net/ipv4/netfilter/Makefile --- linux-2.6.0-test9.org/net/ipv4/netfilter/Makefile 2003-10-25 20:43:07.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/Makefile 2003-11-13 11:01:48.000000000 +0100 @@ -34,12 +34,14 @@ # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o -# the three instances of ip_tables +# the four instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o # matches +obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o @@ -81,6 +83,8 @@ obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o +obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o +obj-$(CONFIG_IP_NF_TARGET_TRACE) += ipt_TRACE.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_core.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_core.c 2003-10-25 20:42:41.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_core.c 2003-11-13 11:01:39.000000000 +0100 @@ -29,8 +29,7 @@ #include #include #include -/* For ERR_PTR(). Yeah, I know... --RR */ -#include +#include /* This rwlock protects the main hash table, protocol/helper/expected registrations, conntrack timers*/ @@ -63,6 +62,7 @@ static atomic_t ip_conntrack_count = ATOMIC_INIT(0); struct list_head *ip_conntrack_hash; static kmem_cache_t *ip_conntrack_cachep; +struct ip_conntrack ip_conntrack_untracked; extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; @@ -808,18 +808,10 @@ } #endif - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ + /* Previously seen (loopback or untracked)? Ignore. */ if ((*pskb)->nfct) return NF_ACCEPT; - /* Gather fragments. */ - if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { - *pskb = ip_ct_gather_frags(*pskb); - if (!*pskb) - return NF_STOLEN; - } - proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); /* It may be an icmp error... */ @@ -953,7 +945,6 @@ } } else if (related_to->helper->max_expected && related_to->expecting >= related_to->helper->max_expected) { - struct list_head *cur_item; /* old == NULL */ if (!(related_to->helper->flags & IP_CT_HELPER_F_REUSE_EXPECT)) { @@ -978,21 +969,14 @@ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); /* choose the the oldest expectation to evict */ - list_for_each(cur_item, &related_to->sibling_list) { - struct ip_conntrack_expect *cur; - - cur = list_entry(cur_item, - struct ip_conntrack_expect, - expected_list); - if (cur->sibling == NULL) { - old = cur; + list_for_each_entry(old, &related_to->sibling_list, + expected_list) + if (old->sibling == NULL) break; - } - } - /* (!old) cannot happen, since related_to->expecting is the - * number of unconfirmed expects */ - IP_NF_ASSERT(old); + /* We cannot fail since related_to->expecting is the number + * of unconfirmed expectations */ + IP_NF_ASSERT(old && old->sibling == NULL); /* newnat14 does not reuse the real allocated memory * structures but rather unexpects the old and @@ -1024,7 +1008,7 @@ atomic_set(&new->use, 1); /* add to expected list for this connection */ - list_add(&new->expected_list, &related_to->sibling_list); + list_add_tail(&new->expected_list, &related_to->sibling_list); /* add to global list of expectations */ list_prepend(&ip_conntrack_expect_list, &new->list); /* add and start timer if required */ @@ -1419,6 +1403,15 @@ /* For use by ipt_REJECT */ ip_ct_attach = ip_conntrack_attach; + + /* Set up fake conntrack: + - to never be deleted, not in any hashes */ + atomic_set(&ip_conntrack_untracked.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); + /* - and prepare the ctinfo field for NAT. */ + ip_conntrack_untracked.infos[IP_CT_NEW].master = &ip_conntrack_untracked.ct_general; + return ret; err_free_hash: diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_core.c.orig linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_core.c.orig --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_core.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_core.c.orig 2003-11-13 11:01:27.000000000 +0100 @@ -0,0 +1,1421 @@ +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + * Public Licence. + * + * 23 Apr 2001: Harald Welte + * - new API and handling of conntrack/nat helpers + * - now capable of multiple expectations for one master + * 16 Jul 2002: Harald Welte + * - add usage/reference counts to ip_conntrack_expect + * - export ip_conntrack[_expect]_{find_get,put} functions + * */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This rwlock protects the main hash table, protocol/helper/expected + registrations, conntrack timers*/ +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include +#include +#include +#include +#include + +#define IP_CONNTRACK_VERSION "2.1" + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_RWLOCK(ip_conntrack_lock); +DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock); + +void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; +LIST_HEAD(ip_conntrack_expect_list); +LIST_HEAD(protocol_list); +static LIST_HEAD(helpers); +unsigned int ip_conntrack_htable_size = 0; +int ip_conntrack_max; +static atomic_t ip_conntrack_count = ATOMIC_INIT(0); +struct list_head *ip_conntrack_hash; +static kmem_cache_t *ip_conntrack_cachep; + +extern struct ip_conntrack_protocol ip_conntrack_generic_protocol; + +static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr, + u_int8_t protocol) +{ + return protocol == curr->proto; +} + +struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + p = LIST_FIND(&protocol_list, proto_cmpfn, + struct ip_conntrack_protocol *, protocol); + if (!p) + p = &ip_conntrack_generic_protocol; + + return p; +} + +struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol) +{ + struct ip_conntrack_protocol *p; + + READ_LOCK(&ip_conntrack_lock); + p = __ip_ct_find_proto(protocol); + READ_UNLOCK(&ip_conntrack_lock); + return p; +} + +inline void +ip_conntrack_put(struct ip_conntrack *ct) +{ + IP_NF_ASSERT(ct); + IP_NF_ASSERT(ct->infos[0].master); + /* nf_conntrack_put wants to go via an info struct, so feed it + one at random. */ + nf_conntrack_put(&ct->infos[0]); +} + +static int ip_conntrack_hash_rnd_initted; +static unsigned int ip_conntrack_hash_rnd; + +static u_int32_t +hash_conntrack(const struct ip_conntrack_tuple *tuple) +{ +#if 0 + dump_tuple(tuple); +#endif + return (jhash_3words(tuple->src.ip, + (tuple->dst.ip ^ tuple->dst.protonum), + (tuple->src.u.all | (tuple->dst.u.all << 16)), + ip_conntrack_hash_rnd) % ip_conntrack_htable_size); +} + +int +get_tuple(const struct iphdr *iph, + const struct sk_buff *skb, + unsigned int dataoff, + struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_protocol *protocol) +{ + /* Never happen */ + if (iph->frag_off & htons(IP_OFFSET)) { + printk("ip_conntrack_core: Frag of proto %u.\n", + iph->protocol); + return 0; + } + + tuple->src.ip = iph->saddr; + tuple->dst.ip = iph->daddr; + tuple->dst.protonum = iph->protocol; + + return protocol->pkt_to_tuple(skb, dataoff, tuple); +} + +static int +invert_tuple(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig, + const struct ip_conntrack_protocol *protocol) +{ + inverse->src.ip = orig->dst.ip; + inverse->dst.ip = orig->src.ip; + inverse->dst.protonum = orig->dst.protonum; + + return protocol->invert_tuple(inverse, orig); +} + + +/* ip_conntrack_expect helper functions */ + +/* Compare tuple parts depending on mask. */ +static inline int expect_cmp(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock); + return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask); +} + +static void +destroy_expect(struct ip_conntrack_expect *exp) +{ + DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use)); + IP_NF_ASSERT(atomic_read(&exp->use)); + IP_NF_ASSERT(!timer_pending(&exp->timeout)); + + kfree(exp); +} + + +inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) +{ + IP_NF_ASSERT(exp); + + if (atomic_dec_and_test(&exp->use)) { + /* usage count dropped to zero */ + destroy_expect(exp); + } +} + +static inline struct ip_conntrack_expect * +__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock); + return LIST_FIND(&ip_conntrack_expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); +} + +/* Find a expectation corresponding to a tuple. */ +struct ip_conntrack_expect * +ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple) +{ + struct ip_conntrack_expect *exp; + + READ_LOCK(&ip_conntrack_lock); + READ_LOCK(&ip_conntrack_expect_tuple_lock); + exp = __ip_ct_expect_find(tuple); + if (exp) + atomic_inc(&exp->use); + READ_UNLOCK(&ip_conntrack_expect_tuple_lock); + READ_UNLOCK(&ip_conntrack_lock); + + return exp; +} + +/* remove one specific expectation from all lists and drop refcount, + * does _NOT_ delete the timer. */ +static void __unexpect_related(struct ip_conntrack_expect *expect) +{ + DEBUGP("unexpect_related(%p)\n", expect); + MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); + + /* we're not allowed to unexpect a confirmed expectation! */ + IP_NF_ASSERT(!expect->sibling); + + /* delete from global and local lists */ + list_del(&expect->list); + list_del(&expect->expected_list); + + /* decrement expect-count of master conntrack */ + if (expect->expectant) + expect->expectant->expecting--; + + ip_conntrack_expect_put(expect); +} + +/* remove one specific expecatation from all lists, drop refcount + * and expire timer. + * This function can _NOT_ be called for confirmed expects! */ +static void unexpect_related(struct ip_conntrack_expect *expect) +{ + IP_NF_ASSERT(expect->expectant); + IP_NF_ASSERT(expect->expectant->helper); + /* if we are supposed to have a timer, but we can't delete + * it: race condition. __unexpect_related will + * be calledd by timeout function */ + if (expect->expectant->helper->timeout + && !del_timer(&expect->timeout)) + return; + + __unexpect_related(expect); +} + +/* delete all unconfirmed expectations for this conntrack */ +static void remove_expectations(struct ip_conntrack *ct, int drop_refcount) +{ + struct list_head *exp_entry, *next; + struct ip_conntrack_expect *exp; + + DEBUGP("remove_expectations(%p)\n", ct); + + list_for_each_safe(exp_entry, next, &ct->sibling_list) { + exp = list_entry(exp_entry, struct ip_conntrack_expect, + expected_list); + + /* we skip established expectations, as we want to delete + * the un-established ones only */ + if (exp->sibling) { + DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct); + if (drop_refcount) { + /* Indicate that this expectations parent is dead */ + ip_conntrack_put(exp->expectant); + exp->expectant = NULL; + } + continue; + } + + IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp)); + IP_NF_ASSERT(exp->expectant == ct); + + /* delete expectation from global and private lists */ + unexpect_related(exp); + } +} + +static void +clean_from_lists(struct ip_conntrack *ct) +{ + unsigned int ho, hr; + + DEBUGP("clean_from_lists(%p)\n", ct); + MUST_BE_WRITE_LOCKED(&ip_conntrack_lock); + + ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); + + /* Destroy all un-established, pending expectations */ + remove_expectations(ct, 1); +} + +static void +destroy_conntrack(struct nf_conntrack *nfct) +{ + struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL; + struct ip_conntrack_protocol *proto; + + DEBUGP("destroy_conntrack(%p)\n", ct); + IP_NF_ASSERT(atomic_read(&nfct->use) == 0); + IP_NF_ASSERT(!timer_pending(&ct->timeout)); + + /* To make sure we don't get any weird locking issues here: + * destroy_conntrack() MUST NOT be called with a write lock + * to ip_conntrack_lock!!! -HW */ + proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); + if (proto && proto->destroy) + proto->destroy(ct); + + if (ip_conntrack_destroyed) + ip_conntrack_destroyed(ct); + + WRITE_LOCK(&ip_conntrack_lock); + /* Delete us from our own list to prevent corruption later */ + list_del(&ct->sibling_list); + + /* Delete our master expectation */ + if (ct->master) { + if (ct->master->expectant) { + /* can't call __unexpect_related here, + * since it would screw up expect_list */ + list_del(&ct->master->expected_list); + master = ct->master->expectant; + } + kfree(ct->master); + } + WRITE_UNLOCK(&ip_conntrack_lock); + + if (master) + ip_conntrack_put(master); + + DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + kmem_cache_free(ip_conntrack_cachep, ct); + atomic_dec(&ip_conntrack_count); +} + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct ip_conntrack *ct = (void *)ul_conntrack; + + WRITE_LOCK(&ip_conntrack_lock); + clean_from_lists(ct); + WRITE_UNLOCK(&ip_conntrack_lock); + ip_conntrack_put(ct); +} + +static inline int +conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + return i->ctrack != ignored_conntrack + && ip_ct_tuple_equal(tuple, &i->tuple); +} + +static struct ip_conntrack_tuple_hash * +__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + unsigned int hash = hash_conntrack(tuple); + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + h = LIST_FIND(&ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + tuple, ignored_conntrack); + return h; +} + +/* Find a connection corresponding to a tuple. */ +struct ip_conntrack_tuple_hash * +ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +static inline struct ip_conntrack * +__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo) +{ + struct ip_conntrack *ct + = (struct ip_conntrack *)nfct->master; + + /* ctinfo is the index of the nfct inside the conntrack */ + *ctinfo = nfct - ct->infos; + IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER); + return ct; +} + +/* Return conntrack and conntrack_info given skb->nfct->master */ +struct ip_conntrack * +ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo) +{ + if (skb->nfct) + return __ip_conntrack_get(skb->nfct, ctinfo); + return NULL; +} + +/* Confirm a connection given skb->nfct; places it in hash table */ +int +__ip_conntrack_confirm(struct nf_ct_info *nfct) +{ + unsigned int hash, repl_hash; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = __ip_conntrack_get(nfct, &ctinfo); + + /* ipt_REJECT uses ip_conntrack_attach to attach related + ICMP/TCP RST packets in other direction. Actual packet + which created connection will be IP_CT_NEW or for an + expected connection, IP_CT_RELATED. */ + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + /* We're not in hash table, and we refuse to set up related + connections for unconfirmed conns. But packet copies and + REJECT will give spurious warnings here. */ + /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ + + /* No external references means noone else could have + confirmed us. */ + IP_NF_ASSERT(!is_confirmed(ct)); + DEBUGP("Confirming conntrack %p\n", ct); + + WRITE_LOCK(&ip_conntrack_lock); + /* See if there's one in the list already, including reverse: + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. */ + if (!LIST_FIND(&ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) + && !LIST_FIND(&ip_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { + list_prepend(&ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + list_prepend(&ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY]); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + WRITE_UNLOCK(&ip_conntrack_lock); + return NF_ACCEPT; + } + + WRITE_UNLOCK(&ip_conntrack_lock); + return NF_DROP; +} + +/* Returns true if a connection correspondings to the tuple (required + for NAT). */ +int +ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + struct ip_conntrack_tuple_hash *h; + + READ_LOCK(&ip_conntrack_lock); + h = __ip_conntrack_find(tuple, ignored_conntrack); + READ_UNLOCK(&ip_conntrack_lock); + + return h != NULL; +} + +/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ +struct ip_conntrack * +icmp_error_track(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct ip_conntrack_tuple innertuple, origtuple; + struct { + struct icmphdr icmp; + struct iphdr ip; + } inside; + struct ip_conntrack_protocol *innerproto; + struct ip_conntrack_tuple_hash *h; + int dataoff; + + IP_NF_ASSERT(skb->nfct == NULL); + + /* Not enough header? */ + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0) + return NULL; + + if (inside.icmp.type != ICMP_DEST_UNREACH + && inside.icmp.type != ICMP_SOURCE_QUENCH + && inside.icmp.type != ICMP_TIME_EXCEEDED + && inside.icmp.type != ICMP_PARAMETERPROB + && inside.icmp.type != ICMP_REDIRECT) + return NULL; + + /* Ignore ICMP's containing fragments (shouldn't happen) */ + if (inside.ip.frag_off & htons(IP_OFFSET)) { + DEBUGP("icmp_error_track: fragment of proto %u\n", + inside.ip.protocol); + return NULL; + } + + innerproto = ip_ct_find_proto(inside.ip.protocol); + dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4; + /* Are they talking about one of our connections? */ + if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) { + DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol); + return NULL; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. */ + if (!invert_tuple(&innertuple, &origtuple, innerproto)) { + DEBUGP("icmp_error_track: Can't invert tuple\n"); + return NULL; + } + + *ctinfo = IP_CT_RELATED; + + h = ip_conntrack_find_get(&innertuple, NULL); + if (!h) { + /* Locally generated ICMPs will match inverted if they + haven't been SNAT'ed yet */ + /* FIXME: NAT code has to handle half-done double NAT --RR */ + if (hooknum == NF_IP_LOCAL_OUT) + h = ip_conntrack_find_get(&origtuple, NULL); + + if (!h) { + DEBUGP("icmp_error_track: no match\n"); + return NULL; + } + /* Reverse direction from that found */ + if (DIRECTION(h) != IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } else { + if (DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + } + + /* Update skb to refer to this connection */ + skb->nfct = &h->ctrack->infos[*ctinfo]; + return h->ctrack; +} + +/* There's a small race here where we may free a just-assured + connection. Too bad: we're in trouble anyway. */ +static inline int unreplied(const struct ip_conntrack_tuple_hash *i) +{ + return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status)); +} + +static int early_drop(struct list_head *chain) +{ + /* Traverse backwards: gives us oldest, which is roughly LRU */ + struct ip_conntrack_tuple_hash *h; + int dropped = 0; + + READ_LOCK(&ip_conntrack_lock); + h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + if (!h) + return dropped; + + if (del_timer(&h->ctrack->timeout)) { + death_by_timeout((unsigned long)h->ctrack); + dropped = 1; + } + ip_conntrack_put(h->ctrack); + return dropped; +} + +static inline int helper_cmp(const struct ip_conntrack_helper *i, + const struct ip_conntrack_tuple *rtuple) +{ + return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); +} + +struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple) +{ + return LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + tuple); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + failed due to stress. Otherwise it really is unclassifiable. */ +static struct ip_conntrack_tuple_hash * +init_conntrack(const struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *protocol, + struct sk_buff *skb) +{ + struct ip_conntrack *conntrack; + struct ip_conntrack_tuple repl_tuple; + size_t hash; + struct ip_conntrack_expect *expected; + int i; + static unsigned int drop_next; + + if (!ip_conntrack_hash_rnd_initted) { + get_random_bytes(&ip_conntrack_hash_rnd, 4); + ip_conntrack_hash_rnd_initted = 1; + } + + hash = hash_conntrack(tuple); + + if (ip_conntrack_max && + atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { + /* Try dropping from random chain, or else from the + chain about to put into (in case they're trying to + bomb one hash chain). */ + unsigned int next = (drop_next++)%ip_conntrack_htable_size; + + if (!early_drop(&ip_conntrack_hash[next]) + && !early_drop(&ip_conntrack_hash[hash])) { + if (net_ratelimit()) + printk(KERN_WARNING + "ip_conntrack: table full, dropping" + " packet.\n"); + return ERR_PTR(-ENOMEM); + } + } + + if (!invert_tuple(&repl_tuple, tuple, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); + if (!conntrack) { + DEBUGP("Can't allocate conntrack.\n"); + return ERR_PTR(-ENOMEM); + } + + memset(conntrack, 0, sizeof(*conntrack)); + atomic_set(&conntrack->ct_general.use, 1); + conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple; + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack; + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple; + conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack; + for (i=0; i < IP_CT_NUMBER; i++) + conntrack->infos[i].master = &conntrack->ct_general; + + if (!protocol->new(conntrack, skb)) { + kmem_cache_free(ip_conntrack_cachep, conntrack); + return NULL; + } + /* Don't set timer yet: wait for confirmation */ + init_timer(&conntrack->timeout); + conntrack->timeout.data = (unsigned long)conntrack; + conntrack->timeout.function = death_by_timeout; + + INIT_LIST_HEAD(&conntrack->sibling_list); + + WRITE_LOCK(&ip_conntrack_lock); + /* Need finding and deleting of expected ONLY if we win race */ + READ_LOCK(&ip_conntrack_expect_tuple_lock); + expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp, + struct ip_conntrack_expect *, tuple); + READ_UNLOCK(&ip_conntrack_expect_tuple_lock); + + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). */ + if (expected && !is_confirmed(expected->expectant)) + expected = NULL; + + /* Look up the conntrack helper for master connections only */ + if (!expected) + conntrack->helper = ip_ct_find_helper(&repl_tuple); + + /* If the expectation is dying, then this is a loser. */ + if (expected + && expected->expectant->helper->timeout + && ! del_timer(&expected->timeout)) + expected = NULL; + + if (expected) { + DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", + conntrack, expected); + /* Welcome, Mr. Bond. We've been expecting you... */ + IP_NF_ASSERT(master_ct(conntrack)); + __set_bit(IPS_EXPECTED_BIT, &conntrack->status); + conntrack->master = expected; + expected->sibling = conntrack; + LIST_DELETE(&ip_conntrack_expect_list, expected); + expected->expectant->expecting--; + nf_conntrack_get(&master_ct(conntrack)->infos[0]); + } + atomic_inc(&ip_conntrack_count); + WRITE_UNLOCK(&ip_conntrack_lock); + + if (expected && expected->expectfn) + expected->expectfn(conntrack); + return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; +} + +/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +static inline struct ip_conntrack * +resolve_normal_ct(struct sk_buff *skb, + struct ip_conntrack_protocol *proto, + int *set_reply, + unsigned int hooknum, + enum ip_conntrack_info *ctinfo) +{ + struct ip_conntrack_tuple tuple; + struct ip_conntrack_tuple_hash *h; + + IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); + + if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto)) + return NULL; + + /* look for tuple match */ + h = ip_conntrack_find_get(&tuple, NULL); + if (!h) { + h = init_conntrack(&tuple, proto, skb); + if (!h) + return NULL; + if (IS_ERR(h)) + return (void *)h; + } + + /* It exists; we have (non-exclusive) reference. */ + if (DIRECTION(h) == IP_CT_DIR_REPLY) { + *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; + /* Please set reply bit if this packet OK */ + *set_reply = 1; + } else { + /* Once we've had two way comms, always ESTABLISHED. */ + if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) { + DEBUGP("ip_conntrack_in: normal packet for %p\n", + h->ctrack); + *ctinfo = IP_CT_ESTABLISHED; + } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) { + DEBUGP("ip_conntrack_in: related packet for %p\n", + h->ctrack); + *ctinfo = IP_CT_RELATED; + } else { + DEBUGP("ip_conntrack_in: new packet for %p\n", + h->ctrack); + *ctinfo = IP_CT_NEW; + } + *set_reply = 0; + } + skb->nfct = &h->ctrack->infos[*ctinfo]; + return h->ctrack; +} + +/* Netfilter hook itself. */ +unsigned int ip_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_conntrack_protocol *proto; + int set_reply; + int ret; + + /* FIXME: Do this right please. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN; + +/* Doesn't cover locally-generated broadcast, so not worth it. */ +#if 0 + /* Ignore broadcast: no `connection'. */ + if ((*pskb)->pkt_type == PACKET_BROADCAST) { + printk("Broadcast packet!\n"); + return NF_ACCEPT; + } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) + == htonl(0x000000FF)) { + printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n", + NIPQUAD((*pskb)->nh.iph->saddr), + NIPQUAD((*pskb)->nh.iph->daddr), + (*pskb)->sk, (*pskb)->pkt_type); + } +#endif + + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + /* Gather fragments. */ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + if (!*pskb) + return NF_STOLEN; + } + + proto = ip_ct_find_proto((*pskb)->nh.iph->protocol); + + /* It may be an icmp error... */ + if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP + && icmp_error_track(*pskb, &ctinfo, hooknum)) + return NF_ACCEPT; + + if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) + /* Not valid part of a connection */ + return NF_ACCEPT; + + if (IS_ERR(ct)) + /* Too stressed to deal. */ + return NF_DROP; + + IP_NF_ASSERT((*pskb)->nfct); + + ret = proto->packet(ct, *pskb, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + + if (ret != NF_DROP && ct->helper) { + ret = ct->helper->help(*pskb, ct, ctinfo); + if (ret == -1) { + /* Invalid */ + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + return NF_ACCEPT; + } + } + if (set_reply) + set_bit(IPS_SEEN_REPLY_BIT, &ct->status); + + return ret; +} + +int invert_tuplepr(struct ip_conntrack_tuple *inverse, + const struct ip_conntrack_tuple *orig) +{ + return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum)); +} + +static inline int resent_expect(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *mask) +{ + DEBUGP("resent_expect\n"); + DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple); + DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple); + DEBUGP("test tuple: "); DUMP_TUPLE(tuple); + return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple)) + || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple))) + && ip_ct_tuple_equal(&i->mask, mask)); +} + +/* Would two expected things clash? */ +static inline int expect_clash(const struct ip_conntrack_expect *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *mask) +{ + /* Part covered by intersection of masks must be unequal, + otherwise they clash */ + struct ip_conntrack_tuple intersect_mask + = { { i->mask.src.ip & mask->src.ip, + { i->mask.src.u.all & mask->src.u.all } }, + { i->mask.dst.ip & mask->dst.ip, + { i->mask.dst.u.all & mask->dst.u.all }, + i->mask.dst.protonum & mask->dst.protonum } }; + + return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask); +} + +inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect) +{ + WRITE_LOCK(&ip_conntrack_lock); + unexpect_related(expect); + WRITE_UNLOCK(&ip_conntrack_lock); +} + +static void expectation_timed_out(unsigned long ul_expect) +{ + struct ip_conntrack_expect *expect = (void *) ul_expect; + + DEBUGP("expectation %p timed out\n", expect); + WRITE_LOCK(&ip_conntrack_lock); + __unexpect_related(expect); + WRITE_UNLOCK(&ip_conntrack_lock); +} + +/* Add a related connection. */ +int ip_conntrack_expect_related(struct ip_conntrack *related_to, + struct ip_conntrack_expect *expect) +{ + struct ip_conntrack_expect *old, *new; + int ret = 0; + + WRITE_LOCK(&ip_conntrack_lock); + /* Because of the write lock, no reader can walk the lists, + * so there is no need to use the tuple lock too */ + + DEBUGP("ip_conntrack_expect_related %p\n", related_to); + DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); + DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); + + old = LIST_FIND(&ip_conntrack_expect_list, resent_expect, + struct ip_conntrack_expect *, &expect->tuple, + &expect->mask); + if (old) { + /* Helper private data may contain offsets but no pointers + pointing into the payload - otherwise we should have to copy + the data filled out by the helper over the old one */ + DEBUGP("expect_related: resent packet\n"); + if (related_to->helper->timeout) { + if (!del_timer(&old->timeout)) { + /* expectation is dying. Fall through */ + old = NULL; + } else { + old->timeout.expires = jiffies + + related_to->helper->timeout * HZ; + add_timer(&old->timeout); + } + } + + if (old) { + WRITE_UNLOCK(&ip_conntrack_lock); + return -EEXIST; + } + } else if (related_to->helper->max_expected && + related_to->expecting >= related_to->helper->max_expected) { + /* old == NULL */ + if (!(related_to->helper->flags & + IP_CT_HELPER_F_REUSE_EXPECT)) { + WRITE_UNLOCK(&ip_conntrack_lock); + if (net_ratelimit()) + printk(KERN_WARNING + "ip_conntrack: max number of expected " + "connections %i of %s reached for " + "%u.%u.%u.%u->%u.%u.%u.%u\n", + related_to->helper->max_expected, + related_to->helper->name, + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); + return -EPERM; + } + DEBUGP("ip_conntrack: max number of expected " + "connections %i of %s reached for " + "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n", + related_to->helper->max_expected, + related_to->helper->name, + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip), + NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip)); + + /* choose the the oldest expectation to evict */ + list_for_each_entry(old, &related_to->sibling_list, + expected_list) + if (old->sibling == NULL) + break; + + /* We cannot fail since related_to->expecting is the number + * of unconfirmed expectations */ + IP_NF_ASSERT(old && old->sibling == NULL); + + /* newnat14 does not reuse the real allocated memory + * structures but rather unexpects the old and + * allocates a new. unexpect_related will decrement + * related_to->expecting. + */ + unexpect_related(old); + ret = -EPERM; + } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash, + struct ip_conntrack_expect *, &expect->tuple, + &expect->mask)) { + WRITE_UNLOCK(&ip_conntrack_lock); + DEBUGP("expect_related: busy!\n"); + return -EBUSY; + } + + new = (struct ip_conntrack_expect *) + kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC); + if (!new) { + WRITE_UNLOCK(&ip_conntrack_lock); + DEBUGP("expect_relaed: OOM allocating expect\n"); + return -ENOMEM; + } + + DEBUGP("new expectation %p of conntrack %p\n", new, related_to); + memcpy(new, expect, sizeof(*expect)); + new->expectant = related_to; + new->sibling = NULL; + atomic_set(&new->use, 1); + + /* add to expected list for this connection */ + list_add_tail(&new->expected_list, &related_to->sibling_list); + /* add to global list of expectations */ + list_prepend(&ip_conntrack_expect_list, &new->list); + /* add and start timer if required */ + if (related_to->helper->timeout) { + init_timer(&new->timeout); + new->timeout.data = (unsigned long)new; + new->timeout.function = expectation_timed_out; + new->timeout.expires = jiffies + + related_to->helper->timeout * HZ; + add_timer(&new->timeout); + } + related_to->expecting++; + + WRITE_UNLOCK(&ip_conntrack_lock); + + return ret; +} + +/* Change tuple in an existing expectation */ +int ip_conntrack_change_expect(struct ip_conntrack_expect *expect, + struct ip_conntrack_tuple *newtuple) +{ + int ret; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + WRITE_LOCK(&ip_conntrack_expect_tuple_lock); + + DEBUGP("change_expect:\n"); + DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple); + DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask); + DEBUGP("newtuple: "); DUMP_TUPLE(newtuple); + if (expect->ct_tuple.dst.protonum == 0) { + /* Never seen before */ + DEBUGP("change expect: never seen before\n"); + if (!ip_ct_tuple_equal(&expect->tuple, newtuple) + && LIST_FIND(&ip_conntrack_expect_list, expect_clash, + struct ip_conntrack_expect *, newtuple, &expect->mask)) { + /* Force NAT to find an unused tuple */ + ret = -1; + } else { + memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple)); + memcpy(&expect->tuple, newtuple, sizeof(expect->tuple)); + ret = 0; + } + } else { + /* Resent packet */ + DEBUGP("change expect: resent packet\n"); + if (ip_ct_tuple_equal(&expect->tuple, newtuple)) { + ret = 0; + } else { + /* Force NAT to choose again the same port */ + ret = -1; + } + } + WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock); + + return ret; +} + +/* Alter reply tuple (maybe alter helper). If it's already taken, + return 0 and don't do alteration. */ +int ip_conntrack_alter_reply(struct ip_conntrack *conntrack, + const struct ip_conntrack_tuple *newreply) +{ + WRITE_LOCK(&ip_conntrack_lock); + if (__ip_conntrack_find(newreply, conntrack)) { + WRITE_UNLOCK(&ip_conntrack_lock); + return 0; + } + /* Should be unconfirmed, so not in hash table yet */ + IP_NF_ASSERT(!is_confirmed(conntrack)); + + DEBUGP("Altering reply tuple of %p to ", conntrack); + DUMP_TUPLE(newreply); + + conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; + if (!conntrack->master) + conntrack->helper = LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + newreply); + WRITE_UNLOCK(&ip_conntrack_lock); + + return 1; +} + +int ip_conntrack_helper_register(struct ip_conntrack_helper *me) +{ + WRITE_LOCK(&ip_conntrack_lock); + list_prepend(&helpers, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + return 0; +} + +static inline int unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) +{ + if (i->ctrack->helper == me) { + /* Get rid of any expected. */ + remove_expectations(i->ctrack, 0); + /* And *then* set helper to NULL */ + i->ctrack->helper = NULL; + } + return 0; +} + +void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) +{ + unsigned int i; + + /* Need write lock here, to delete helper. */ + WRITE_LOCK(&ip_conntrack_lock); + LIST_DELETE(&helpers, me); + + /* Get rid of expecteds, set helpers to NULL. */ + for (i = 0; i < ip_conntrack_htable_size; i++) + LIST_FIND_W(&ip_conntrack_hash[i], unhelp, + struct ip_conntrack_tuple_hash *, me); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. */ + synchronize_net(); +} + +/* Refresh conntrack for this many jiffies. */ +void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies) +{ + IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); + + WRITE_LOCK(&ip_conntrack_lock); + /* If not in hash table, timer will not be active yet */ + if (!is_confirmed(ct)) + ct->timeout.expires = extra_jiffies; + else { + /* Need del_timer for race avoidance (may already be dying). */ + if (del_timer(&ct->timeout)) { + ct->timeout.expires = jiffies + extra_jiffies; + add_timer(&ct->timeout); + } + } + WRITE_UNLOCK(&ip_conntrack_lock); +} + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +ip_ct_gather_frags(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int olddebug = skb->nf_debug; +#endif + if (sk) { + sock_hold(sk); + skb_orphan(skb); + } + + local_bh_disable(); + skb = ip_defrag(skb); + local_bh_enable(); + + if (!skb) { + if (sk) + sock_put(sk); + return skb; + } + + if (sk) { + skb_set_owner_w(skb, sk); + sock_put(sk); + } + + ip_send_check(skb->nh.iph); + skb->nfcache |= NFC_ALTERED; +#ifdef CONFIG_NETFILTER_DEBUG + /* Packet path as if nothing had happened. */ + skb->nf_debug = olddebug; +#endif + return skb; +} + +/* Used by ipt_REJECT. */ +static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct) +{ + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = __ip_conntrack_get(nfct, &ctinfo); + + /* This ICMP is in reverse direction to the packet which + caused it */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) + ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; + else + ctinfo = IP_CT_RELATED; + + /* Attach new skbuff, and increment count */ + nskb->nfct = &ct->infos[ctinfo]; + atomic_inc(&ct->ct_general.use); +} + +static inline int +do_kill(const struct ip_conntrack_tuple_hash *i, + int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + return kill(i->ctrack, data); +} + +/* Bring out ya dead! */ +static struct ip_conntrack_tuple_hash * +get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + struct ip_conntrack_tuple_hash *h = NULL; + unsigned int i; + + READ_LOCK(&ip_conntrack_lock); + for (i = 0; !h && i < ip_conntrack_htable_size; i++) { + h = LIST_FIND(&ip_conntrack_hash[i], do_kill, + struct ip_conntrack_tuple_hash *, kill, data); + } + if (h) + atomic_inc(&h->ctrack->ct_general.use); + READ_UNLOCK(&ip_conntrack_lock); + + return h; +} + +void +ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data), + void *data) +{ + struct ip_conntrack_tuple_hash *h; + + /* This is order n^2, by the way. */ + while ((h = get_next_corpse(kill, data)) != NULL) { + /* Time to push up daises... */ + if (del_timer(&h->ctrack->timeout)) + death_by_timeout((unsigned long)h->ctrack); + /* ... else the timer will get him soon. */ + + ip_conntrack_put(h->ctrack); + } +} + +/* Fast function for those who don't want to parse /proc (and I don't + blame them). */ +/* Reversing the socket's dst/src point of view gives us the reply + mapping. */ +static int +getorigdst(struct sock *sk, int optval, void *user, int *len) +{ + struct inet_opt *inet = inet_sk(sk); + struct ip_conntrack_tuple_hash *h; + struct ip_conntrack_tuple tuple; + + IP_CT_TUPLE_U_BLANK(&tuple); + tuple.src.ip = inet->rcv_saddr; + tuple.src.u.tcp.port = inet->sport; + tuple.dst.ip = inet->daddr; + tuple.dst.u.tcp.port = inet->dport; + tuple.dst.protonum = IPPROTO_TCP; + + /* We only do TCP at the moment: is there a better way? */ + if (strcmp(sk->sk_prot->name, "TCP")) { + DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + return -ENOPROTOOPT; + } + + if ((unsigned int) *len < sizeof(struct sockaddr_in)) { + DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", + *len, sizeof(struct sockaddr_in)); + return -EINVAL; + } + + h = ip_conntrack_find_get(&tuple, NULL); + if (h) { + struct sockaddr_in sin; + + sin.sin_family = AF_INET; + sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.ip; + + DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + ip_conntrack_put(h->ctrack); + if (copy_to_user(user, &sin, sizeof(sin)) != 0) + return -EFAULT; + else + return 0; + } + DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", + NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port), + NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} + +static struct nf_sockopt_ops so_getorigdst = { + .pf = PF_INET, + .get_optmin = SO_ORIGINAL_DST, + .get_optmax = SO_ORIGINAL_DST+1, + .get = &getorigdst, +}; + +static int kill_all(const struct ip_conntrack *i, void *data) +{ + return 1; +} + +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ +void ip_conntrack_cleanup(void) +{ + ip_ct_attach = NULL; + /* This makes sure all current packets have passed through + netfilter framework. Roll on, two-stage module + delete... */ + synchronize_net(); + + i_see_dead_people: + ip_ct_selective_cleanup(kill_all, NULL); + if (atomic_read(&ip_conntrack_count) != 0) { + schedule(); + goto i_see_dead_people; + } + + kmem_cache_destroy(ip_conntrack_cachep); + vfree(ip_conntrack_hash); + nf_unregister_sockopt(&so_getorigdst); +} + +static int hashsize; +MODULE_PARM(hashsize, "i"); + +int __init ip_conntrack_init(void) +{ + unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB + * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ + if (hashsize) { + ip_conntrack_htable_size = hashsize; + } else { + ip_conntrack_htable_size + = (((num_physpages << PAGE_SHIFT) / 16384) + / sizeof(struct list_head)); + if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) + ip_conntrack_htable_size = 8192; + if (ip_conntrack_htable_size < 16) + ip_conntrack_htable_size = 16; + } + ip_conntrack_max = 8 * ip_conntrack_htable_size; + + printk("ip_conntrack version %s (%u buckets, %d max)" + " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, + ip_conntrack_htable_size, ip_conntrack_max, + sizeof(struct ip_conntrack)); + + ret = nf_register_sockopt(&so_getorigdst); + if (ret != 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); + return ret; + } + + ip_conntrack_hash = vmalloc(sizeof(struct list_head) + * ip_conntrack_htable_size); + if (!ip_conntrack_hash) { + printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); + goto err_unreg_sockopt; + } + + ip_conntrack_cachep = kmem_cache_create("ip_conntrack", + sizeof(struct ip_conntrack), 0, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!ip_conntrack_cachep) { + printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); + goto err_free_hash; + } + /* Don't NEED lock here, but good form anyway. */ + WRITE_LOCK(&ip_conntrack_lock); + /* Sew in builtin protocols. */ + list_append(&protocol_list, &ip_conntrack_protocol_tcp); + list_append(&protocol_list, &ip_conntrack_protocol_udp); + list_append(&protocol_list, &ip_conntrack_protocol_icmp); + WRITE_UNLOCK(&ip_conntrack_lock); + + for (i = 0; i < ip_conntrack_htable_size; i++) + INIT_LIST_HEAD(&ip_conntrack_hash[i]); + + /* For use by ipt_REJECT */ + ip_ct_attach = ip_conntrack_attach; + return ret; + +err_free_hash: + vfree(ip_conntrack_hash); +err_unreg_sockopt: + nf_unregister_sockopt(&so_getorigdst); + + return -ENOMEM; +} diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_standalone.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_standalone.c 2003-10-25 20:43:32.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_standalone.c 2003-11-13 11:01:39.000000000 +0100 @@ -186,6 +186,26 @@ return ip_conntrack_confirm(*pskb); } +static unsigned int ip_conntrack_defrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + /* Gather fragments. */ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = ip_ct_gather_frags(*pskb); + if (!*pskb) + return NF_STOLEN; + } + return NF_ACCEPT; +} + static unsigned int ip_refrag(unsigned int hooknum, struct sk_buff **pskb, const struct net_device *in, @@ -225,6 +245,15 @@ return ip_conntrack_in(hooknum, pskb, in, out, okfn); } +/* At the very first: defragment */ +static struct nf_hook_ops ip_conntrack_defrag_ops = { + .hook = ip_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, +}; + /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ static struct nf_hook_ops ip_conntrack_in_ops = { @@ -367,10 +396,15 @@ if (!proc) goto cleanup_init; proc->owner = THIS_MODULE; + ret = nf_register_hook(&ip_conntrack_defrag_ops); + if (ret < 0) { + printk("ip_conntrack: can't register pre-routing hook to defrag.\n"); + goto cleanup_proc; + } ret = nf_register_hook(&ip_conntrack_in_ops); if (ret < 0) { printk("ip_conntrack: can't register pre-routing hook.\n"); - goto cleanup_proc; + goto cleanup_defragops; } ret = nf_register_hook(&ip_conntrack_local_out_ops); if (ret < 0) { @@ -408,6 +442,8 @@ nf_unregister_hook(&ip_conntrack_local_out_ops); cleanup_inops: nf_unregister_hook(&ip_conntrack_in_ops); + cleanup_defragops: + nf_unregister_hook(&ip_conntrack_defrag_ops); cleanup_proc: proc_net_remove("ip_conntrack"); cleanup_init: @@ -499,5 +535,6 @@ EXPORT_SYMBOL(ip_conntrack_expect_list); EXPORT_SYMBOL(ip_conntrack_lock); EXPORT_SYMBOL(ip_conntrack_hash); +EXPORT_SYMBOL(ip_conntrack_untracked); EXPORT_SYMBOL_GPL(ip_conntrack_find_get); EXPORT_SYMBOL_GPL(ip_conntrack_put); diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_standalone.c.orig linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_standalone.c.orig --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_standalone.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_standalone.c.orig 2003-10-25 20:43:32.000000000 +0200 @@ -0,0 +1,503 @@ +/* This file contains all the functions required for the standalone + ip_conntrack module. + + These are not required by the compatibility layer. +*/ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_SYSCTL +#include +#endif +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock) + +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +MODULE_LICENSE("GPL"); + +static int kill_proto(const struct ip_conntrack *i, void *data) +{ + return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == + *((u_int8_t *) data)); +} + +static unsigned int +print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple, + struct ip_conntrack_protocol *proto) +{ + int len; + + len = sprintf(buffer, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", + NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip)); + + len += proto->print_tuple(buffer + len, tuple); + + return len; +} + +/* FIXME: Don't print source proto part. --RR */ +static unsigned int +print_expect(char *buffer, const struct ip_conntrack_expect *expect) +{ + unsigned int len; + + if (expect->expectant->helper->timeout) + len = sprintf(buffer, "EXPECTING: %lu ", + timer_pending(&expect->timeout) + ? (expect->timeout.expires - jiffies)/HZ : 0); + else + len = sprintf(buffer, "EXPECTING: - "); + len += sprintf(buffer + len, "use=%u proto=%u ", + atomic_read(&expect->use), expect->tuple.dst.protonum); + len += print_tuple(buffer + len, &expect->tuple, + __ip_ct_find_proto(expect->tuple.dst.protonum)); + len += sprintf(buffer + len, "\n"); + return len; +} + +static unsigned int +print_conntrack(char *buffer, struct ip_conntrack *conntrack) +{ + unsigned int len; + struct ip_conntrack_protocol *proto + = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + + len = sprintf(buffer, "%-8s %u %lu ", + proto->name, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum, + timer_pending(&conntrack->timeout) + ? (conntrack->timeout.expires - jiffies)/HZ : 0); + + len += proto->print_conntrack(buffer + len, conntrack); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + proto); + if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) + len += sprintf(buffer + len, "[UNREPLIED] "); + len += print_tuple(buffer + len, + &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, + proto); + if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) + len += sprintf(buffer + len, "[ASSURED] "); + len += sprintf(buffer + len, "use=%u ", + atomic_read(&conntrack->ct_general.use)); + len += sprintf(buffer + len, "\n"); + + return len; +} + +/* Returns true when finished. */ +static inline int +conntrack_iterate(const struct ip_conntrack_tuple_hash *hash, + char *buffer, off_t offset, off_t *upto, + unsigned int *len, unsigned int maxlen) +{ + unsigned int newlen; + IP_NF_ASSERT(hash->ctrack); + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + + /* Only count originals */ + if (DIRECTION(hash)) + return 0; + + if ((*upto)++ < offset) + return 0; + + newlen = print_conntrack(buffer + *len, hash->ctrack); + if (*len + newlen > maxlen) + return 1; + else *len += newlen; + + return 0; +} + +static int +list_conntracks(char *buffer, char **start, off_t offset, int length) +{ + unsigned int i; + unsigned int len = 0; + off_t upto = 0; + struct list_head *e; + + READ_LOCK(&ip_conntrack_lock); + /* Traverse hash; print originals then reply. */ + for (i = 0; i < ip_conntrack_htable_size; i++) { + if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate, + struct ip_conntrack_tuple_hash *, + buffer, offset, &upto, &len, length)) + goto finished; + } + + /* Now iterate through expecteds. */ + list_for_each(e, &ip_conntrack_expect_list) { + unsigned int last_len; + struct ip_conntrack_expect *expect + = (struct ip_conntrack_expect *)e; + if (upto++ < offset) continue; + + last_len = len; + len += print_expect(buffer + len, expect); + if (len > length) { + len = last_len; + goto finished; + } + } + + finished: + READ_UNLOCK(&ip_conntrack_lock); + + /* `start' hack - see fs/proc/generic.c line ~165 */ + *start = (char *)((unsigned int)upto - offset); + return len; +} + +static unsigned int ip_confirm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* We've seen it coming out the other side: confirm it */ + return ip_conntrack_confirm(*pskb); +} + +static unsigned int ip_refrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct rtable *rt = (struct rtable *)(*pskb)->dst; + + /* We've seen it coming out the other side: confirm */ + if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT) + return NF_DROP; + + /* Local packets are never produced too large for their + interface. We degfragment them at LOCAL_OUT, however, + so we have to refragment them here. */ + if ((*pskb)->len > dst_pmtu(&rt->u.dst)) { + /* No hook can be after us, so this should be OK. */ + ip_fragment(*pskb, okfn); + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static unsigned int ip_conntrack_local(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk("ipt_hook: happy cracking.\n"); + return NF_ACCEPT; + } + return ip_conntrack_in(hooknum, pskb, in, out, okfn); +} + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. */ +static struct nf_hook_ops ip_conntrack_in_ops = { + .hook = ip_conntrack_in, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, +}; + +static struct nf_hook_ops ip_conntrack_local_out_ops = { + .hook = ip_conntrack_local, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK, +}; + +/* Refragmenter; last chance. */ +static struct nf_hook_ops ip_conntrack_out_ops = { + .hook = ip_refrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_LAST, +}; + +static struct nf_hook_ops ip_conntrack_local_in_ops = { + .hook = ip_confirm, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_IN, + .priority = NF_IP_PRI_LAST-1, +}; + +/* Sysctl support */ + +#ifdef CONFIG_SYSCTL + +/* From ip_conntrack_core.c */ +extern int ip_conntrack_max; + +/* From ip_conntrack_proto_tcp.c */ +extern unsigned long ip_ct_tcp_timeout_syn_sent; +extern unsigned long ip_ct_tcp_timeout_syn_recv; +extern unsigned long ip_ct_tcp_timeout_established; +extern unsigned long ip_ct_tcp_timeout_fin_wait; +extern unsigned long ip_ct_tcp_timeout_close_wait; +extern unsigned long ip_ct_tcp_timeout_last_ack; +extern unsigned long ip_ct_tcp_timeout_time_wait; +extern unsigned long ip_ct_tcp_timeout_close; + +/* From ip_conntrack_proto_udp.c */ +extern unsigned long ip_ct_udp_timeout; +extern unsigned long ip_ct_udp_timeout_stream; + +/* From ip_conntrack_proto_icmp.c */ +extern unsigned long ip_ct_icmp_timeout; + +/* From ip_conntrack_proto_icmp.c */ +extern unsigned long ip_ct_generic_timeout; + +static struct ctl_table_header *ip_ct_sysctl_header; + +static ctl_table ip_ct_sysctl_table[] = { + {NET_IPV4_NF_CONNTRACK_MAX, "ip_conntrack_max", + &ip_conntrack_max, sizeof(int), 0644, NULL, + &proc_dointvec}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, "ip_conntrack_tcp_timeout_syn_sent", + &ip_ct_tcp_timeout_syn_sent, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, "ip_conntrack_tcp_timeout_syn_recv", + &ip_ct_tcp_timeout_syn_recv, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, "ip_conntrack_tcp_timeout_established", + &ip_ct_tcp_timeout_established, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, "ip_conntrack_tcp_timeout_fin_wait", + &ip_ct_tcp_timeout_fin_wait, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, "ip_conntrack_tcp_timeout_close_wait", + &ip_ct_tcp_timeout_close_wait, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, "ip_conntrack_tcp_timeout_last_ack", + &ip_ct_tcp_timeout_last_ack, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, "ip_conntrack_tcp_timeout_time_wait", + &ip_ct_tcp_timeout_time_wait, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, "ip_conntrack_tcp_timeout_close", + &ip_ct_tcp_timeout_close, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, "ip_conntrack_udp_timeout", + &ip_ct_udp_timeout, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, "ip_conntrack_udp_timeout_stream", + &ip_ct_udp_timeout_stream, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, "ip_conntrack_icmp_timeout", + &ip_ct_icmp_timeout, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, "ip_conntrack_generic_timeout", + &ip_ct_generic_timeout, sizeof(unsigned int), 0644, NULL, + &proc_dointvec_jiffies}, + {0} +}; + +#define NET_IP_CONNTRACK_MAX 2089 + +static ctl_table ip_ct_netfilter_table[] = { + {NET_IPV4_NETFILTER, "netfilter", NULL, 0, 0555, ip_ct_sysctl_table, 0, 0, 0, 0, 0}, + {NET_IP_CONNTRACK_MAX, "ip_conntrack_max", + &ip_conntrack_max, sizeof(int), 0644, NULL, + &proc_dointvec}, + {0} +}; + +static ctl_table ip_ct_ipv4_table[] = { + {NET_IPV4, "ipv4", NULL, 0, 0555, ip_ct_netfilter_table, 0, 0, 0, 0, 0}, + {0} +}; + +static ctl_table ip_ct_net_table[] = { + {CTL_NET, "net", NULL, 0, 0555, ip_ct_ipv4_table, 0, 0, 0, 0, 0}, + {0} +}; +#endif +static int init_or_cleanup(int init) +{ + struct proc_dir_entry *proc; + int ret = 0; + + if (!init) goto cleanup; + + ret = ip_conntrack_init(); + if (ret < 0) + goto cleanup_nothing; + + proc = proc_net_create("ip_conntrack",0,list_conntracks); + if (!proc) goto cleanup_init; + proc->owner = THIS_MODULE; + + ret = nf_register_hook(&ip_conntrack_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register pre-routing hook.\n"); + goto cleanup_proc; + } + ret = nf_register_hook(&ip_conntrack_local_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local out hook.\n"); + goto cleanup_inops; + } + ret = nf_register_hook(&ip_conntrack_out_ops); + if (ret < 0) { + printk("ip_conntrack: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } + ret = nf_register_hook(&ip_conntrack_local_in_ops); + if (ret < 0) { + printk("ip_conntrack: can't register local in hook.\n"); + goto cleanup_inoutandlocalops; + } +#ifdef CONFIG_SYSCTL + ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0); + if (ip_ct_sysctl_header == NULL) { + printk("ip_conntrack: can't register to sysctl.\n"); + goto cleanup; + } +#endif + + return ret; + + cleanup: +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(ip_ct_sysctl_header); +#endif + nf_unregister_hook(&ip_conntrack_local_in_ops); + cleanup_inoutandlocalops: + nf_unregister_hook(&ip_conntrack_out_ops); + cleanup_inandlocalops: + nf_unregister_hook(&ip_conntrack_local_out_ops); + cleanup_inops: + nf_unregister_hook(&ip_conntrack_in_ops); + cleanup_proc: + proc_net_remove("ip_conntrack"); + cleanup_init: + ip_conntrack_cleanup(); + cleanup_nothing: + return ret; +} + +/* FIXME: Allow NULL functions and sub in pointers to generic for + them. --RR */ +int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) +{ + int ret = 0; + struct list_head *i; + + WRITE_LOCK(&ip_conntrack_lock); + list_for_each(i, &protocol_list) { + if (((struct ip_conntrack_protocol *)i)->proto + == proto->proto) { + ret = -EBUSY; + goto out; + } + } + + list_prepend(&protocol_list, proto); + + out: + WRITE_UNLOCK(&ip_conntrack_lock); + return ret; +} + +void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) +{ + WRITE_LOCK(&ip_conntrack_lock); + + /* ip_ct_find_proto() returns proto_generic in case there is no protocol + * helper. So this should be enough - HW */ + LIST_DELETE(&protocol_list, proto); + WRITE_UNLOCK(&ip_conntrack_lock); + + /* Somebody could be still looking at the proto in bh. */ + synchronize_net(); + + /* Remove all contrack entries for this protocol */ + ip_ct_selective_cleanup(kill_proto, &proto->proto); +} + +static int __init init(void) +{ + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +/* Some modules need us, but don't depend directly on any symbol. + They should call this. */ +void need_ip_conntrack(void) +{ +} + +EXPORT_SYMBOL(ip_conntrack_protocol_register); +EXPORT_SYMBOL(ip_conntrack_protocol_unregister); +EXPORT_SYMBOL(invert_tuplepr); +EXPORT_SYMBOL(ip_conntrack_alter_reply); +EXPORT_SYMBOL(ip_conntrack_destroyed); +EXPORT_SYMBOL(ip_conntrack_get); +EXPORT_SYMBOL(need_ip_conntrack); +EXPORT_SYMBOL(ip_conntrack_helper_register); +EXPORT_SYMBOL(ip_conntrack_helper_unregister); +EXPORT_SYMBOL(ip_ct_selective_cleanup); +EXPORT_SYMBOL(ip_ct_refresh); +EXPORT_SYMBOL(ip_ct_find_proto); +EXPORT_SYMBOL(__ip_ct_find_proto); +EXPORT_SYMBOL(ip_ct_find_helper); +EXPORT_SYMBOL(ip_conntrack_expect_related); +EXPORT_SYMBOL(ip_conntrack_change_expect); +EXPORT_SYMBOL(ip_conntrack_unexpect_related); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_expect_put); +EXPORT_SYMBOL(ip_conntrack_tuple_taken); +EXPORT_SYMBOL(ip_ct_gather_frags); +EXPORT_SYMBOL(ip_conntrack_htable_size); +EXPORT_SYMBOL(ip_conntrack_expect_list); +EXPORT_SYMBOL(ip_conntrack_lock); +EXPORT_SYMBOL(ip_conntrack_hash); +EXPORT_SYMBOL_GPL(ip_conntrack_find_get); +EXPORT_SYMBOL_GPL(ip_conntrack_put); diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_tftp.c linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_tftp.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_conntrack_tftp.c 2003-10-25 20:43:19.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_conntrack_tftp.c 2003-11-13 11:01:08.000000000 +0100 @@ -97,8 +97,6 @@ for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) { /* Create helper structure */ - memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper)); - tftp[i].tuple.dst.protonum = IPPROTO_UDP; tftp[i].tuple.src.u.udp.port = htons(ports[i]); tftp[i].mask.dst.protonum = 0xFFFF; diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_amanda.c linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_amanda.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_amanda.c 2003-10-25 20:43:06.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_amanda.c 2003-11-13 11:01:08.000000000 +0100 @@ -195,8 +195,6 @@ struct ip_nat_helper *hlpr; hlpr = &ip_nat_amanda_helper; - memset(hlpr, 0, sizeof(struct ip_nat_helper)); - hlpr->tuple.dst.protonum = IPPROTO_UDP; hlpr->tuple.src.u.udp.port = htons(10080); hlpr->mask.src.u.udp.port = 0xFFFF; diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_core.c linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_core.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_core.c 2003-11-13 11:07:50.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_core.c 2003-11-13 11:01:39.000000000 +0100 @@ -810,7 +810,7 @@ /* Have to grab read lock before sibling_list traversal */ READ_LOCK(&ip_conntrack_lock); - list_for_each(cur_item, &ct->sibling_list) { + list_for_each_prev(cur_item, &ct->sibling_list) { exp = list_entry(cur_item, struct ip_conntrack_expect, expected_list); @@ -1010,7 +1010,11 @@ /* FIXME: Man, this is a hack. */ IP_NF_ASSERT(ip_conntrack_destroyed == NULL); ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; - + + /* Initialize fake conntrack so that NAT will skip it */ + ip_conntrack_untracked.nat.info.initialized |= + (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST); + return 0; } diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_core.c.orig linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_core.c.orig --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_core.c.orig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_core.c.orig 2003-11-13 11:01:27.000000000 +0100 @@ -0,0 +1,1030 @@ +/* NAT for netfilter; shared with compatibility layer. */ + +/* (c) 1999 Paul `Rusty' Russell. Licenced under the GNU General + Public Licence. */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* For tcp_prot in getorigdst */ +#include +#include + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_RWLOCK(ip_nat_lock); +DECLARE_RWLOCK_EXTERN(ip_conntrack_lock); + +/* Calculated at init based on memory size */ +static unsigned int ip_nat_htable_size; + +static struct list_head *bysource; +static struct list_head *byipsproto; +LIST_HEAD(protos); +LIST_HEAD(helpers); + +extern struct ip_nat_protocol unknown_nat_protocol; + +/* We keep extra hashes for each conntrack, for fast searching. */ +static inline size_t +hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto) +{ + /* Modified src and dst, to ensure we don't create two + identical streams. */ + return (src + dst + proto) % ip_nat_htable_size; +} + +static inline size_t +hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto) +{ + /* Original src, to ensure we map it consistently if poss. */ + return (manip->ip + manip->u.all + proto) % ip_nat_htable_size; +} + +/* Noone using conntrack by the time this called. */ +static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) +{ + struct ip_nat_info *info = &conn->nat.info; + unsigned int hs, hp; + + if (!info->initialized) + return; + + IP_NF_ASSERT(info->bysource.conntrack); + IP_NF_ASSERT(info->byipsproto.conntrack); + + hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src, + conn->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + + hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + conn->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + WRITE_LOCK(&ip_nat_lock); + LIST_DELETE(&bysource[hs], &info->bysource); + LIST_DELETE(&byipsproto[hp], &info->byipsproto); + WRITE_UNLOCK(&ip_nat_lock); +} + +/* We do checksum mangling, so if they were wrong before they're still + * wrong. Also works for incomplete packets (eg. ICMP dest + * unreachables.) */ +u_int16_t +ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) +{ + u_int32_t diffs[] = { oldvalinv, newval }; + return csum_fold(csum_partial((char *)diffs, sizeof(diffs), + oldcheck^0xFFFF)); +} + +static inline int cmp_proto(const struct ip_nat_protocol *i, int proto) +{ + return i->protonum == proto; +} + +struct ip_nat_protocol * +find_nat_proto(u_int16_t protonum) +{ + struct ip_nat_protocol *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum); + if (!i) + i = &unknown_nat_protocol; + return i; +} + +/* Is this tuple already taken? (not by us) */ +int +ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + /* Conntrack tracking doesn't keep track of outgoing tuples; only + incoming ones. NAT means they don't have a fixed mapping, + so we invert the tuple and look for the incoming reply. + + We could keep a separate hash if this proves too slow. */ + struct ip_conntrack_tuple reply; + + invert_tuplepr(&reply, tuple); + return ip_conntrack_tuple_taken(&reply, ignored_conntrack); +} + +/* Does tuple + the source manip come within the range mr */ +static int +in_range(const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_manip *manip, + const struct ip_nat_multi_range *mr) +{ + struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum); + unsigned int i; + struct ip_conntrack_tuple newtuple = { *manip, tuple->dst }; + + for (i = 0; i < mr->rangesize; i++) { + /* If we are allowed to map IPs, then we must be in the + range specified, otherwise we must be unchanged. */ + if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) { + if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip) + || (ntohl(newtuple.src.ip) + > ntohl(mr->range[i].max_ip))) + continue; + } else { + if (newtuple.src.ip != tuple->src.ip) + continue; + } + + if (!(mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED) + || proto->in_range(&newtuple, IP_NAT_MANIP_SRC, + &mr->range[i].min, &mr->range[i].max)) + return 1; + } + return 0; +} + +static inline int +src_cmp(const struct ip_nat_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr) +{ + return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum + == tuple->dst.protonum + && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip + == tuple->src.ip + && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all + == tuple->src.u.all + && in_range(tuple, + &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + mr)); +} + +/* Only called for SRC manip */ +static struct ip_conntrack_manip * +find_appropriate_src(const struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr) +{ + unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum); + struct ip_nat_hash *i; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr); + if (i) + return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src; + else + return NULL; +} + +#ifdef CONFIG_IP_NF_NAT_LOCAL +/* If it's really a local destination manip, it may need to do a + source manip too. */ +static int +do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp) +{ + struct flowi fl = { .nl_u = { .ip4_u = { .daddr = var_ip } } }; + struct rtable *rt; + + /* FIXME: IPTOS_TOS(iph->tos) --RR */ + if (ip_route_output_key(&rt, &fl) != 0) { + DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n", + NIPQUAD(var_ip)); + return 0; + } + + *other_ipp = rt->rt_src; + ip_rt_put(rt); + return 1; +} +#endif + +/* Simple way to iterate through all. */ +static inline int fake_cmp(const struct ip_nat_hash *i, + u_int32_t src, u_int32_t dst, u_int16_t protonum, + unsigned int *score, + const struct ip_conntrack *conntrack) +{ + /* Compare backwards: we're dealing with OUTGOING tuples, and + inside the conntrack is the REPLY tuple. Don't count this + conntrack. */ + if (i->conntrack != conntrack + && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst + && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src + && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum + == protonum)) + (*score)++; + return 0; +} + +static inline unsigned int +count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum, + const struct ip_conntrack *conntrack) +{ + unsigned int score = 0; + unsigned int h; + + MUST_BE_READ_LOCKED(&ip_nat_lock); + h = hash_by_ipsproto(src, dst, protonum); + LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *, + src, dst, protonum, &score, conntrack); + + return score; +} + +/* For [FUTURE] fragmentation handling, we want the least-used + src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus + if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports + 1-65535, we don't do pro-rata allocation based on ports; we choose + the ip with the lowest src-ip/dst-ip/proto usage. + + If an allocation then fails (eg. all 6 ports used in the 1.2.3.4 + range), we eliminate that and try again. This is not the most + efficient approach, but if you're worried about that, don't hand us + ranges you don't really have. */ +static struct ip_nat_range * +find_best_ips_proto(struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr, + const struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + unsigned int i; + struct { + const struct ip_nat_range *range; + unsigned int score; + struct ip_conntrack_tuple tuple; + } best = { NULL, 0xFFFFFFFF }; + u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip; + static unsigned int randomness; + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) { + var_ipp = &tuple->src.ip; + saved_ip = tuple->dst.ip; + other_ipp = &tuple->dst.ip; + } else { + var_ipp = &tuple->dst.ip; + saved_ip = tuple->src.ip; + other_ipp = &tuple->src.ip; + } + /* Don't do do_extra_mangle unless necessary (overrides + explicit socket bindings, for example) */ + orig_dstip = tuple->dst.ip; + + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) { + /* Host order */ + u_int32_t minip, maxip, j; + + /* Don't do ranges which are already eliminated. */ + if (mr->range[i].flags & IP_NAT_RANGE_FULL) { + continue; + } + + if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) { + minip = ntohl(mr->range[i].min_ip); + maxip = ntohl(mr->range[i].max_ip); + } else + minip = maxip = ntohl(*var_ipp); + + randomness++; + for (j = 0; j < maxip - minip + 1; j++) { + unsigned int score; + + *var_ipp = htonl(minip + (randomness + j) + % (maxip - minip + 1)); + + /* Reset the other ip in case it was mangled by + * do_extra_mangle last time. */ + *other_ipp = saved_ip; + +#ifdef CONFIG_IP_NF_NAT_LOCAL + if (hooknum == NF_IP_LOCAL_OUT + && *var_ipp != orig_dstip + && !do_extra_mangle(*var_ipp, other_ipp)) { + DEBUGP("Range %u %u.%u.%u.%u rt failed!\n", + i, NIPQUAD(*var_ipp)); + /* Can't route? This whole range part is + * probably screwed, but keep trying + * anyway. */ + continue; + } +#endif + + /* Count how many others map onto this. */ + score = count_maps(tuple->src.ip, tuple->dst.ip, + tuple->dst.protonum, conntrack); + if (score < best.score) { + /* Optimization: doesn't get any better than + this. */ + if (score == 0) + return (struct ip_nat_range *) + &mr->range[i]; + + best.score = score; + best.tuple = *tuple; + best.range = &mr->range[i]; + } + } + } + *tuple = best.tuple; + + /* Discard const. */ + return (struct ip_nat_range *)best.range; +} + +/* Fast version doesn't iterate through hash chains, but only handles + common case of single IP address (null NAT, masquerade) */ +static struct ip_nat_range * +find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple, + const struct ip_nat_multi_range *mr, + const struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + if (mr->rangesize != 1 + || (mr->range[0].flags & IP_NAT_RANGE_FULL) + || ((mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) + && mr->range[0].min_ip != mr->range[0].max_ip)) + return find_best_ips_proto(tuple, mr, conntrack, hooknum); + + if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) + tuple->src.ip = mr->range[0].min_ip; + else { + /* Only do extra mangle when required (breaks + socket binding) */ +#ifdef CONFIG_IP_NF_NAT_LOCAL + if (tuple->dst.ip != mr->range[0].min_ip + && hooknum == NF_IP_LOCAL_OUT + && !do_extra_mangle(mr->range[0].min_ip, + &tuple->src.ip)) + return NULL; +#endif + tuple->dst.ip = mr->range[0].min_ip; + } + } + + /* Discard const. */ + return (struct ip_nat_range *)&mr->range[0]; +} + +static int +get_unique_tuple(struct ip_conntrack_tuple *tuple, + const struct ip_conntrack_tuple *orig_tuple, + const struct ip_nat_multi_range *mrr, + struct ip_conntrack *conntrack, + unsigned int hooknum) +{ + struct ip_nat_protocol *proto + = find_nat_proto(orig_tuple->dst.protonum); + struct ip_nat_range *rptr; + unsigned int i; + int ret; + + /* We temporarily use flags for marking full parts, but we + always clean up afterwards */ + struct ip_nat_multi_range *mr = (void *)mrr; + + /* 1) If this srcip/proto/src-proto-part is currently mapped, + and that same mapping gives a unique tuple within the given + range, use that. + + This is only required for source (ie. NAT/masq) mappings. + So far, we don't do local source mappings, so multiple + manips not an issue. */ + if (hooknum == NF_IP_POST_ROUTING) { + struct ip_conntrack_manip *manip; + + manip = find_appropriate_src(orig_tuple, mr); + if (manip) { + /* Apply same source manipulation. */ + *tuple = ((struct ip_conntrack_tuple) + { *manip, orig_tuple->dst }); + DEBUGP("get_unique_tuple: Found current src map\n"); + if (!ip_nat_used_tuple(tuple, conntrack)) + return 1; + } + } + + /* 2) Select the least-used IP/proto combination in the given + range. + */ + *tuple = *orig_tuple; + while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum)) + != NULL) { + DEBUGP("Found best for "); DUMP_TUPLE(tuple); + /* 3) The per-protocol part of the manip is made to + map into the range to make a unique tuple. */ + + /* Only bother mapping if it's not already in range + and unique */ + if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED) + || proto->in_range(tuple, HOOK2MANIP(hooknum), + &rptr->min, &rptr->max)) + && !ip_nat_used_tuple(tuple, conntrack)) { + ret = 1; + goto clear_fulls; + } else { + if (proto->unique_tuple(tuple, rptr, + HOOK2MANIP(hooknum), + conntrack)) { + /* Must be unique. */ + IP_NF_ASSERT(!ip_nat_used_tuple(tuple, + conntrack)); + ret = 1; + goto clear_fulls; + } else if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) { + /* Try implicit source NAT; protocol + may be able to play with ports to + make it unique. */ + struct ip_nat_range r + = { IP_NAT_RANGE_MAP_IPS, + tuple->src.ip, tuple->src.ip, + { 0 }, { 0 } }; + DEBUGP("Trying implicit mapping\n"); + if (proto->unique_tuple(tuple, &r, + IP_NAT_MANIP_SRC, + conntrack)) { + /* Must be unique. */ + IP_NF_ASSERT(!ip_nat_used_tuple + (tuple, conntrack)); + ret = 1; + goto clear_fulls; + } + } + DEBUGP("Protocol can't get unique tuple %u.\n", + hooknum); + } + + /* Eliminate that from range, and try again. */ + rptr->flags |= IP_NAT_RANGE_FULL; + *tuple = *orig_tuple; + } + + ret = 0; + + clear_fulls: + /* Clear full flags. */ + IP_NF_ASSERT(mr->rangesize >= 1); + for (i = 0; i < mr->rangesize; i++) + mr->range[i].flags &= ~IP_NAT_RANGE_FULL; + + return ret; +} + +static inline int +helper_cmp(const struct ip_nat_helper *helper, + const struct ip_conntrack_tuple *tuple) +{ + return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask); +} + +/* Where to manip the reply packets (will be reverse manip). */ +static unsigned int opposite_hook[NF_IP_NUMHOOKS] += { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING, + [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING, +#ifdef CONFIG_IP_NF_NAT_LOCAL + [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN, + [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT, +#endif +}; + +unsigned int +ip_nat_setup_info(struct ip_conntrack *conntrack, + const struct ip_nat_multi_range *mr, + unsigned int hooknum) +{ + struct ip_conntrack_tuple new_tuple, inv_tuple, reply; + struct ip_conntrack_tuple orig_tp; + struct ip_nat_info *info = &conntrack->nat.info; + int in_hashes = info->initialized; + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING + || hooknum == NF_IP_POST_ROUTING + || hooknum == NF_IP_LOCAL_OUT); + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); + + /* What we've got will look like inverse of reply. Normally + this is what is in the conntrack, except for prior + manipulations (future optimization: if num_manips == 0, + orig_tp = + conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ + invert_tuplepr(&orig_tp, + &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple); + +#if 0 + { + unsigned int i; + + DEBUGP("Hook %u (%s), ", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST"); + DUMP_TUPLE(&orig_tp); + DEBUGP("Range %p: ", mr); + for (i = 0; i < mr->rangesize; i++) { + DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n", + i, + (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) + ? " MAP_IPS" : "", + (mr->range[i].flags + & IP_NAT_RANGE_PROTO_SPECIFIED) + ? " PROTO_SPECIFIED" : "", + (mr->range[i].flags & IP_NAT_RANGE_FULL) + ? " FULL" : "", + NIPQUAD(mr->range[i].min_ip), + NIPQUAD(mr->range[i].max_ip), + mr->range[i].min.all, + mr->range[i].max.all); + } + } +#endif + + do { + if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack, + hooknum)) { + DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n", + conntrack); + return NF_DROP; + } + +#if 0 + DEBUGP("Hook %u (%s) %p\n", hooknum, + HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST", + conntrack); + DEBUGP("Original: "); + DUMP_TUPLE(&orig_tp); + DEBUGP("New: "); + DUMP_TUPLE(&new_tuple); +#endif + + /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT): + the original (A/B/C/D') and the mangled one (E/F/G/H'). + + We're only allowed to work with the SRC per-proto + part, so we create inverses of both to start, then + derive the other fields we need. */ + + /* Reply connection: simply invert the new tuple + (G/H/E/F') */ + invert_tuplepr(&reply, &new_tuple); + + /* Alter conntrack table so it recognizes replies. + If fail this race (reply tuple now used), repeat. */ + } while (!ip_conntrack_alter_reply(conntrack, &reply)); + + /* FIXME: We can simply used existing conntrack reply tuple + here --RR */ + /* Create inverse of original: C/D/A/B' */ + invert_tuplepr(&inv_tuple, &orig_tp); + + /* Has source changed?. */ + if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) { + /* In this direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_SRC, new_tuple.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a destination manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_DST, orig_tp.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* Has destination changed? */ + if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) { + /* In this direction, a destination manip */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_ORIGINAL, hooknum, + IP_NAT_MANIP_DST, reply.src }); + + IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS); + + /* In the reverse direction, a source manip. */ + info->manips[info->num_manips++] = + ((struct ip_nat_info_manip) + { IP_CT_DIR_REPLY, opposite_hook[hooknum], + IP_NAT_MANIP_SRC, inv_tuple.src }); + IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS); + } + + /* If there's a helper, assign it; based on new tuple. */ + if (!conntrack->master) + info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, + &reply); + + /* It's done. */ + info->initialized |= (1 << HOOK2MANIP(hooknum)); + + if (in_hashes) { + IP_NF_ASSERT(info->bysource.conntrack); + replace_in_hashes(conntrack, info); + } else { + place_in_hashes(conntrack, info); + } + + return NF_ACCEPT; +} + +void replace_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + /* Source has changed, so replace in hashes. */ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOUNG in byips_proto hash + (ie. reverse dst and src of reply packet. */ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(info->bysource.conntrack == conntrack); + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + + list_del(&info->bysource.list); + list_del(&info->byipsproto.list); + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +void place_in_hashes(struct ip_conntrack *conntrack, + struct ip_nat_info *info) +{ + unsigned int srchash + = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.src, + conntrack->tuplehash[IP_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + /* We place packet as seen OUTGOUNG in byips_proto hash + (ie. reverse dst and src of reply packet. */ + unsigned int ipsprotohash + = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.src.ip, + conntrack->tuplehash[IP_CT_DIR_REPLY] + .tuple.dst.protonum); + + IP_NF_ASSERT(!info->bysource.conntrack); + + MUST_BE_WRITE_LOCKED(&ip_nat_lock); + info->byipsproto.conntrack = conntrack; + info->bysource.conntrack = conntrack; + + list_prepend(&bysource[srchash], &info->bysource); + list_prepend(&byipsproto[ipsprotohash], &info->byipsproto); +} + +/* Returns true if succeeded. */ +static int +manip_pkt(u_int16_t proto, + struct sk_buff **pskb, + unsigned int iphdroff, + const struct ip_conntrack_manip *manip, + enum ip_nat_manip_type maniptype) +{ + struct iphdr *iph; + + (*pskb)->nfcache |= NFC_ALTERED; + if (!skb_ip_make_writable(pskb, iphdroff+sizeof(iph))) + return 0; + + iph = (void *)(*pskb)->data + iphdroff; + + /* Manipulate protcol part. */ + if (!find_nat_proto(proto)->manip_pkt(pskb, + iphdroff + iph->ihl*4, + manip, maniptype)) + return 0; + + iph = (void *)(*pskb)->data + iphdroff; + + if (maniptype == IP_NAT_MANIP_SRC) { + iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip, + iph->check); + iph->saddr = manip->ip; + } else { + iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip, + iph->check); + iph->daddr = manip->ip; + } + return 1; +} + +static inline int exp_for_packet(struct ip_conntrack_expect *exp, + struct sk_buff *skb) +{ + struct ip_conntrack_protocol *proto; + int ret = 1; + + MUST_BE_READ_LOCKED(&ip_conntrack_lock); + proto = __ip_ct_find_proto(skb->nh.iph->protocol); + if (proto->exp_matches_pkt) + ret = proto->exp_matches_pkt(exp, skb); + + return ret; +} + +/* Do packet manipulations according to binding. */ +unsigned int +do_bindings(struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo, + struct ip_nat_info *info, + unsigned int hooknum, + struct sk_buff **pskb) +{ + unsigned int i; + struct ip_nat_helper *helper; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + int proto = (*pskb)->nh.iph->protocol; + + /* Need nat lock to protect against modification, but neither + conntrack (referenced) and helper (deleted with + synchronize_bh()) can vanish. */ + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + if (info->manips[i].direction == dir + && info->manips[i].hooknum == hooknum) { + DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n", + *pskb, + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", + NIPQUAD(info->manips[i].manip.ip), + htons(info->manips[i].manip.u.all)); + if (!manip_pkt(proto, pskb, 0, + &info->manips[i].manip, + info->manips[i].maniptype)) { + READ_UNLOCK(&ip_nat_lock); + return NF_DROP; + } + } + } + helper = info->helper; + READ_UNLOCK(&ip_nat_lock); + + if (helper) { + struct ip_conntrack_expect *exp = NULL; + struct list_head *cur_item; + int ret = NF_ACCEPT; + int helper_called = 0; + + DEBUGP("do_bindings: helper existing for (%p)\n", ct); + + /* Always defragged for helpers */ + IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off + & htons(IP_MF|IP_OFFSET))); + + /* Have to grab read lock before sibling_list traversal */ + READ_LOCK(&ip_conntrack_lock); + list_for_each_prev(cur_item, &ct->sibling_list) { + exp = list_entry(cur_item, struct ip_conntrack_expect, + expected_list); + + /* if this expectation is already established, skip */ + if (exp->sibling) + continue; + + if (exp_for_packet(exp, *pskb)) { + /* FIXME: May be true multiple times in the + * case of UDP!! */ + DEBUGP("calling nat helper (exp=%p) for packet\n", exp); + ret = helper->help(ct, exp, info, ctinfo, + hooknum, pskb); + if (ret != NF_ACCEPT) { + READ_UNLOCK(&ip_conntrack_lock); + return ret; + } + helper_called = 1; + } + } + /* Helper might want to manip the packet even when there is no + * matching expectation for this packet */ + if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) { + DEBUGP("calling nat helper for packet without expectation\n"); + ret = helper->help(ct, NULL, info, ctinfo, + hooknum, pskb); + if (ret != NF_ACCEPT) { + READ_UNLOCK(&ip_conntrack_lock); + return ret; + } + } + READ_UNLOCK(&ip_conntrack_lock); + + /* Adjust sequence number only once per packet + * (helper is called at all hooks) */ + if (proto == IPPROTO_TCP + && (hooknum == NF_IP_POST_ROUTING + || hooknum == NF_IP_LOCAL_IN)) { + DEBUGP("ip_nat_core: adjusting sequence number\n"); + /* future: put this in a l4-proto specific function, + * and call this function here. */ + if (!ip_nat_seq_adjust(pskb, ct, ctinfo)) + ret = NF_DROP; + } + + return ret; + + } else + return NF_ACCEPT; + + /* not reached */ +} + +int +icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *conntrack, + unsigned int hooknum, + int dir) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + unsigned int i; + struct ip_nat_info *info = &conntrack->nat.info; + int hdrlen; + + if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside))) + return 0; + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + + /* We're actually going to mangle it beyond trivial checksum + adjustment, so make sure the current checksum is correct. */ + if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { + hdrlen = (*pskb)->nh.iph->ihl * 4; + if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, 0))) + return 0; + } + + /* Must be RELATED */ + IP_NF_ASSERT((*pskb)->nfct + - (struct ip_conntrack *)(*pskb)->nfct->master + == IP_CT_RELATED + || (*pskb)->nfct + - (struct ip_conntrack *)(*pskb)->nfct->master + == IP_CT_RELATED+IP_CT_IS_REPLY); + + /* Redirects on non-null nats must be dropped, else they'll + start talking to each other without our translation, and be + confused... --RR */ + if (inside->icmp.type == ICMP_REDIRECT) { + /* Don't care about races here. */ + if (info->initialized + != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST)) + || info->num_manips != 0) + return 0; + } + + DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n", + *pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); + /* Note: May not be from a NAT'd host, but probably safest to + do translation always as if it came from the host itself + (even though a "host unreachable" coming from the host + itself is a bit weird). + + More explanation: some people use NAT for anonymizing. + Also, CERT recommends dropping all packets from private IP + addresses (although ICMP errors from internal links with + such addresses are not too uncommon, as Alan Cox points + out) */ + + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + DEBUGP("icmp_reply: manip %u dir %s hook %u\n", + i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ? + "ORIG" : "REPLY", info->manips[i].hooknum); + + if (info->manips[i].direction != dir) + continue; + + /* Mapping the inner packet is just like a normal + packet, except it was never src/dst reversed, so + where we would normally apply a dst manip, we apply + a src, and vice versa. */ + if (info->manips[i].hooknum == hooknum) { + DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "DST" : "SRC", + NIPQUAD(info->manips[i].manip.ip), + ntohs(info->manips[i].manip.u.udp.port)); + if (!manip_pkt(inside->ip.protocol, pskb, + (*pskb)->nh.iph->ihl*4 + + sizeof(inside->icmp), + &info->manips[i].manip, + !info->manips[i].maniptype)) + goto unlock_fail; + + /* Outer packet needs to have IP header NATed like + it's a reply. */ + + /* Use mapping to map outer packet: 0 give no + per-proto mapping */ + DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n", + info->manips[i].maniptype == IP_NAT_MANIP_SRC + ? "SRC" : "DST", + NIPQUAD(info->manips[i].manip.ip)); + if (!manip_pkt(0, pskb, 0, + &info->manips[i].manip, + info->manips[i].maniptype)) + goto unlock_fail; + } + } + READ_UNLOCK(&ip_nat_lock); + + hdrlen = (*pskb)->nh.iph->ihl * 4; + + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + + inside->icmp.checksum = 0; + inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, + 0)); + return 1; + + unlock_fail: + READ_UNLOCK(&ip_nat_lock); + return 0; +} + +int __init ip_nat_init(void) +{ + size_t i; + + /* Leave them the same for the moment. */ + ip_nat_htable_size = ip_conntrack_htable_size; + + /* One vmalloc for both hash tables */ + bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2); + if (!bysource) { + return -ENOMEM; + } + byipsproto = bysource + ip_nat_htable_size; + + /* Sew in builtin protocols. */ + WRITE_LOCK(&ip_nat_lock); + list_append(&protos, &ip_nat_protocol_tcp); + list_append(&protos, &ip_nat_protocol_udp); + list_append(&protos, &ip_nat_protocol_icmp); + WRITE_UNLOCK(&ip_nat_lock); + + for (i = 0; i < ip_nat_htable_size; i++) { + INIT_LIST_HEAD(&bysource[i]); + INIT_LIST_HEAD(&byipsproto[i]); + } + + /* FIXME: Man, this is a hack. */ + IP_NF_ASSERT(ip_conntrack_destroyed == NULL); + ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; + + return 0; +} + +/* Clear NAT section of all conntracks, in case we're loaded again. */ +static int clean_nat(const struct ip_conntrack *i, void *data) +{ + memset((void *)&i->nat, 0, sizeof(i->nat)); + return 0; +} + +/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */ +void ip_nat_cleanup(void) +{ + ip_ct_selective_cleanup(&clean_nat, NULL); + ip_conntrack_destroyed = NULL; + vfree(bysource); +} diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_rule.c linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_rule.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_rule.c 2003-10-25 20:43:27.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_rule.c 2003-11-13 11:01:39.000000000 +0100 @@ -67,7 +67,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* POST_ROUTING */ @@ -75,7 +75,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* LOCAL_OUT */ @@ -83,7 +83,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } } }, @@ -92,7 +92,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, { } }, "ERROR" diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_tftp.c linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_tftp.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_nat_tftp.c 2003-10-25 20:43:18.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_nat_tftp.c 2003-11-13 11:01:08.000000000 +0100 @@ -164,8 +164,6 @@ ports[0] = TFTP_PORT; for (i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) { - memset(&tftp[i], 0, sizeof(struct ip_nat_helper)); - tftp[i].tuple.dst.protonum = IPPROTO_UDP; tftp[i].tuple.src.u.udp.port = htons(ports[i]); tftp[i].mask.dst.protonum = 0xFFFF; diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ip_tables.c linux-2.6.0-test9/net/ipv4/netfilter/ip_tables.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ip_tables.c 2003-10-25 20:43:11.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ip_tables.c 2003-11-13 11:01:39.000000000 +0100 @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -23,8 +24,17 @@ #include #include +#include #include +static const char *hooknames[] = { + [NF_IP_PRE_ROUTING] "PREROUTING", + [NF_IP_LOCAL_IN] "INPUT", + [NF_IP_FORWARD] "FORWARD", + [NF_IP_LOCAL_OUT] "OUTPUT", + [NF_IP_POST_ROUTING] "POSTROUTING", +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("IPv4 packet filter"); @@ -322,6 +332,12 @@ t = ipt_get_target(e); IP_NF_ASSERT(t->u.kernel.target); + + /* The packet traced and the rule isn't an unconditional return/END. */ + if (((*pskb)->nfcache & NFC_TRACE) && e->rulenum) { + nf_log_packet(AF_INET, hook, *pskb, in, out, "TRACE: %s/%s/%u ", + table->name, e->chainname, e->rulenum); + } /* Standard target? */ if (!t->u.kernel.target->target) { int v; @@ -474,6 +490,29 @@ return find_inlist_lock(&ipt_target, name, "ipt_", error, mutex); } +static inline int +find_error_target(struct ipt_entry *s, + struct ipt_entry *e, + char **chainname) +{ + struct ipt_entry_target *t; + static struct ipt_entry *found = NULL; + + if (s == e) { + if (!found) + return 0; + t = ipt_get_target(found); + if (strcmp(t->u.user.name, + IPT_ERROR_TARGET) == 0) { + *chainname = t->data; + return 1; + } + } else + found = s; + + return 0; +} + /* All zeroes == unconditional rule. */ static inline int unconditional(const struct ipt_ip *ip) @@ -493,6 +532,8 @@ mark_source_chains(struct ipt_table_info *newinfo, unsigned int valid_hooks) { unsigned int hook; + char *chainname = NULL; + u_int32_t rulenum; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ @@ -506,6 +547,8 @@ /* Set initial back pointer. */ e->counters.pcnt = pos; + rulenum = 1; + chainname = (char *) hooknames[hook]; for (;;) { struct ipt_standard_target *t @@ -518,6 +561,8 @@ } e->comefrom |= ((1 << hook) | (1 << NF_IP_NUMHOOKS)); + e->rulenum = rulenum++; + e->chainname = chainname; /* Unconditional return/END. */ if (e->target_offset == sizeof(struct ipt_entry) @@ -527,6 +572,10 @@ && unconditional(&e->ip)) { unsigned int oldpos, size; + /* Set unconditional rulenum to zero. */ + e->rulenum = 0; + e->counters.bcnt = 0; + /* Return: backtrack through the last big jump. */ do { @@ -552,6 +601,11 @@ (newinfo->entries + pos); } while (oldpos == pos + e->next_offset); + /* Restore chainname, rulenum. */ + chainname = e->chainname; + rulenum = e->counters.bcnt; + e->counters.bcnt = 0; + /* Move along one */ size = e->next_offset; e = (struct ipt_entry *) @@ -567,6 +621,17 @@ /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e->counters.bcnt = rulenum++; + rulenum = 1; + e = (struct ipt_entry *) + (newinfo->entries + newpos); + if (IPT_ENTRY_ITERATE(newinfo->entries, + newinfo->size, + find_error_target, + e, &chainname) == 0) { + printk("ip_tables: table screwed up!\n"); + return 0; + } } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_LOG.c linux-2.6.0-test9/net/ipv4/netfilter/ipt_LOG.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_LOG.c 2003-10-25 20:44:37.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ipt_LOG.c 2003-11-13 11:01:18.000000000 +0100 @@ -4,12 +4,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include #include @@ -17,6 +19,10 @@ MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("iptables syslog logging module"); +static unsigned int nflog = 1; +MODULE_PARM(nflog, "i"); +MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); + #if 0 #define DEBUGP printk #else @@ -315,28 +321,25 @@ /* maxlen = 230+ 91 + 230 + 252 = 803 */ } -static unsigned int -ipt_log_target(struct sk_buff **pskb, +static void +ipt_log_packet(unsigned int hooknum, + const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) + const struct ipt_log_info *loginfo, + const char *level_string, + const char *prefix) { - const struct ipt_log_info *loginfo = targinfo; - char level_string[4] = "< >"; - - level_string[1] = '0' + (loginfo->level % 8); spin_lock_bh(&log_lock); printk(level_string); printk("%sIN=%s OUT=%s ", - loginfo->prefix, + prefix == NULL ? loginfo->prefix : prefix, in ? in->name : "", out ? out->name : ""); #ifdef CONFIG_BRIDGE_NETFILTER - if ((*pskb)->nf_bridge) { - struct net_device *physindev = (*pskb)->nf_bridge->physindev; - struct net_device *physoutdev = (*pskb)->nf_bridge->physoutdev; + if (skb->nf_bridge) { + struct net_device *physindev = skb->nf_bridge->physindev; + struct net_device *physoutdev = skb->nf_bridge->physoutdev; if (physindev && in != physindev) printk("PHYSIN=%s ", physindev->name); @@ -348,25 +351,56 @@ if (in && !out) { /* MAC logging for input chain only. */ printk("MAC="); - if ((*pskb)->dev && (*pskb)->dev->hard_header_len - && (*pskb)->mac.raw != (void*)(*pskb)->nh.iph) { + if (skb->dev && skb->dev->hard_header_len + && skb->mac.raw != (void*)skb->nh.iph) { int i; - unsigned char *p = (*pskb)->mac.raw; - for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++) + unsigned char *p = skb->mac.raw; + for (i = 0; i < skb->dev->hard_header_len; i++,p++) printk("%02x%c", *p, - i==(*pskb)->dev->hard_header_len - 1 + i==skb->dev->hard_header_len - 1 ? ' ':':'); } else printk(" "); } - dump_packet(loginfo, *pskb, 0); + dump_packet(loginfo, skb, 0); printk("\n"); spin_unlock_bh(&log_lock); +} + +static unsigned int +ipt_log_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_log_info *loginfo = targinfo; + char level_string[4] = "< >"; + + level_string[1] = '0' + (loginfo->level % 8); + ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); return IPT_CONTINUE; } +static void +ipt_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + struct ipt_log_info loginfo = { + .level = 0, + .logflags = IPT_LOG_MASK, + .prefix = "" + }; + + ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); +} + static int ipt_log_checkentry(const char *tablename, const struct ipt_entry *e, void *targinfo, @@ -406,12 +440,17 @@ { if (ipt_register_target(&ipt_log_reg)) return -EINVAL; + if (nflog) + nf_log_register(PF_INET, &ipt_logfn); return 0; } static void __exit fini(void) { + if (nflog) + nf_log_unregister(PF_INET, &ipt_logfn); + ipt_unregister_target(&ipt_log_reg); } diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_NOTRACK.c linux-2.6.0-test9/net/ipv4/netfilter/ipt_NOTRACK.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_NOTRACK.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/ipt_NOTRACK.c 2003-11-13 11:01:39.000000000 +0100 @@ -0,0 +1,79 @@ +/* This is a module which is used for setting up fake conntracks + * on packets so that they are not seen by the conntrack/NAT code. + */ +#include +#include + +#include +#include + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + /* Previously seen (loopback)? Ignore. */ + if ((*pskb)->nfct != NULL) + return IPT_CONTINUE; + + /* Attach fake conntrack entry. + If there is a real ct entry correspondig to this packet, + it'll hang aroun till timing out. We don't deal with it + for performance reasons. JK */ + (*pskb)->nfct = &ip_conntrack_untracked.infos[IP_CT_NEW]; + nf_conntrack_get((*pskb)->nfct); + + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != 0) { + printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n", + targinfosize); + return 0; + } + + if (strcmp(tablename, "raw") != 0) { + printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_notrack_reg = { + .name = "NOTRACK", + .target = target, + .checkentry = checkentry, + .destroy = NULL, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_notrack_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_notrack_reg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("IPv4 NOTRACK target"); diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_TRACE.c linux-2.6.0-test9/net/ipv4/netfilter/ipt_TRACE.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_TRACE.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/ipt_TRACE.c 2003-11-13 11:01:39.000000000 +0100 @@ -0,0 +1,67 @@ +/* This is a module which is used for setting + * the NFC_TRACE flag in the nfcache field of an skb. + */ +#include +#include + +#include + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + (*pskb)->nfcache |= NFC_TRACE; + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != 0) { + printk(KERN_WARNING "TRACE: targinfosize %u != 0\n", + targinfosize); + return 0; + } + + if (strcmp(tablename, "raw") != 0) { + printk(KERN_WARNING "TRACE: can only be called from \"raw\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_trace_reg = { + .name = "TRACE", + .target = target, + .checkentry = checkentry, + .destroy = NULL, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + if (ipt_register_target(&ipt_trace_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_trace_reg); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("IPv4 TRACE target"); diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.0-test9/net/ipv4/netfilter/ipt_ULOG.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_ULOG.c 2003-10-25 20:43:22.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ipt_ULOG.c 2003-11-13 11:01:18.000000000 +0100 @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -75,6 +76,10 @@ MODULE_PARM(flushtimeout, "i"); MODULE_PARM_DESC(flushtimeout, "buffer flush timeout"); +static unsigned int nflog = 1; +MODULE_PARM(nflog, "i"); +MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); + /* global data structures */ typedef struct { @@ -152,17 +157,17 @@ return skb; } -static unsigned int ipt_ulog_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, void *userinfo) +static void ipt_ulog_packet(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct ipt_ulog_info *loginfo, + const char *prefix) { ulog_buff_t *ub; ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; /* ffs == find first bit set, necessary because userspace * is already shifting groupnumber, but we need unshifted. @@ -171,8 +176,8 @@ /* calculate the size of the skb needed */ if ((loginfo->copy_range == 0) || - (loginfo->copy_range > (*pskb)->len)) { - copy_len = (*pskb)->len; + (loginfo->copy_range > skb->len)) { + copy_len = skb->len; } else { copy_len = loginfo->copy_range; } @@ -209,19 +214,21 @@ /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = (*pskb)->stamp.tv_sec; - pm->timestamp_usec = (*pskb)->stamp.tv_usec; - pm->mark = (*pskb)->nfmark; + pm->timestamp_sec = skb->stamp.tv_sec; + pm->timestamp_usec = skb->stamp.tv_usec; + pm->mark = skb->nfmark; pm->hook = hooknum; - if (loginfo->prefix[0] != '\0') + if (prefix != NULL) + strncpy(pm->prefix, prefix, sizeof(pm->prefix)); + else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); else *(pm->prefix) = '\0'; if (in && in->hard_header_len > 0 - && (*pskb)->mac.raw != (void *) (*pskb)->nh.iph + && skb->mac.raw != (void *) skb->nh.iph && in->hard_header_len <= ULOG_MAC_LEN) { - memcpy(pm->mac, (*pskb)->mac.raw, in->hard_header_len); + memcpy(pm->mac, skb->mac.raw, in->hard_header_len); pm->mac_len = in->hard_header_len; } else pm->mac_len = 0; @@ -236,8 +243,8 @@ else pm->outdev_name[0] = '\0'; - /* copy_len <= (*pskb)->len, so can't fail. */ - if (skb_copy_bits(*pskb, 0, pm->payload, copy_len) < 0) + /* copy_len <= skb->len, so can't fail. */ + if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) BUG(); /* check if we are building multi-part messages */ @@ -261,8 +268,7 @@ UNLOCK_BH(&ulog_lock); - return IPT_CONTINUE; - + return; nlmsg_failure: PRINTR("ipt_ULOG: error during NLMSG_PUT\n"); @@ -271,8 +277,35 @@ PRINTR("ipt_ULOG: Error building netlink message\n"); UNLOCK_BH(&ulog_lock); +} + +static unsigned int ipt_ulog_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; - return IPT_CONTINUE; + ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); + + return IPT_CONTINUE; +} + +static void ipt_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + struct ipt_ulog_info loginfo = { + .nl_group = NFLOG_DEFAULT_NLGROUP, + .copy_range = 0, + .qthreshold = NFLOG_DEFAULT_QTHRESHOLD, + .prefix = "" + }; + + ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); } static int ipt_ulog_checkentry(const char *tablename, @@ -337,6 +370,9 @@ return -EINVAL; } + if (nflog) + nf_log_register(PF_INET, &ipt_logfn); + return 0; } @@ -347,6 +383,9 @@ DEBUGP("ipt_ULOG: cleanup_module\n"); + if (nflog) + nf_log_unregister(PF_INET, &ipt_logfn); + ipt_unregister_target(&ipt_ulog_reg); sock_release(nflognl->sk_socket); diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_conntrack.c linux-2.6.0-test9/net/ipv4/netfilter/ipt_conntrack.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_conntrack.c 2003-10-25 20:44:42.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ipt_conntrack.c 2003-11-13 11:01:39.000000000 +0100 @@ -29,7 +29,9 @@ #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) - if (ct) + if (skb->nfct == &ip_conntrack_untracked.infos[IP_CT_NEW]) + statebit = IPT_CONNTRACK_STATE_UNTRACKED; + else if (ct) statebit = IPT_CONNTRACK_STATE_BIT(ctinfo); else statebit = IPT_CONNTRACK_STATE_INVALID; diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_sctp.c linux-2.6.0-test9/net/ipv4/netfilter/ipt_sctp.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_sctp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/ipt_sctp.c 2003-11-13 11:01:48.000000000 +0100 @@ -0,0 +1,125 @@ +/* IP tables module for matching the SCTP header + * + * $ipt_sctp.c,v 1.3 2002/05/29 15:09:00 laforge Exp$ + * + * (C) 2003 by Harald Welte + * + * This software is distributed under the terms GNU GPL v2 + */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("IP tables SCTP matching module"); +MODULE_LICENSE("GPL"); + +/* Returns 1 if the port is matched by the range, 0 otherwise */ +static inline int +port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert) +{ + int ret; + + ret = (port >= min && port <= max) ^ invert; + return ret; +} + +static int chunk_match(const struct sk_buff *skb, u_int32_t chunks, u_int32_t chunk_mask) +{ + sctp_chunkhdr_t *ch = (sctp_chunkhdr_t *) skb->data; + + u_int32_t chunks_present = 0; + + do { + u_int8_t *ch_end; + ch_end = ((u_int8_t *) ch) + WORD_ROUND(ntohs(ch->length)); + + if (ch->type < 32) + chunks_present |= (1 << ch_type); + else if (ch->type == SCTP_CID_ASCONF) + chunks_present |= (1 << 31); + else if (ch->type == SCTP_CID_ASCONF_ACK) + chunks_present |= (1 << 30); + + ch = (sctp_chunkhdr_t *) ch_end; + } while (ch_end < skb->tail); + + return ((chunks_present& chunk_mask) == chunks); +} + +static int match(const struct sk_buff *skb, const struct net_device *in, + const struct net_device *out, const void *matchinfo, + int offset, const void *hdr, u_int16_t datalen, + int *hotdrop) +{ + const struct ipt_sctp_info *info = matchinfo; + const struct iphdr *iph = skb->nh.iph; + const struct sctphdr *sh = (struct sctphdr *) skb->h.raw; + + if (iph->protocol != IPPROTO_SCTP) + return 0; + + if (offset == 1) { + duprintf("Dropping evil SCTP offset=1 frag.\n"); + *hotdrop = 1; + return 0; + } else if (offset == 0 && datalen < sizeof(struct sctphdr)) { + /* We've been askd o examine this packet, and we can't. + * Hence, no choice but to drop. */ + duprintf("Dropping evil SCTP offset=0 tinygram.\n"); + *hotdrop = 1; + return 0; + } + + return (!offset + && port_match(info->spts[0], info->spts[1], + ntohs(sh->source), + !!(info->invflags & IPT_SCTP_INV_SRCPT)) + && port_match(info->dpts[0], info->dpts[1], + ntohs(sh->dest), + !!(info->invflags & IPT_SCTP_INV_DSTPT)) + && chunk_match(skb, info->chunks, info->chunk_mask) + ); +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_sctp_info *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_sctp_info))) + return 0; + + if (ip->proto != IPPROTO_SCTP && !(ip->invflags & IPT_INV_PROTO)) + return 0; + + if !(info->invflags & ~IPT_SCTP_INV_MASK) + return 0; + + return 1; +} + +static struct ipt_match sctp_match = { + .name = "sctp", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_match(&sctp_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&sctp_match); +} + +module_init(init); +module_exit(fini); diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_state.c linux-2.6.0-test9/net/ipv4/netfilter/ipt_state.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/ipt_state.c 2003-10-25 20:44:59.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/ipt_state.c 2003-11-13 11:01:39.000000000 +0100 @@ -23,10 +23,12 @@ enum ip_conntrack_info ctinfo; unsigned int statebit; - if (!ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) - statebit = IPT_STATE_INVALID; - else + if (skb->nfct == &ip_conntrack_untracked.infos[IP_CT_NEW]) + statebit = IPT_STATE_UNTRACKED; + else if (ip_conntrack_get((struct sk_buff *)skb, &ctinfo)) statebit = IPT_STATE_BIT(ctinfo); + else + statebit = IPT_STATE_INVALID; return (sinfo->statemask & statebit); } diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/iptable_filter.c linux-2.6.0-test9/net/ipv4/netfilter/iptable_filter.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/iptable_filter.c 2003-10-25 20:43:23.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/iptable_filter.c 2003-11-13 11:01:39.000000000 +0100 @@ -52,7 +52,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* FORWARD */ @@ -60,7 +60,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* LOCAL_OUT */ @@ -68,7 +68,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } } }, @@ -77,7 +77,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, { } }, "ERROR" diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/iptable_mangle.c linux-2.6.0-test9/net/ipv4/netfilter/iptable_mangle.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/iptable_mangle.c 2003-10-25 20:44:37.000000000 +0200 +++ linux-2.6.0-test9/net/ipv4/netfilter/iptable_mangle.c 2003-11-13 11:01:39.000000000 +0100 @@ -69,7 +69,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* LOCAL_IN */ @@ -77,7 +77,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* FORWARD */ @@ -85,7 +85,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* LOCAL_OUT */ @@ -93,7 +93,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* POST_ROUTING */ @@ -101,7 +101,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, }, @@ -110,7 +110,7 @@ 0, sizeof(struct ipt_entry), sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, { } }, "ERROR" diff -Nur linux-2.6.0-test9.org/net/ipv4/netfilter/iptable_raw.c linux-2.6.0-test9/net/ipv4/netfilter/iptable_raw.c --- linux-2.6.0-test9.org/net/ipv4/netfilter/iptable_raw.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv4/netfilter/iptable_raw.c 2003-11-13 11:01:39.000000000 +0100 @@ -0,0 +1,153 @@ +/* + * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT . + * + * Copyright (C) 2003 Jozsef Kadlecsik + */ +#include +#include + +#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) + +/* Standard entry. */ +struct ipt_standard +{ + struct ipt_entry entry; + struct ipt_standard_target target; +}; + +struct ipt_error_target +{ + struct ipt_entry_target target; + char errorname[IPT_FUNCTION_MAXNAMELEN]; +}; + +struct ipt_error +{ + struct ipt_entry entry; + struct ipt_error_target target; +}; + +static struct +{ + struct ipt_replace repl; + struct ipt_standard entries[2]; + struct ipt_error term; +} initial_table __initdata += { { "raw", RAW_VALID_HOOKS, 3, + sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + { [NF_IP_PRE_ROUTING] 0, + [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) }, + 0, NULL, { } }, + { + /* PRE_ROUTING */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, NULL, 0, { 0, 0 }, { } }, + { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_standard), + 0, NULL, 0, { 0, 0 }, { } }, + { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } } + }, + /* ERROR */ + { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ipt_entry), + sizeof(struct ipt_error), + 0, NULL, 0, { 0, 0 }, { } }, + { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, + { } }, + "ERROR" + } + } +}; + +static struct ipt_table packet_raw = { + .name = "raw", + .table = &initial_table.repl, + .valid_hooks = RAW_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .me = THIS_MODULE, +}; + +/* The work comes in here from netfilter.c. */ +static unsigned int +ipt_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); +} + +/* 'raw' is the very first table. */ +static struct nf_hook_ops ipt_ops[] = { + { /* PRE_ROUTING hook */ + .hook = ipt_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_RAW, + }, + { /* LOCAL_OUT hook */ + .hook = ipt_hook, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_RAW, + }, +}; + +static int __init init(void) +{ + int ret; + + /* Register table */ + ret = ipt_register_table(&packet_raw); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ipt_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ipt_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + return ret; + + cleanup_hook0: + nf_unregister_hook(&ipt_ops[0]); + cleanup_table: + ipt_unregister_table(&packet_raw); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ipt_ops[i]); + + ipt_unregister_table(&packet_raw); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("IPv4 raw table"); diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/Kconfig linux-2.6.0-test9/net/ipv6/netfilter/Kconfig --- linux-2.6.0-test9.org/net/ipv6/netfilter/Kconfig 2003-10-25 20:44:37.000000000 +0200 +++ linux-2.6.0-test9/net/ipv6/netfilter/Kconfig 2003-11-13 11:01:39.000000000 +0100 @@ -217,6 +217,31 @@ To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_RAW + tristate "Raw table" + depends on IP6_NF_IPTABLES + help + This option adds a `raw' table to iptables: see the man page for + iptables(8). This table is the very first in the netfilter + framework and hooks in at the PREROUTING and OUTPUT chains. + The TRACE target can be used in this table only. + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_TRACE + tristate "TRACE target support" + depends on IP6_NF_RAW + help + The TRACE target allows packets to be traced as those matches + any subsequent rule in any IPv6 netfilter table/rule. The matched + rule and the packet is logged with the prefix + + TRACE: tablename/chainname/rulenum + + if the ip6t_LOG target is loaded in. + + To compile it as a module, choose M here. If unsure, say N. + #dep_tristate ' LOG target support' CONFIG_IP6_NF_TARGET_LOG $CONFIG_IP6_NF_IPTABLES endmenu diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/Makefile linux-2.6.0-test9/net/ipv6/netfilter/Makefile --- linux-2.6.0-test9.org/net/ipv6/netfilter/Makefile 2003-10-25 20:43:53.000000000 +0200 +++ linux-2.6.0-test9/net/ipv6/netfilter/Makefile 2003-11-13 11:01:39.000000000 +0100 @@ -18,7 +18,9 @@ obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o +obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o +obj-$(CONFIG_IP6_NF_TARGET_TRACE) += ip6t_TRACE.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/ip6_tables.c linux-2.6.0-test9/net/ipv6/netfilter/ip6_tables.c --- linux-2.6.0-test9.org/net/ipv6/netfilter/ip6_tables.c 2003-10-25 20:44:39.000000000 +0200 +++ linux-2.6.0-test9/net/ipv6/netfilter/ip6_tables.c 2003-11-13 11:01:39.000000000 +0100 @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include @@ -24,8 +25,17 @@ #include #include +#include #include +static const char *hook6names[] = { + [NF_IP6_PRE_ROUTING] "PREROUTING", + [NF_IP6_LOCAL_IN] "INPUT", + [NF_IP6_FORWARD] "FORWARD", + [NF_IP6_LOCAL_OUT] "OUTPUT", + [NF_IP6_POST_ROUTING] "POSTROUTING", +}; + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("IPv6 packet filter"); @@ -403,6 +413,12 @@ t = ip6t_get_target(e); IP_NF_ASSERT(t->u.kernel.target); + + /* The packet traced and the rule isn't an unconditional return/END. */ + if (((*pskb)->nfcache & NFC_TRACE) && e->rulenum) { + nf_log_packet(AF_INET6, hook, *pskb, in, out, "TRACE: %s/%s/%u ", + table->name, e->chainname, e->rulenum); + } /* Standard target? */ if (!t->u.kernel.target->target) { int v; @@ -556,6 +572,29 @@ return find_inlist_lock(&ip6t_target, name, "ip6t_", error, mutex); } +static inline int +find_error_target(struct ip6t_entry *s, + struct ip6t_entry *e, + char **chainname) +{ + struct ip6t_entry_target *t; + static struct ip6t_entry *found = NULL; + + if (s == e) { + if (!found) + return 0; + t = ip6t_get_target(found); + if (strcmp(t->u.user.name, + IP6T_ERROR_TARGET) == 0) { + *chainname = t->data; + return 1; + } + } else + found = s; + + return 0; +} + /* All zeroes == unconditional rule. */ static inline int unconditional(const struct ip6t_ip6 *ipv6) @@ -575,6 +614,8 @@ mark_source_chains(struct ip6t_table_info *newinfo, unsigned int valid_hooks) { unsigned int hook; + char *chainname = NULL; + u_int32_t rulenum; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ @@ -588,6 +629,8 @@ /* Set initial back pointer. */ e->counters.pcnt = pos; + rulenum = 1; + chainname = (char *) hook6names[hook]; for (;;) { struct ip6t_standard_target *t @@ -600,6 +643,8 @@ } e->comefrom |= ((1 << hook) | (1 << NF_IP6_NUMHOOKS)); + e->rulenum = rulenum++; + e->chainname = chainname; /* Unconditional return/END. */ if (e->target_offset == sizeof(struct ip6t_entry) @@ -609,6 +654,10 @@ && unconditional(&e->ipv6)) { unsigned int oldpos, size; + /* Set unconditional rulenum to zero. */ + e->rulenum = 0; + e->counters.bcnt = 0; + /* Return: backtrack through the last big jump. */ do { @@ -634,6 +683,11 @@ (newinfo->entries + pos); } while (oldpos == pos + e->next_offset); + /* Restore chainname, rulenum. */ + chainname = e->chainname; + rulenum = e->counters.bcnt; + e->counters.bcnt = 0; + /* Move along one */ size = e->next_offset; e = (struct ip6t_entry *) @@ -649,6 +703,17 @@ /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); + e->counters.bcnt = rulenum++; + rulenum = 1; + e = (struct ip6t_entry *) + (newinfo->entries + newpos); + if (IP6T_ENTRY_ITERATE(newinfo->entries, + newinfo->size, + find_error_target, + e, &chainname) == 0) { + printk("ip6_tables: table screwed up!\n"); + return 0; + } } else { /* ... this is a fallthru */ newpos = pos + e->next_offset; diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/ip6t_LOG.c linux-2.6.0-test9/net/ipv6/netfilter/ip6t_LOG.c --- linux-2.6.0-test9.org/net/ipv6/netfilter/ip6t_LOG.c 2003-10-25 20:43:18.000000000 +0200 +++ linux-2.6.0-test9/net/ipv6/netfilter/ip6t_LOG.c 2003-11-13 11:01:18.000000000 +0100 @@ -3,18 +3,24 @@ */ #include #include +#include #include #include #include #include #include #include +#include #include MODULE_AUTHOR("Jan Rekorajski "); MODULE_DESCRIPTION("IP6 tables LOG target module"); MODULE_LICENSE("GPL"); +static unsigned int nflog = 1; +MODULE_PARM(nflog, "i"); +MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); + struct in_device; #include #include @@ -256,40 +262,38 @@ } } -static unsigned int -ip6t_log_target(struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - const void *targinfo, - void *userinfo) +static void +ip6t_log_packet(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct ip6t_log_info *loginfo, + const char *level_string, + const char *prefix) { - struct ipv6hdr *ipv6h = (*pskb)->nh.ipv6h; - const struct ip6t_log_info *loginfo = targinfo; - char level_string[4] = "< >"; + struct ipv6hdr *ipv6h = skb->nh.ipv6h; - level_string[1] = '0' + (loginfo->level % 8); spin_lock_bh(&log_lock); printk(level_string); printk("%sIN=%s OUT=%s ", - loginfo->prefix, + prefix == NULL ? loginfo->prefix : prefix, in ? in->name : "", out ? out->name : ""); if (in && !out) { /* MAC logging for input chain only. */ printk("MAC="); - if ((*pskb)->dev && (*pskb)->dev->hard_header_len && (*pskb)->mac.raw != (void*)ipv6h) { - if ((*pskb)->dev->type != ARPHRD_SIT){ + if (skb->dev && skb->dev->hard_header_len && skb->mac.raw != (void*)ipv6h) { + if (skb->dev->type != ARPHRD_SIT){ int i; - unsigned char *p = (*pskb)->mac.raw; - for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++) + unsigned char *p = skb->mac.raw; + for (i = 0; i < skb->dev->hard_header_len; i++,p++) printk("%02x%c", *p, - i==(*pskb)->dev->hard_header_len - 1 + i==skb->dev->hard_header_len - 1 ? ' ':':'); } else { int i; - unsigned char *p = (*pskb)->mac.raw; - if ( p - (ETH_ALEN*2+2) > (*pskb)->head ){ + unsigned char *p = skb->mac.raw; + if ( p - (ETH_ALEN*2+2) > skb->head ){ p -= (ETH_ALEN+2); for (i = 0; i < (ETH_ALEN); i++,p++) printk("%02x%s", *p, @@ -300,10 +304,10 @@ i == ETH_ALEN-1 ? ' ' : ':'); } - if (((*pskb)->dev->addr_len == 4) && - (*pskb)->dev->hard_header_len > 20){ + if ((skb->dev->addr_len == 4) && + skb->dev->hard_header_len > 20){ printk("TUNNEL="); - p = (*pskb)->mac.raw + 12; + p = skb->mac.raw + 12; for (i = 0; i < 4; i++,p++) printk("%3d%s", *p, i == 3 ? "->" : "."); @@ -319,10 +323,41 @@ dump_packet(loginfo, ipv6h, 1); printk("\n"); spin_unlock_bh(&log_lock); +} + +static unsigned int +ip6t_log_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ip6t_log_info *loginfo = targinfo; + char level_string[4] = "< >"; + + level_string[1] = '0' + (loginfo->level % 8); + ip6t_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL); return IP6T_CONTINUE; } +static void +ip6t_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + struct ip6t_log_info loginfo = { + .level = 0, + .logflags = IP6T_LOG_MASK, + .prefix = "" + }; + + ip6t_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix); +} + static int ip6t_log_checkentry(const char *tablename, const struct ip6t_entry *e, void *targinfo, @@ -359,12 +394,17 @@ { if (ip6t_register_target(&ip6t_log_reg)) return -EINVAL; + if (nflog) + nf_log_register(PF_INET6, &ip6t_logfn); return 0; } static void __exit fini(void) { + if (nflog) + nf_log_register(PF_INET6, &ip6t_logfn); + ip6t_unregister_target(&ip6t_log_reg); } diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/ip6t_TRACE.c linux-2.6.0-test9/net/ipv6/netfilter/ip6t_TRACE.c --- linux-2.6.0-test9.org/net/ipv6/netfilter/ip6t_TRACE.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv6/netfilter/ip6t_TRACE.c 2003-11-13 11:01:39.000000000 +0100 @@ -0,0 +1,69 @@ +/* This is a module which is used for setting + * the NFC_TRACE flag in the nfcache field of an skb. + */ +#include +#include + +#include + +static unsigned int +target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + (*pskb)->nfcache |= NFC_TRACE; + return IP6T_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != 0) { + printk(KERN_WARNING "TRACE: targinfosize %u != 0\n", + targinfosize); + return 0; + } + + if (strcmp(tablename, "raw") != 0) { + printk(KERN_WARNING "TRACE: can only be called from \"raw\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ip6t_target ip6t_trace_reg = { + .name = "TRACE", + .target = target, + .checkentry = checkentry, + .destroy = NULL, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + if (ip6t_register_target(&ip6t_trace_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_trace_reg); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("IPv6 TRACE target"); + \ No newline at end of file diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/ip6table_filter.c linux-2.6.0-test9/net/ipv6/netfilter/ip6table_filter.c --- linux-2.6.0-test9.org/net/ipv6/netfilter/ip6table_filter.c 2003-10-25 20:42:53.000000000 +0200 +++ linux-2.6.0-test9/net/ipv6/netfilter/ip6table_filter.c 2003-11-13 11:01:39.000000000 +0100 @@ -52,7 +52,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* FORWARD */ @@ -60,7 +60,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* LOCAL_OUT */ @@ -68,7 +68,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } } }, @@ -77,7 +77,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_error), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } }, { } }, "ERROR" diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/ip6table_mangle.c linux-2.6.0-test9/net/ipv6/netfilter/ip6table_mangle.c --- linux-2.6.0-test9.org/net/ipv6/netfilter/ip6table_mangle.c 2003-10-25 20:43:26.000000000 +0200 +++ linux-2.6.0-test9/net/ipv6/netfilter/ip6table_mangle.c 2003-11-13 11:01:39.000000000 +0100 @@ -66,7 +66,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* LOCAL_IN */ @@ -74,7 +74,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* FORWARD */ @@ -82,7 +82,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* LOCAL_OUT */ @@ -90,7 +90,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } }, /* POST_ROUTING */ @@ -98,7 +98,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_standard), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, -NF_ACCEPT - 1 } } }, @@ -107,7 +107,7 @@ 0, sizeof(struct ip6t_entry), sizeof(struct ip6t_error), - 0, { 0, 0 }, { } }, + 0, NULL, 0, { 0, 0 }, { } }, { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } }, { } }, "ERROR" diff -Nur linux-2.6.0-test9.org/net/ipv6/netfilter/ip6table_raw.c linux-2.6.0-test9/net/ipv6/netfilter/ip6table_raw.c --- linux-2.6.0-test9.org/net/ipv6/netfilter/ip6table_raw.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/net/ipv6/netfilter/ip6table_raw.c 2003-11-13 11:01:39.000000000 +0100 @@ -0,0 +1,158 @@ +/* + * IPv6 raw table, a port of the IPv4 raw table to IPv6 + * + * Copyright (C) 2003 Jozsef Kadlecsik + */ +#include +#include + +#define RAW_VALID_HOOKS ((1 << NF_IP6_PRE_ROUTING) | (1 << NF_IP6_LOCAL_OUT)) + +#if 0 +#define DEBUGP(x, args...) printk(KERN_DEBUG x, ## args) +#else +#define DEBUGP(x, args...) +#endif + +/* Standard entry. */ +struct ip6t_standard +{ + struct ip6t_entry entry; + struct ip6t_standard_target target; +}; + +struct ip6t_error_target +{ + struct ip6t_entry_target target; + char errorname[IP6T_FUNCTION_MAXNAMELEN]; +}; + +struct ip6t_error +{ + struct ip6t_entry entry; + struct ip6t_error_target target; +}; + +static struct +{ + struct ip6t_replace repl; + struct ip6t_standard entries[2]; + struct ip6t_error term; +} initial_table __initdata += { { "raw", RAW_VALID_HOOKS, 3, + sizeof(struct ip6t_standard) * 2 + sizeof(struct ip6t_error), + { [NF_IP6_PRE_ROUTING] 0, + [NF_IP6_LOCAL_OUT] sizeof(struct ip6t_standard) }, + { [NF_IP6_PRE_ROUTING] 0, + [NF_IP6_LOCAL_OUT] sizeof(struct ip6t_standard) }, + 0, NULL, { } }, + { + /* PRE_ROUTING */ + { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ip6t_entry), + sizeof(struct ip6t_standard), + 0, NULL, 0, { 0, 0 }, { } }, + { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } }, + /* LOCAL_OUT */ + { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ip6t_entry), + sizeof(struct ip6t_standard), + 0, NULL, 0, { 0, 0 }, { } }, + { { { { IP6T_ALIGN(sizeof(struct ip6t_standard_target)), "" } }, { } }, + -NF_ACCEPT - 1 } }, + }, + /* ERROR */ + { { { { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, { { { 0 } } }, "", "", { 0 }, { 0 }, 0, 0, 0 }, + 0, + sizeof(struct ip6t_entry), + sizeof(struct ip6t_error), + 0, NULL, 0, { 0, 0 }, { } }, + { { { { IP6T_ALIGN(sizeof(struct ip6t_error_target)), IP6T_ERROR_TARGET } }, + { } }, + "ERROR" + } + } +}; + +static struct ip6t_table packet_raw = { + .name = "raw", + .table = &initial_table.repl, + .valid_hooks = RAW_VALID_HOOKS, + .lock = RW_LOCK_UNLOCKED, + .me = THIS_MODULE +}; + +/* The work comes in here from netfilter.c. */ +static unsigned int +ip6t_hook(unsigned int hook, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL); +} + +static struct nf_hook_ops ip6t_ops[] = { + { /* PRE_ROUTING */ + .hook = ip6t_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_FIRST, + }, + { /* LOCAL_OUT */ + .hook = ip6t_hook, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_FIRST, + }, +}; + +static int __init init(void) +{ + int ret; + + /* Register table */ + ret = ip6t_register_table(&packet_raw); + if (ret < 0) + return ret; + + /* Register hooks */ + ret = nf_register_hook(&ip6t_ops[0]); + if (ret < 0) + goto cleanup_table; + + ret = nf_register_hook(&ip6t_ops[1]); + if (ret < 0) + goto cleanup_hook0; + + return ret; + + cleanup_hook0: + nf_unregister_hook(&ip6t_ops[0]); + cleanup_table: + ip6t_unregister_table(&packet_raw); + + return ret; +} + +static void __exit fini(void) +{ + unsigned int i; + + for (i = 0; i < sizeof(ip6t_ops)/sizeof(struct nf_hook_ops); i++) + nf_unregister_hook(&ip6t_ops[i]); + + ip6t_unregister_table(&packet_raw); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Netfilter Core Team "); +MODULE_DESCRIPTION("IPv6 raw table"); diff -Nur linux-2.6.0-test9.org/netfilter-patch-o-matic/patches linux-2.6.0-test9/netfilter-patch-o-matic/patches --- linux-2.6.0-test9.org/netfilter-patch-o-matic/patches 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.0-test9/netfilter-patch-o-matic/patches 2003-11-13 11:02:28.000000000 +0100 @@ -0,0 +1,30 @@ +./base/01_sctp_match.patch +./pending/23_REJECT-headroom-tcprst.patch +./pending/24_rcu.patch +./pending/25-err-ptr.patch +./pending/26-memsets.patch +./pending/40_nf-log.patch +./pending/70_expect-evict-order.patch +./pending/71_raw.patch +./submitted/02_REJECT-headroom-tcprst.patch +./submitted/03_260t4-mirror-remove.patch +./submitted/03_physdev_bridged.patch +./submitted/04_260t4-unclean-remove.patch +./submitted/05_260t4-unexperimental.patch +./submitted/06_260t4-cosmetic.patch +./submitted/07_260t4-newmodules_iprange_SAME_NETMAP_CLASSIFY.patch +./submitted/07_nonlinear_skb.patch +./submitted/08_260t4_ipt-helper-kconfig.patch +./submitted/09_260t4-cosmetic-physdev-author.patch +./submitted/75_nathelper-udp-csum.patch +./submitted/76_mangle_udp-sizecheck.patch +./submitted/77_destroy-conntrack.patch +./submitted/78_reject-localout.patch +./submitted/80_ip_conntrack-proc.patch +./submitted/82_irc-conntrack-mirc-serverlookup.patch +./submitted/83_nolocalout.patch +./submitted/84_local-nullbinding.patch +./submitted/85_ipv6header.patch +./submitted/86_getorigdst-tuple-zero.patch +./submitted/87_compat-nat_setup_info.patch +./submitted/89_ip_queue-maxlen.patch