diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/ipv4/nf_conntrack_icmp.h linux-2.6.11.3/include/linux/netfilter/ipv4/nf_conntrack_icmp.h --- linux-2.6.11.3.org/include/linux/netfilter/ipv4/nf_conntrack_icmp.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/ipv4/nf_conntrack_icmp.h 2005-03-13 22:06:02.118232144 +0100 @@ -0,0 +1,17 @@ +/* + * ICMP tracking. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_icmp.h + */ + +#ifndef _NF_CONNTRACK_ICMP_H +#define _NF_CONNTRACK_ICMP_H +#include + +struct nf_ct_icmp +{ + /* Optimization: when number in == number out, forget immediately. */ + atomic_t count; +}; + +#endif /* _NF_CONNTRACK_ICMP_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/ipv4/nf_conntrack_ipv4.h linux-2.6.11.3/include/linux/netfilter/ipv4/nf_conntrack_ipv4.h --- linux-2.6.11.3.org/include/linux/netfilter/ipv4/nf_conntrack_ipv4.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/ipv4/nf_conntrack_ipv4.h 2005-03-13 22:06:02.120231840 +0100 @@ -0,0 +1,40 @@ +/* + * IPv4 support for nf_conntrack. 
+ * + * 23 Mar 2004: Yasuyuki Kozakai @ USAGI + * - move L3 protocol dependent part from include/linux/netfilter_ipv4/ + * ip_conntarck.h + */ + +#ifndef _NF_CONNTRACK_IPV4_H +#define _NF_CONNTRACK_IPV4_H + +#ifdef CONFIG_IP_NF_NAT_NEEDED +#include + +/* per conntrack: nat application helper private data */ +union ip_conntrack_nat_help { + /* insert nat helper private data here */ +}; + +struct nf_conntrack_ipv4_nat { + struct ip_nat_info info; + union ip_conntrack_nat_help help; +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + int masq_index; +#endif +}; +#endif /* CONFIG_IP_NF_NAT_NEEDED */ + +struct nf_conntrack_ipv4 { +#ifdef CONFIG_IP_NF_NAT_NEEDED + struct nf_conntrack_ipv4_nat *nat; +#endif +}; + +/* Returns new sk_buff, or NULL */ +struct sk_buff * +nf_ct_ipv4_ct_gather_frags(struct sk_buff *skb); + +#endif /*_NF_CONNTRACK_IPV4_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/ipv6/nf_conntrack_icmpv6.h linux-2.6.11.3/include/linux/netfilter/ipv6/nf_conntrack_icmpv6.h --- linux-2.6.11.3.org/include/linux/netfilter/ipv6/nf_conntrack_icmpv6.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/ipv6/nf_conntrack_icmpv6.h 2005-03-13 22:06:02.130230320 +0100 @@ -0,0 +1,27 @@ +/* + * ICMPv6 tracking. + * + * 21 Apl 2004: Yasuyuki Kozakai @USAGI + * - separated from nf_conntrack_icmp.h + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_icmp.h + */ + +#ifndef _NF_CONNTRACK_ICMPV6_H +#define _NF_CONNTRACK_ICMPV6_H +#include + +#ifndef ICMPV6_NI_QUERY +#define ICMPV6_NI_QUERY 139 +#endif +#ifndef ICMPV6_NI_REPLY +#define ICMPV6_NI_REPLY 140 +#endif + +struct nf_ct_icmpv6 +{ + /* Optimization: when number in == number out, forget immediately. 
*/ + atomic_t count; +}; + +#endif /* _NF_CONNTRACK_ICMPV6_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack.h 2005-03-13 22:06:02.140228800 +0100 @@ -0,0 +1,302 @@ +/* + * Connection state tracking for netfilter. This is separated from, + * but required by, the (future) NAT layer; it can also be used by an iptables + * extension. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack.h + */ + +#ifndef _NF_CONNTRACK_H +#define _NF_CONNTRACK_H + +enum nf_conntrack_info +{ + /* Part of an established connection (either direction). */ + NF_CT_ESTABLISHED, + + /* Like NEW, but related to an existing connection, or ICMP error + (in either direction). */ + NF_CT_RELATED, + + /* Started a new connection to track (only + NF_CT_DIR_ORIGINAL); may be a retransmission. */ + NF_CT_NEW, + + /* >= this indicates reply direction */ + NF_CT_IS_REPLY, + + /* Number of distinct NF_CT types (no NEW in reply dirn). */ + NF_CT_NUMBER = NF_CT_IS_REPLY * 2 - 1 +}; + +/* Bitset representing status of connection. */ +enum nf_conntrack_status { + /* It's an expected connection: bit 0 set. This bit never changed */ + NF_S_EXPECTED_BIT = 0, + NF_S_EXPECTED = (1 << NF_S_EXPECTED_BIT), + + /* We've seen packets both ways: bit 1 set. Can be set, not unset. */ + NF_S_SEEN_REPLY_BIT = 1, + NF_S_SEEN_REPLY = (1 << NF_S_SEEN_REPLY_BIT), + + /* Conntrack should never be early-expired. 
*/ + NF_S_ASSURED_BIT = 2, + NF_S_ASSURED = (1 << NF_S_ASSURED_BIT), + + /* Connection is confirmed: originating packet has left box */ + NF_S_CONFIRMED_BIT = 3, + NF_S_CONFIRMED = (1 << NF_S_CONFIRMED_BIT), +}; + +#ifdef __KERNEL__ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* per conntrack: protocol private data */ +union nf_conntrack_proto { + /* insert conntrack proto private data here */ + struct nf_ct_sctp sctp; + struct nf_ct_tcp tcp; + struct nf_ct_icmp icmp; + struct nf_ct_icmpv6 icmpv6; +}; + +union nf_conntrack_expect_proto { + /* insert expect proto private data here */ +}; + +/* Add protocol helper include file here */ +#include + +/* per conntrack: application helper private data */ +union nf_conntrack_help { + /* insert conntrack helper private data (master) here */ + struct nf_ct_ftp_master ct_ftp_info; +}; + +#include +#include + +#ifdef CONFIG_NETFILTER_DEBUG +#define NF_CT_ASSERT(x) \ +do { \ + if (!(x)) \ + /* Wooah! I'm tripping my conntrack in a frenzy of \ + netplay... */ \ + printk("NF_CT_ASSERT: %s:%i(%s)\n", \ + __FILE__, __LINE__, __FUNCTION__); \ +} while(0) +#else +#define NF_CT_ASSERT(x) +#endif + +struct nf_conntrack_counter +{ + u_int64_t packets; + u_int64_t bytes; +}; + +struct nf_conntrack_helper; + +#include +struct nf_conn +{ + /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + plus 1 for any connection(s) we are `master' for */ + struct nf_conntrack ct_general; + + /* XXX should I move this to the tail ? - Y.K */ + /* These are my tuples; original and reply */ + struct nf_conntrack_tuple_hash tuplehash[NF_CT_DIR_MAX]; + + /* Have we seen traffic both ways yet? (bitset) */ + unsigned long status; + + /* Timer function; drops refcnt when it goes off. 
*/ + struct timer_list timeout; + +#ifdef CONFIG_NF_CT_ACCT + /* Accounting Information (same cache line as other written members) */ + struct nf_conntrack_counter counters[NF_CT_DIR_MAX]; +#endif + /* If we were expected by an expectation, this will be it */ + struct nf_conn *master; + + /* Current number of expected connections */ + unsigned int expecting; + + /* Helper. if any */ + struct nf_conntrack_helper *helper; + + /* features - nat, helper, ... used by allocating system */ + u_int32_t features; + + /* Storage reserved for other modules: */ + + union nf_conntrack_proto proto; + +#if defined(CONFIG_NF_CONNTRACK_MARK) + unsigned long mark; +#endif + + /* These members are dynamically allocated. */ + + union nf_conntrack_help *help; + + /* Layer 3 dependent members. (ex: NAT) */ + union { + struct nf_conntrack_ipv4 *ipv4; + } l3proto; + void *data[0]; +}; + +struct nf_conntrack_expect +{ + /* Internal linked list (global expectation list) */ + struct list_head list; + + /* We expect this tuple, with the following mask */ + struct nf_conntrack_tuple tuple, mask; + + /* Function to call after setup and insertion */ + void (*expectfn)(struct nf_conn *new, + struct nf_conntrack_expect *this); + + /* The conntrack of the master connection */ + struct nf_conn *master; + + /* Timer function; deletes the expectation. */ + struct timer_list timeout; + +#ifdef CONFIG_NF_NAT_NEEDED + /* This is the original per-proto part, used to map the + * expected connection the way the recipient expects. */ + union nf_conntrack_manip_proto saved_proto; + /* Direction relative to the master connection. */ + enum nf_conntrack_dir dir; +#endif +}; + +static inline struct nf_conn * +tuplehash_to_ctrack(const struct nf_conntrack_tuple_hash *hash) +{ + return container_of(hash, struct nf_conn, + tuplehash[hash->tuple.dst.dir]); +} + +/* get master conntrack via master expectation */ +#define master_ct(conntr) (conntr->master) + +/* Alter reply tuple (maybe alter helper). 
*/ +extern void +nf_conntrack_alter_reply(struct nf_conn *conntrack, + const struct nf_conntrack_tuple *newreply); + +/* Is this tuple taken? (ignoring any belonging to the given + conntrack). */ +extern int +nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +/* Return conntrack_info and tuple hash for given skb. */ +static inline struct nf_conn * +nf_ct_get(struct sk_buff *skb, enum nf_conntrack_info *ctinfo) +{ + *ctinfo = skb->nfctinfo; + return (struct nf_conn *)skb->nfct; +} + +/* decrement reference count on a conntrack */ +extern inline void nf_ct_put(struct nf_conn *ct); + +/* call to create an explicit dependency on nf_conntrack. */ +extern void need_nf_conntrack(void); + +extern int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig); + +/* Refresh conntrack for this many jiffies */ +extern void nf_ct_refresh_acct(struct nf_conn *ct, + enum nf_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies); + +/* These are for NAT. Icky. */ +/* Call me when a conntrack is destroyed. */ +extern void (*nf_conntrack_destroyed)(struct nf_conn *conntrack); + +/* Fake conntrack entry for untracked connections */ +extern struct nf_conn nf_conntrack_untracked; + +extern int nf_ct_no_defrag; + +/* Iterate over all conntracks: if iter returns true, it's deleted. */ +extern void +nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data); + +/* It's confirmed if it is, or has been in the hash table. 
*/ +static inline int is_confirmed(struct nf_conn *ct) +{ + return test_bit(NF_S_CONFIRMED_BIT, &ct->status); +} + +extern unsigned int nf_conntrack_htable_size; + +struct nf_conntrack_stat +{ + unsigned int searched; + unsigned int found; + unsigned int new; + unsigned int invalid; + unsigned int ignore; + unsigned int delete; + unsigned int delete_list; + unsigned int insert; + unsigned int insert_failed; + unsigned int drop; + unsigned int early_drop; + unsigned int error; + unsigned int expect_new; + unsigned int expect_create; + unsigned int expect_delete; +}; + +#define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) + +/* eg. PROVIDES_CONNTRACK(ftp); */ +#define PROVIDES_CONNTRACK(name) \ + int needs_nf_conntrack_##name; \ + EXPORT_SYMBOL(needs_nf_conntrack_##name) + +/*. eg. NEEDS_CONNTRACK(ftp); */ +#define NEEDS_CONNTRACK(name) \ + extern int needs_nf_conntrack_##name; \ + static int *need_nf_conntrack_##name __attribute_used__ = &needs_nf_conntrack_##name + +/* no helper, no nat */ +#define NF_CT_F_BASIC 0 +/* for helper */ +#define NF_CT_F_HELP 1 +/* for nat. */ +#define NF_CT_F_NAT 2 +#define NF_CT_F_NUM 4 + +extern int +nf_conntrack_register_cache(u_int32_t features, const char *name, size_t size, + int (*init_conntrack)(struct nf_conn *, u_int32_t)); +extern void +nf_conntrack_unregister_cache(u_int32_t features); + +#endif /* __KERNEL__ */ +#endif /* _NF_CONNTRACK_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_core.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_core.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_core.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_core.h 2005-03-13 22:06:02.145228040 +0100 @@ -0,0 +1,72 @@ +/* + * This header is used to share core functionality between the + * standalone connection tracking module, and the compatibility layer's use + * of connection tracking. 
+ * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_core.h + */ + +#ifndef _NF_CONNTRACK_CORE_H +#define _NF_CONNTRACK_CORE_H + +#include +#include + +/* This header is used to share core functionality between the + standalone connection tracking module, and the compatibility layer's use + of connection tracking. */ +extern unsigned int nf_conntrack_in(int pf, + unsigned int hooknum, + struct sk_buff **pskb); + +extern int nf_conntrack_init(void); +extern void nf_conntrack_cleanup(void); + +struct nf_conntrack_l3proto; +extern struct nf_conntrack_l3proto *nf_ct_find_l3proto(u_int16_t pf); +/* Like above, but you already have conntrack read lock. */ +extern struct nf_conntrack_l3proto *__nf_ct_find_l3proto(u_int16_t l3proto); + +struct nf_conntrack_protocol; + +extern int +nf_ct_get_tuple(const struct sk_buff *skb, + unsigned int nhoff, + unsigned int dataoff, + u_int16_t l3num, + u_int8_t protonum, + struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol); + +extern int +nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol); + +/* Find a connection corresponding to a tuple. */ +extern struct nf_conntrack_tuple_hash * +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +extern int __nf_conntrack_confirm(struct sk_buff **pskb); + +/* Confirm a connection: returns NF_DROP if packet must be dropped. 
*/ +static inline int nf_conntrack_confirm(struct sk_buff **pskb) +{ + if ((*pskb)->nfct + && !is_confirmed((struct nf_conn *)(*pskb)->nfct)) + return __nf_conntrack_confirm(pskb); + return NF_ACCEPT; +} + +extern void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb); + +extern struct list_head *nf_conntrack_hash; +extern struct list_head nf_conntrack_expect_list; +DECLARE_RWLOCK_EXTERN(nf_conntrack_lock); +#endif /* _NF_CONNTRACK_CORE_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_ftp.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_ftp.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_ftp.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_ftp.h 2005-03-13 22:06:02.147227736 +0100 @@ -0,0 +1,59 @@ +/* + * nf_conntrack_ftp.h + * + * Definitions and Declarations for FTP tracking. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_ftp.h + * + * 16 Dec 2003: Yasuyuki Kozakai @ USAGI + * - IPv6 support. + */ + +#ifndef _NF_CONNTRACK_FTP_H +#define _NF_CONNTRACK_FTP_H +/* FTP tracking. */ + +#ifdef __KERNEL__ + +#include + +/* Protects ftp part of conntracks */ +DECLARE_LOCK_EXTERN(ip_ftp_lock); + +#define FTP_PORT 21 + +#endif /* __KERNEL__ */ + +enum nf_ct_ftp_type +{ + /* PORT command from client */ + NF_CT_FTP_PORT, + /* PASV response from server */ + NF_CT_FTP_PASV, + /* EPRT command from client */ + NF_CT_FTP_EPRT, + /* EPSV response from server */ + NF_CT_FTP_EPSV, +}; + +#define NUM_SEQ_TO_REMEMBER 2 +/* This structure exists only once per master */ +struct nf_ct_ftp_master { + /* Valid seq positions for cmd matching after newline */ + u_int32_t seq_aft_nl[NF_CT_DIR_MAX][NUM_SEQ_TO_REMEMBER]; + /* 0 means seq_match_aft_nl not set */ + int seq_aft_nl_num[NF_CT_DIR_MAX]; +}; + +struct nf_conntrack_expect; + +/* For NAT to hook in when we find a packet which describes what other + * connection we should expect. 
*/ +extern unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, + enum nf_conntrack_info ctinfo, + enum nf_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp, + u32 *seq); +#endif /* _NF_CONNTRACK_FTP_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_helper.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_helper.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_helper.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_helper.h 2005-03-13 22:06:02.148227584 +0100 @@ -0,0 +1,50 @@ +/* + * connection tracking helpers. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_helper.h + */ + +#ifndef _NF_CONNTRACK_HELPER_H +#define _NF_CONNTRACK_HELPER_H +#include + +struct module; + +struct nf_conntrack_helper +{ + struct list_head list; /* Internal use. */ + + const char *name; /* name of the module */ + struct module *me; /* pointer to self */ + unsigned int max_expected; /* Maximum number of concurrent + * expected connections */ + unsigned int timeout; /* timeout for expecteds */ + + /* Mask of things we will help (compared against server response) */ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple mask; + + /* Function to call when data passes; return verdict, or -1 to + invalidate. */ + int (*help)(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum nf_conntrack_info conntrackinfo); +}; + +extern int nf_conntrack_helper_register(struct nf_conntrack_helper *); +extern void nf_conntrack_helper_unregister(struct nf_conntrack_helper *); + +/* Allocate space for an expectation: this is mandatory before calling + nf_conntrack_expect_related. 
*/ +extern struct nf_conntrack_expect *nf_conntrack_expect_alloc(void); +extern void nf_conntrack_expect_free(struct nf_conntrack_expect *exp); + +/* Add an expected connection: can have more than one per connection */ +extern int nf_conntrack_expect_related(struct nf_conntrack_expect *exp); +extern void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp); + +#endif /*_NF_CONNTRACK_HELPER_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_l3proto.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_l3proto.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_l3proto.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_l3proto.h 2005-03-13 22:06:02.152226976 +0100 @@ -0,0 +1,93 @@ +/* + * Copyright (C)2003,2004 USAGI/WIDE Project + * + * Header for use in defining a given L3 protocol for connection tracking. + * + * Author: + * Yasuyuki Kozakai @USAGI + * + * Derived from include/netfilter_ipv4/ip_conntrack_protocol.h + */ + +#ifndef _NF_CONNTRACK_L3PROTO_H +#define _NF_CONNTRACK_L3PROTO_H +#include +#include + +struct nf_conntrack_l3proto +{ + /* Next pointer. */ + struct list_head list; + + /* L3 Protocol Family number. ex) PF_INET */ + u_int16_t l3proto; + + /* Protocol name */ + const char *name; + + /* + * Try to fill in the third arg: nhoff is offset of l3 proto + * hdr. Return true if possible. + */ + int (*pkt_to_tuple)(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple); + + /* + * Invert the per-proto part of the tuple: ie. turn xmit into reply. + * Some packets can't be inverted: return 0 in that case. + */ + int (*invert_tuple)(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig); + + /* Print out the per-protocol part of the tuple. */ + int (*print_tuple)(struct seq_file *s, + const struct nf_conntrack_tuple *); + + /* Print out the private part of the conntrack. 
*/ + int (*print_conntrack)(struct seq_file *s, const struct nf_conn *); + + /* Returns verdict for packet, or -1 for invalid. */ + int (*packet)(struct nf_conn *conntrack, + const struct sk_buff *skb, + enum nf_conntrack_info ctinfo); + + /* + * Called when a new connection for this protocol found; + * returns TRUE if it's OK. If so, packet() called next. + */ + int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb); + + /* Called when a conntrack entry is destroyed */ + void (*destroy)(struct nf_conn *conntrack); + + /* + * Called before tracking. + * *dataoff: offset of protocol header (TCP, UDP,...) in *pskb + * *protonum: protocol number + */ + int (*prepare)(struct sk_buff **pskb, unsigned int hooknum, + unsigned int *dataoff, u_int8_t *protonum, int *ret); + + u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple); + + /* Module (if any) which this is connected to. */ + struct module *me; +}; + +extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; + +/* Protocol registration. 
*/ +extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); +extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); + +static inline struct nf_conntrack_l3proto * +nf_ct_find_l3proto(u_int16_t l3proto) +{ + return nf_ct_l3protos[l3proto]; +} + +/* Existing built-in protocols */ +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +extern struct nf_conntrack_l3proto nf_conntrack_generic_l3proto; +#endif /*_NF_CONNTRACK_L3PROTO_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_protocol.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_protocol.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_protocol.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_protocol.h 2005-03-13 22:06:02.154226672 +0100 @@ -0,0 +1,105 @@ +/* + * Header for use in defining a given protocol for connection tracking. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalized L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_protcol.h + */ + +#ifndef _NF_CONNTRACK_PROTOCOL_H +#define _NF_CONNTRACK_PROTOCOL_H +#include + +struct seq_file; + +struct nf_conntrack_protocol +{ + /* Next pointer. */ + struct list_head list; + + /* L3 Protocol number. */ + u_int16_t l3proto; + + /* Protocol number. */ + u_int8_t proto; + + /* Protocol name */ + const char *name; + + /* Try to fill in the third arg: dataoff is offset past network protocol + hdr. Return true if possible. */ + int (*pkt_to_tuple)(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple); + + /* Invert the per-proto part of the tuple: ie. turn xmit into reply. + * Some packets can't be inverted: return 0 in that case. 
+ */ + int (*invert_tuple)(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig); + + /* Print out the per-protocol part of the tuple. Return like seq_* */ + int (*print_tuple)(struct seq_file *s, + const struct nf_conntrack_tuple *); + + /* Print out the private part of the conntrack. */ + int (*print_conntrack)(struct seq_file *s, const struct nf_conn *); + + /* Returns verdict for packet, or -1 for invalid. */ + int (*packet)(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info ctinfo, + int pf, + unsigned int hooknum); + + /* Called when a new connection for this protocol found; + * returns TRUE if it's OK. If so, packet() called next. */ + int (*new)(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff); + + /* Called when a conntrack entry is destroyed */ + void (*destroy)(struct nf_conn *conntrack); + + int (*error)(struct sk_buff *skb, unsigned int dataoff, + enum nf_conntrack_info *ctinfo, + int pf, unsigned int hooknum); + + /* Module (if any) which this is connected to. */ + struct module *me; +}; + +/* Existing built-in protocols */ +extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6; +extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4; +extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6; +extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; + +#define MAX_NF_CT_PROTO 256 +extern struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; + +extern struct nf_conntrack_protocol * +nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol); + +/* Protocol registration. 
*/ +extern int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto); +extern void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto); + +/* Log invalid packets */ +extern unsigned int nf_ct_log_invalid; + +#ifdef CONFIG_SYSCTL +#ifdef DEBUG_INVALID_PACKETS +#define LOG_INVALID(proto) \ + (nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) +#else +#define LOG_INVALID(proto) \ + ((nf_ct_log_invalid == (proto) || nf_ct_log_invalid == IPPROTO_RAW) \ + && net_ratelimit()) +#endif +#else +#define LOG_INVALID(proto) 0 +#endif /* CONFIG_SYSCTL */ + +#endif /*_NF_CONNTRACK_PROTOCOL_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_sctp.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_sctp.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_sctp.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_sctp.h 2005-03-13 22:06:02.156226368 +0100 @@ -0,0 +1,30 @@ +/* + * SCTP tracking. 
+ * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_tcp.h + */ + +#ifndef _NF_CONNTRACK_SCTP_H +#define _NF_CONNTRACK_SCTP_H + +enum sctp_conntrack { + SCTP_CONNTRACK_NONE, + SCTP_CONNTRACK_CLOSED, + SCTP_CONNTRACK_COOKIE_WAIT, + SCTP_CONNTRACK_COOKIE_ECHOED, + SCTP_CONNTRACK_ESTABLISHED, + SCTP_CONNTRACK_SHUTDOWN_SENT, + SCTP_CONNTRACK_SHUTDOWN_RECD, + SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, + SCTP_CONNTRACK_MAX +}; + +struct nf_ct_sctp +{ + enum sctp_conntrack state; + + u_int32_t vtag[NF_CT_DIR_MAX]; + u_int32_t ttag[NF_CT_DIR_MAX]; +}; + +#endif /* _NF_CONNTRACK_SCTP_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_tcp.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_tcp.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_tcp.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_tcp.h 2005-03-13 22:06:02.157226216 +0100 @@ -0,0 +1,63 @@ +/* + * TCP tracking. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_tcp.h + */ + +#ifndef _NF_CONNTRACK_TCP_H +#define _NF_CONNTRACK_TCP_H + +enum tcp_conntrack { + TCP_CONNTRACK_NONE, + TCP_CONNTRACK_SYN_SENT, + TCP_CONNTRACK_SYN_RECV, + TCP_CONNTRACK_ESTABLISHED, + TCP_CONNTRACK_FIN_WAIT, + TCP_CONNTRACK_CLOSE_WAIT, + TCP_CONNTRACK_LAST_ACK, + TCP_CONNTRACK_TIME_WAIT, + TCP_CONNTRACK_CLOSE, + TCP_CONNTRACK_LISTEN, + TCP_CONNTRACK_MAX, + TCP_CONNTRACK_IGNORE +}; + +/* Window scaling is advertised by the sender */ +#define NF_CT_TCP_FLAG_WINDOW_SCALE 0x01 + +/* SACK is permitted by the sender */ +#define NF_CT_TCP_FLAG_SACK_PERM 0x02 + +struct nf_ct_tcp_state { + u_int32_t td_end; /* max of seq + len */ + u_int32_t td_maxend; /* max of ack + max(win, 1) */ + u_int32_t td_maxwin; /* max(win) */ + u_int8_t td_scale; /* window scale factor */ + u_int8_t loose; /* used when connection picked up from the middle */ + u_int8_t flags; /* per direction state flags */ +}; + +struct nf_ct_tcp +{ + struct nf_ct_tcp_state 
seen[2]; /* connection parameters per direction */ + u_int8_t state; /* state of the connection (enum tcp_conntrack) */ + /* For detecting stale connections */ + u_int8_t last_dir; /* Direction of the last packet (enum nf_conntrack_dir) */ + u_int8_t retrans; /* Number of retransmitted packets */ + u_int8_t last_index; /* Index of the last packet */ + u_int32_t last_seq; /* Last sequence number seen in dir */ + u_int32_t last_ack; /* Last sequence number seen in opposite dir */ + u_int32_t last_end; /* Last seq + len */ +}; + +/* Need this, since this file is included before the nf_conn definition + * in nf_conntrack.h */ +struct nf_conn; + +/* Update TCP window tracking data when NAT mangles the packet */ +extern void nf_conntrack_tcp_update(struct sk_buff *skb, + unsigned int dataoff, + struct nf_conn *conntrack, + int dir); + +#endif /* _NF_CONNTRACK_TCP_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_tuple.h linux-2.6.11.3/include/linux/netfilter/nf_conntrack_tuple.h --- linux-2.6.11.3.org/include/linux/netfilter/nf_conntrack_tuple.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter/nf_conntrack_tuple.h 2005-03-13 22:06:02.159225912 +0100 @@ -0,0 +1,201 @@ +/* + * Definitions and Declarations for tuple. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_tuple.h + */ + +#ifndef _NF_CONNTRACK_TUPLE_H +#define _NF_CONNTRACK_TUPLE_H + +/* A `tuple' is a structure containing the information to uniquely + identify a connection. ie. if two packets have the same tuple, they + are in the same connection; if not, they are not. + + We divide the structure along "manipulatable" and + "non-manipulatable" lines, for the benefit of the NAT code. +*/ + +#define NF_CT_TUPLE_L3SIZE 4 + +/* The l3 protocol-specific manipulable parts of the tuple: always in + network order! 
*/ +union nf_conntrack_man_l3proto { + u_int32_t all[NF_CT_TUPLE_L3SIZE]; + u_int32_t ip; + u_int32_t ip6[4]; +}; + +/* The protocol-specific manipulable parts of the tuple: always in + network order! */ +union nf_conntrack_man_proto +{ + /* Add other protocols here. */ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int16_t id; + } icmp; + struct { + u_int16_t port; + } sctp; +}; + +/* The manipulable part of the tuple. */ +struct nf_conntrack_man +{ + union nf_conntrack_man_l3proto u3; + union nf_conntrack_man_proto u; + /* Layer 3 protocol */ + u_int16_t l3num; +}; + +/* This contains the information to distinguish a connection. */ +struct nf_conntrack_tuple +{ + struct nf_conntrack_man src; + + /* These are the parts of the tuple which are fixed. */ + struct { + union { + u_int32_t all[NF_CT_TUPLE_L3SIZE]; + u_int32_t ip; + u_int32_t ip6[4]; + } u3; + union { + /* Add other protocols here. */ + u_int16_t all; + + struct { + u_int16_t port; + } tcp; + struct { + u_int16_t port; + } udp; + struct { + u_int8_t type, code; + } icmp; + struct { + u_int16_t port; + } sctp; + } u; + + /* The protocol. */ + u_int8_t protonum; + + /* The direction (for tuplehash) */ + u_int8_t dir; + } dst; +}; + +/* This is optimized opposed to a memset of the whole structure. 
Everything we + * really care about is the source/destination unions */ +#define NF_CT_TUPLE_U_BLANK(tuple) \ + do { \ + (tuple)->src.u.all = 0; \ + (tuple)->dst.u.all = 0; \ + memset((tuple)->src.u3.all, 0, \ + sizeof(u_int32_t)*NF_CT_TUPLE_L3SIZE); \ + memset((tuple)->dst.u3.all, 0, \ + sizeof(u_int32_t)*NF_CT_TUPLE_L3SIZE); \ + } while (0) + +enum nf_conntrack_dir +{ + NF_CT_DIR_ORIGINAL, + NF_CT_DIR_REPLY, + NF_CT_DIR_MAX +}; + +#ifdef __KERNEL__ + +#define NF_CT_DUMP_TUPLE(tp) \ +DEBUGP("tuple %p: %u %u %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu -> %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x %hu\n", \ + (tp), (tp)->src.l3num, (tp)->dst.protonum, \ + NIP6(*(struct in6_addr *)(tp)->src.u3.all), ntohs((tp)->src.u.all), \ + NIP6(*(struct in6_addr *)(tp)->dst.u3.all), ntohs((tp)->dst.u.all)) + +#define NFCTINFO2DIR(ctinfo) ((ctinfo) >= NF_CT_IS_REPLY ? NF_CT_DIR_REPLY : NF_CT_DIR_ORIGINAL) + +/* If we're the first tuple, it's the original dir. */ +#define NF_CT_DIRECTION(h) \ + ((enum nf_conntrack_dir)(h)->tuple.dst.dir) + +/* Connections have two entries in the hash table: one for each way */ +struct nf_conntrack_tuple_hash +{ + struct list_head list; + + struct nf_conntrack_tuple tuple; +}; + +#endif /* __KERNEL__ */ + +static inline int nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return (t1->src.u3.all[0] == t2->src.u3.all[0] && + t1->src.u3.all[1] == t2->src.u3.all[1] && + t1->src.u3.all[2] == t2->src.u3.all[2] && + t1->src.u3.all[3] == t2->src.u3.all[3] && + t1->src.u.all == t2->src.u.all && + t1->src.l3num == t2->src.l3num && + t1->dst.protonum == t2->dst.protonum); +} + +static inline int nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return (t1->dst.u3.all[0] == t2->dst.u3.all[0] && + t1->dst.u3.all[1] == t2->dst.u3.all[1] && + t1->dst.u3.all[2] == t2->dst.u3.all[2] && + t1->dst.u3.all[3] == t2->dst.u3.all[3] && + t1->dst.u.all == t2->dst.u.all && 
+ t1->src.l3num == t2->src.l3num && + t1->dst.protonum == t2->dst.protonum); +} + +static inline int nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ + return nf_ct_tuple_src_equal(t1, t2) && nf_ct_tuple_dst_equal(t1, t2); +} + +static inline int nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *mask) +{ + int count = 0; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + if ((ntohs(t->src.u3.all[count]) ^ + ntohs(tuple->src.u3.all[count])) & + ntohs(mask->src.u3.all[count])) + return 0; + } + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + if ((ntohs(t->dst.u3.all[count]) ^ + ntohs(tuple->dst.u3.all[count])) & + ntohs(mask->dst.u3.all[count])) + return 0; + } + + if ((t->src.u.all ^ tuple->src.u.all) & mask->src.u.all || + (t->dst.u.all ^ tuple->dst.u.all) & mask->dst.u.all || + (t->src.l3num ^ tuple->src.l3num) & mask->src.l3num || + (t->dst.protonum ^ tuple->dst.protonum) & mask->dst.protonum) + return 0; + + return 1; +} + +#endif /* _NF_CONNTRACK_TUPLE_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_conntrack_quake3.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_conntrack_quake3.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_conntrack_quake3.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_conntrack_quake3.h 2005-03-13 22:06:14.548342480 +0100 @@ -0,0 +1,21 @@ +#ifndef _IP_CT_QUAKE3 +#define _IP_CT_QUAKE3 + +/* Don't confuse with 27960, often used as the Server Port */ +#define QUAKE3_MASTER_PORT 27950 + +struct quake3_search { + const char marker[4]; /* always 0xff 0xff 0xff 0xff ? 
*/ + const char *pattern; + size_t plen; +}; + +/* This structure is per expected connection */ +struct ip_ct_quake3_expect { +}; + +/* This structure exists only once per master */ +struct ip_ct_quake3_master { +}; + +#endif /* _IP_CT_QUAKE3 */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_queue.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_queue.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_queue.h 2005-03-13 07:44:41.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_queue.h 2005-03-13 22:05:50.829948224 +0100 @@ -47,10 +47,20 @@ unsigned char payload[0]; /* Optional replacement packet */ } ipq_verdict_msg_t; +typedef struct ipq_vwmark_msg { + unsigned int value; /* Verdict to hand to netfilter */ + unsigned long id; /* Packet ID for this verdict */ + size_t data_len; /* Length of replacement data */ + unsigned char payload[0]; /* Optional replacement packet */ + unsigned long nfmark; /* Mark for the Packet */ +} ipq_vwmark_msg_t; + + typedef struct ipq_peer_msg { union { ipq_verdict_msg_t verdict; ipq_mode_msg_t mode; + ipq_vwmark_msg_t vwmark; } msg; } ipq_peer_msg_t; @@ -67,6 +77,7 @@ #define IPQM_MODE (IPQM_BASE + 1) /* Mode request from peer */ #define IPQM_VERDICT (IPQM_BASE + 2) /* Verdict from peer */ #define IPQM_PACKET (IPQM_BASE + 3) /* Packet from kernel */ -#define IPQM_MAX (IPQM_BASE + 4) +#define IPQM_VWMARK (IPQM_BASE + 4) /* Verdict and mark from peer */ +#define IPQM_MAX (IPQM_BASE + 5) #endif /*_IP_QUEUE_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set.h 2005-03-13 22:04:00.474724768 +0100 @@ -0,0 +1,489 @@ +#ifndef _IP_SET_H +#define _IP_SET_H + +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * 
Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * A sockopt of such quality has hardly ever been seen before on the open + * market! This little beauty, hardly ever used: above 64, so it's + * traditionally used for firewalling, not touched (even once!) by the + * 2.0, 2.2 and 2.4 kernels! + * + * Comes with its own certificate of authenticity, valid anywhere in the + * Free world! + * + * Rusty, 19.4.2000 + */ +#define SO_IP_SET 83 + +/* + * Heavily modify by Joakim Axelsson 08.03.2002 + * - Made it more modulebased + * + * Additional heavy modifications by Jozsef Kadlecsik 22.02.2004 + * - bindings added + * - in order to "deal with" backward compatibility, renamed to ipset + */ + +/* + * Used so that the kernel module and ipset-binary can match their versions + */ +#define IP_SET_PROTOCOL_VERSION 2 + +#define IP_SET_MAXNAMELEN 32 /* set names and set typenames */ + +/* Lets work with our own typedef for representing an IP address. + * We hope to make the code more portable, possibly to IPv6... + * + * The representation works in HOST byte order, because most set types + * will perform arithmetic operations and compare operations. + * + * For now the type is an uint32_t. + * + * Make sure to ONLY use the functions when translating and parsing + * in order to keep the host byte order and make it more portable: + * parse_ip() + * parse_mask() + * parse_ipandmask() + * ip_tostring() + * (Joakim: where are they???) + */ + +typedef uint32_t ip_set_ip_t; + +/* Sets are identified by an id in kernel space. Tweak with ip_set_id_t + * and IP_SET_INVALID_ID if you want to increase the max number of sets. 
+ */ +typedef uint16_t ip_set_id_t; + +#define IP_SET_INVALID_ID 65535 + +/* How deep we follow bindings */ +#define IP_SET_MAX_BINDINGS 6 + +/* + * Option flags for kernel operations (ipt_set_info) + */ +#define IPSET_SRC 0x01 /* Source match/add */ +#define IPSET_DST 0x02 /* Destination match/add */ +#define IPSET_MATCH_INV 0x04 /* Inverse matching */ + +/* + * Set types (flavours) + */ +#define IPSET_TYPE_IP 0 /* IP address type of set */ +#define IPSET_TYPE_PORT 1 /* Port type of set */ + +/* Reserved keywords */ +#define IPSET_TOKEN_DEFAULT ":default:" +#define IPSET_TOKEN_ALL ":all:" + +/* SO_IP_SET operation constants, and their request struct types. + * + * Operation ids: + * 0-99: commands with version checking + * 100-199: add/del/test/bind/unbind + * 200-299: list, save, restore + */ + +/* Single shot operations: + * version, create, destroy, flush, rename and swap + * + * Sets are identified by name. + */ + +#define IP_SET_REQ_STD \ + unsigned op; \ + unsigned version; \ + char name[IP_SET_MAXNAMELEN] + +#define IP_SET_OP_CREATE 0x00000001 /* Create a new (empty) set */ +struct ip_set_req_create { + IP_SET_REQ_STD; + char typename[IP_SET_MAXNAMELEN]; +}; + +#define IP_SET_OP_DESTROY 0x00000002 /* Remove a (empty) set */ +struct ip_set_req_std { + IP_SET_REQ_STD; +}; + +#define IP_SET_OP_FLUSH 0x00000003 /* Remove all IPs in a set */ +/* Uses ip_set_req_std */ + +#define IP_SET_OP_RENAME 0x00000004 /* Rename a set */ +/* Uses ip_set_req_create */ + +#define IP_SET_OP_SWAP 0x00000005 /* Swap two sets */ +/* Uses ip_set_req_create */ + +union ip_set_name_index { + char name[IP_SET_MAXNAMELEN]; + ip_set_id_t index; +}; + +#define IP_SET_OP_GET_BYNAME 0x00000006 /* Get set index by name */ +struct ip_set_req_get_set { + unsigned op; + unsigned version; + union ip_set_name_index set; +}; + +#define IP_SET_OP_GET_BYINDEX 0x00000007 /* Get set name by index */ +/* Uses ip_set_req_get_set */ + +#define IP_SET_OP_VERSION 0x00000100 /* Ask kernel version */ 
+struct ip_set_req_version { + unsigned op; + unsigned version; +}; + +/* Double shots operations: + * add, del, test, bind and unbind. + * + * First we query the kernel to get the index and type of the target set, + * then issue the command. Validity of IP is checked in kernel in order + * to minimalize sockopt operations. + */ + +/* Get minimal set data for add/del/test/bind/unbind IP */ +#define IP_SET_OP_ADT_GET 0x00000010 /* Get set and type */ +struct ip_set_req_adt_get { + unsigned op; + unsigned version; + union ip_set_name_index set; + char typename[IP_SET_MAXNAMELEN]; +}; + +#define IP_SET_REQ_BYINDEX \ + unsigned op; \ + ip_set_id_t index; + +struct ip_set_req_adt { + IP_SET_REQ_BYINDEX; +}; + +#define IP_SET_OP_ADD_IP 0x00000101 /* Add an IP to a set */ +/* Uses ip_set_req_adt, with type specific addage */ + +#define IP_SET_OP_DEL_IP 0x00000102 /* Remove an IP from a set */ +/* Uses ip_set_req_adt, with type specific addage */ + +#define IP_SET_OP_TEST_IP 0x00000103 /* Test an IP in a set */ +/* Uses ip_set_req_adt, with type specific addage */ + +#define IP_SET_OP_BIND_SET 0x00000104 /* Bind an IP to a set */ +/* Uses ip_set_req_bind, with type specific addage */ +struct ip_set_req_bind { + IP_SET_REQ_BYINDEX; + char binding[IP_SET_MAXNAMELEN]; +}; + +#define IP_SET_OP_UNBIND_SET 0x00000105 /* Unbind an IP from a set */ +/* Uses ip_set_req_bind, with type speficic addage + * index = 0 means unbinding for all sets */ + +#define IP_SET_OP_TEST_BIND_SET 0x00000106 /* Test binding an IP to a set */ +/* Uses ip_set_req_bind, with type specific addage */ + +/* Multiple shots operations: list, save, restore. 
+ * + * - check kernel version and query the max number of sets + * - get the basic information on all sets + * and size required for the next step + * - get actual set data: header, data, bindings + */ + +/* Get max_sets and the index of a queried set + */ +#define IP_SET_OP_MAX_SETS 0x00000020 +struct ip_set_req_max_sets { + unsigned op; + unsigned version; + ip_set_id_t max_sets; /* max_sets */ + ip_set_id_t sets; /* real number of sets */ + union ip_set_name_index set; /* index of set if name used */ +}; + +/* Get the id and name of the sets plus size for next step */ +#define IP_SET_OP_LIST_SIZE 0x00000201 +#define IP_SET_OP_SAVE_SIZE 0x00000202 +struct ip_set_req_setnames { + unsigned op; + ip_set_id_t index; /* set to list/save */ + size_t size; /* size to get setdata/bindings */ + /* followed by sets number of struct ip_set_name_list */ +}; + +struct ip_set_name_list { + char name[IP_SET_MAXNAMELEN]; + char typename[IP_SET_MAXNAMELEN]; + ip_set_id_t index; + ip_set_id_t id; +}; + +/* The actual list operation */ +#define IP_SET_OP_LIST 0x00000203 +struct ip_set_req_list { + IP_SET_REQ_BYINDEX; + /* sets number of struct ip_set_list in reply */ +}; + +struct ip_set_list { + ip_set_id_t index; + ip_set_id_t binding; + u_int32_t ref; + size_t header_size; /* Set header data of header_size */ + size_t members_size; /* Set members data of members_size */ + size_t bindings_size; /* Set bindings data of bindings_size */ +}; + +struct ip_set_hash_list { + ip_set_ip_t ip; + ip_set_id_t binding; +}; + +/* The save operation */ +#define IP_SET_OP_SAVE 0x00000204 +/* Uses ip_set_req_list, in the reply replaced by + * sets number of struct ip_set_save plus a marker + * ip_set_save followed by ip_set_hash_save structures. 
+ */ +struct ip_set_save { + ip_set_id_t index; + ip_set_id_t binding; + size_t header_size; /* Set header data of header_size */ + size_t members_size; /* Set members data of members_size */ +}; + +/* At restoring, ip == 0 means default binding for the given set: */ +struct ip_set_hash_save { + ip_set_ip_t ip; + ip_set_id_t id; + ip_set_id_t binding; +}; + +/* The restore operation */ +#define IP_SET_OP_RESTORE 0x00000205 +/* Uses ip_set_req_setnames followed by ip_set_restore structures + * plus a marker ip_set_restore, followed by ip_set_hash_save + * structures. + */ +struct ip_set_restore { + char name[IP_SET_MAXNAMELEN]; + char typename[IP_SET_MAXNAMELEN]; + ip_set_id_t index; + size_t header_size; /* Create data of header_size */ + size_t members_size; /* Set members data of members_size */ +}; + +static inline int bitmap_bytes(ip_set_ip_t a, ip_set_ip_t b) +{ + return 4 * ((((b - a + 8) / 8) + 3) / 4); +} + +#ifdef __KERNEL__ + +#define ip_set_printk(format, args...) \ + do { \ + printk("%s: %s: ", __FILE__, __FUNCTION__); \ + printk(format "\n" , ## args); \ + } while (0) + +#if defined(IP_SET_DEBUG) +#define DP(format, args...) \ + do { \ + printk("%s: %s (DBG): ", __FILE__, __FUNCTION__);\ + printk(format "\n" , ## args); \ + } while (0) +#define IP_SET_ASSERT(x) \ + do { \ + if (!(x)) \ + printk("IP_SET_ASSERT: %s:%i(%s)\n", \ + __FILE__, __LINE__, __FUNCTION__); \ + } while (0) +#else +#define DP(format, args...) +#define IP_SET_ASSERT(x) +#endif + +struct ip_set; + +/* + * The ip_set_type definition - one per set type, e.g. "ipmap". + * + * Each individual set has a pointer, set->type, going to one + * of these structures. Function pointers inside the structure implement + * the real behaviour of the sets. + * + * If not mentioned differently, the implementation behind the function + * pointers of a set_type, is expected to return 0 if ok, and a negative + * errno (e.g. -EINVAL) on error. 
+ */ +struct ip_set_type { + struct list_head list; /* next in list of set types */ + + /* test for IP in set (kernel: iptables -m set src|dst) + * return 0 if not in set, 1 if in set. + */ + int (*testip_kernel) (struct ip_set *set, + const struct sk_buff * skb, + u_int32_t flags, + ip_set_ip_t *ip); + + /* test for IP in set (userspace: ipset -T set IP) + * return 0 if not in set, 1 if in set. + */ + int (*testip) (struct ip_set *set, + const void *data, size_t size, + ip_set_ip_t *ip); + + /* + * Size of the data structure passed by when + * adding/deletin/testing an entry. + */ + size_t reqsize; + + /* Add IP into set (userspace: ipset -A set IP) + * Return -EEXIST if the address is already in the set, + * and -ERANGE if the address lies outside the set bounds. + * If the address was not already in the set, 0 is returned. + */ + int (*addip) (struct ip_set *set, + const void *data, size_t size, + ip_set_ip_t *ip); + + /* Add IP into set (kernel: iptables ... -j SET set src|dst) + * Return -EEXIST if the address is already in the set, + * and -ERANGE if the address lies outside the set bounds. + * If the address was not already in the set, 0 is returned. + */ + int (*addip_kernel) (struct ip_set *set, + const struct sk_buff * skb, + u_int32_t flags, + ip_set_ip_t *ip); + + /* remove IP from set (userspace: ipset -D set --entry x) + * Return -EEXIST if the address is NOT in the set, + * and -ERANGE if the address lies outside the set bounds. + * If the address really was in the set, 0 is returned. + */ + int (*delip) (struct ip_set *set, + const void *data, size_t size, + ip_set_ip_t *ip); + + /* remove IP from set (kernel: iptables ... -j SET --entry x) + * Return -EEXIST if the address is NOT in the set, + * and -ERANGE if the address lies outside the set bounds. + * If the address really was in the set, 0 is returned. 
+ */ + int (*delip_kernel) (struct ip_set *set, + const struct sk_buff * skb, + u_int32_t flags, + ip_set_ip_t *ip); + + /* new set creation - allocated type specific items + */ + int (*create) (struct ip_set *set, + const void *data, size_t size); + + /* retry the operation after successfully tweaking the set + */ + int (*retry) (struct ip_set *set); + + /* set destruction - free type specific items + * There is no return value. + * Can be called only when child sets are destroyed. + */ + void (*destroy) (struct ip_set *set); + + /* set flushing - reset all bits in the set, or something similar. + * There is no return value. + */ + void (*flush) (struct ip_set *set); + + /* Listing: size needed for header + */ + size_t header_size; + + /* Listing: Get the header + * + * Fill in the information in "data". + * This function is always run after list_header_size() under a + * writelock on the set. Therefor is the length of "data" always + * correct. + */ + void (*list_header) (const struct ip_set *set, + void *data); + + /* Listing: Get the size for the set members + */ + int (*list_members_size) (const struct ip_set *set); + + /* Listing: Get the set members + * + * Fill in the information in "data". + * This function is always run after list_member_size() under a + * writelock on the set. Therefor is the length of "data" always + * correct. 
+ */ + void (*list_members) (const struct ip_set *set, + void *data); + + char typename[IP_SET_MAXNAMELEN]; + char typecode; + int protocol_version; + + /* Set this to THIS_MODULE if you are a module, otherwise NULL */ + struct module *me; +}; + +extern int ip_set_register_set_type(struct ip_set_type *set_type); +extern void ip_set_unregister_set_type(struct ip_set_type *set_type); + +/* A generic ipset */ +struct ip_set { + char name[IP_SET_MAXNAMELEN]; /* the name of the set */ + rwlock_t lock; /* lock for concurrency control */ + ip_set_id_t id; /* set id for swapping */ + ip_set_id_t binding; /* default binding for the set */ + atomic_t ref; /* in kernel and in hash references */ + struct ip_set_type *type; /* the set types */ + void *data; /* pooltype specific data */ +}; + +/* Structure to bind set elements to sets */ +struct ip_set_hash { + struct list_head list; /* list of clashing entries in hash */ + ip_set_ip_t ip; /* ip from set */ + ip_set_id_t id; /* set id */ + ip_set_id_t binding; /* set we bind the element to */ +}; + +/* register and unregister set references */ +extern ip_set_id_t ip_set_get_byname(const char name[IP_SET_MAXNAMELEN]); +extern ip_set_id_t ip_set_get_byindex(ip_set_id_t id); +extern void ip_set_put(ip_set_id_t id); + +/* API for iptables set match, and SET target */ +extern void ip_set_addip_kernel(ip_set_id_t id, + const struct sk_buff *skb, + const u_int32_t *flags); +extern void ip_set_delip_kernel(ip_set_id_t id, + const struct sk_buff *skb, + const u_int32_t *flags); +extern int ip_set_testip_kernel(ip_set_id_t id, + const struct sk_buff *skb, + const u_int32_t *flags); + +#endif /* __KERNEL__ */ + +#endif /*_IP_SET_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_iphash.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_iphash.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_iphash.h 1970-01-01 01:00:00.000000000 +0100 +++ 
linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_iphash.h 2005-03-13 22:04:00.475724616 +0100 @@ -0,0 +1,30 @@ +#ifndef __IP_SET_IPHASH_H +#define __IP_SET_IPHASH_H + +#include + +#define SETTYPE_NAME "iphash" +#define MAX_RANGE 0x0000FFFF + +struct ip_set_iphash { + ip_set_ip_t *members; /* the iphash proper */ + uint32_t initval; /* initval for jhash_1word */ + uint32_t prime; /* prime for double hashing */ + uint32_t hashsize; /* hash size */ + uint16_t probes; /* max number of probes */ + uint16_t resize; /* resize factor in percent */ + ip_set_ip_t netmask; /* netmask */ +}; + +struct ip_set_req_iphash_create { + uint32_t hashsize; + uint16_t probes; + uint16_t resize; + ip_set_ip_t netmask; +}; + +struct ip_set_req_iphash { + ip_set_ip_t ip; +}; + +#endif /* __IP_SET_IPHASH_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_ipmap.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_ipmap.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_ipmap.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_ipmap.h 2005-03-13 22:04:00.476724464 +0100 @@ -0,0 +1,56 @@ +#ifndef __IP_SET_IPMAP_H +#define __IP_SET_IPMAP_H + +#include + +#define SETTYPE_NAME "ipmap" +#define MAX_RANGE 0x0000FFFF + +struct ip_set_ipmap { + void *members; /* the ipmap proper */ + ip_set_ip_t first_ip; /* host byte order, included in range */ + ip_set_ip_t last_ip; /* host byte order, included in range */ + ip_set_ip_t netmask; /* subnet netmask */ + ip_set_ip_t sizeid; /* size of set in IPs */ + u_int16_t hosts; /* number of hosts in a subnet */ +}; + +struct ip_set_req_ipmap_create { + ip_set_ip_t from; + ip_set_ip_t to; + ip_set_ip_t netmask; +}; + +struct ip_set_req_ipmap { + ip_set_ip_t ip; +}; + +unsigned int +mask_to_bits(ip_set_ip_t mask) +{ + unsigned int bits = 32; + ip_set_ip_t maskaddr; + + if (mask == 0xFFFFFFFF) + return bits; + + maskaddr = 0xFFFFFFFE; + while (--bits >= 0 && maskaddr != 
mask) + maskaddr <<= 1; + + return bits; +} + +ip_set_ip_t +range_to_mask(ip_set_ip_t from, ip_set_ip_t to, unsigned int *bits) +{ + ip_set_ip_t mask = 0xFFFFFFFE; + + *bits = 32; + while (--(*bits) >= 0 && mask && (to & mask) != from) + mask <<= 1; + + return mask; +} + +#endif /* __IP_SET_IPMAP_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_jhash.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_jhash.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_jhash.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_jhash.h 2005-03-13 22:04:00.477724312 +0100 @@ -0,0 +1,148 @@ +#ifndef _LINUX_IPSET_JHASH_H +#define _LINUX_IPSET_JHASH_H + +/* This is a copy of linux/jhash.h but the types u32/u8 are changed + * to __u32/__u8 so that the header file can be included into + * userspace code as well. Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + */ + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup2.c, by Bob Jenkins, December 1996, Public Domain. + * hash(), hash2(), hash3, and mix() are externally useful functions. + * Routines to test the hash are included if SELF_TEST is defined. + * You can use this free for any purpose. It has no warranty. + * + * Copyright (C) 2003 David S. Miller (davem@redhat.com) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are surely my fault. -DaveM + */ + +/* NOTE: Arguments are modified. 
*/ +#define __jhash_mix(a, b, c) \ +{ \ + a -= b; a -= c; a ^= (c>>13); \ + b -= c; b -= a; b ^= (a<<8); \ + c -= a; c -= b; c ^= (b>>13); \ + a -= b; a -= c; a ^= (c>>12); \ + b -= c; b -= a; b ^= (a<<16); \ + c -= a; c -= b; c ^= (b>>5); \ + a -= b; a -= c; a ^= (c>>3); \ + b -= c; b -= a; b ^= (a<<10); \ + c -= a; c -= b; c ^= (b>>15); \ +} + +/* The golden ration: an arbitrary value */ +#define JHASH_GOLDEN_RATIO 0x9e3779b9 + +/* The most generic version, hashes an arbitrary sequence + * of bytes. No alignment or length assumptions are made about + * the input key. + */ +static inline __u32 jhash(void *key, __u32 length, __u32 initval) +{ + __u32 a, b, c, len; + __u8 *k = key; + + len = length; + a = b = JHASH_GOLDEN_RATIO; + c = initval; + + while (len >= 12) { + a += (k[0] +((__u32)k[1]<<8) +((__u32)k[2]<<16) +((__u32)k[3]<<24)); + b += (k[4] +((__u32)k[5]<<8) +((__u32)k[6]<<16) +((__u32)k[7]<<24)); + c += (k[8] +((__u32)k[9]<<8) +((__u32)k[10]<<16)+((__u32)k[11]<<24)); + + __jhash_mix(a,b,c); + + k += 12; + len -= 12; + } + + c += length; + switch (len) { + case 11: c += ((__u32)k[10]<<24); + case 10: c += ((__u32)k[9]<<16); + case 9 : c += ((__u32)k[8]<<8); + case 8 : b += ((__u32)k[7]<<24); + case 7 : b += ((__u32)k[6]<<16); + case 6 : b += ((__u32)k[5]<<8); + case 5 : b += k[4]; + case 4 : a += ((__u32)k[3]<<24); + case 3 : a += ((__u32)k[2]<<16); + case 2 : a += ((__u32)k[1]<<8); + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + +/* A special optimized version that handles 1 or more of __u32s. + * The length parameter here is the number of __u32s in the key. 
+ */ +static inline __u32 jhash2(__u32 *k, __u32 length, __u32 initval) +{ + __u32 a, b, c, len; + + a = b = JHASH_GOLDEN_RATIO; + c = initval; + len = length; + + while (len >= 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + k += 3; len -= 3; + } + + c += length * 4; + + switch (len) { + case 2 : b += k[1]; + case 1 : a += k[0]; + }; + + __jhash_mix(a,b,c); + + return c; +} + + +/* A special ultra-optimized versions that knows they are hashing exactly + * 3, 2 or 1 word(s). + * + * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally + * done at the end is not done here. + */ +static inline __u32 jhash_3words(__u32 a, __u32 b, __u32 c, __u32 initval) +{ + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += initval; + + __jhash_mix(a, b, c); + + return c; +} + +static inline __u32 jhash_2words(__u32 a, __u32 b, __u32 initval) +{ + return jhash_3words(a, b, 0, initval); +} + +static inline __u32 jhash_1word(__u32 a, __u32 initval) +{ + return jhash_3words(a, 0, 0, initval); +} + +#endif /* _LINUX_IPSET_JHASH_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_macipmap.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_macipmap.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_macipmap.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_macipmap.h 2005-03-13 22:04:00.478724160 +0100 @@ -0,0 +1,38 @@ +#ifndef __IP_SET_MACIPMAP_H +#define __IP_SET_MACIPMAP_H + +#include + +#define SETTYPE_NAME "macipmap" +#define MAX_RANGE 0x0000FFFF + +/* general flags */ +#define IPSET_MACIP_MATCHUNSET 1 + +/* per ip flags */ +#define IPSET_MACIP_ISSET 1 + +struct ip_set_macipmap { + void *members; /* the macipmap proper */ + ip_set_ip_t first_ip; /* host byte order, included in range */ + ip_set_ip_t last_ip; /* host byte order, included in range */ + u_int32_t flags; +}; + +struct ip_set_req_macipmap_create { + ip_set_ip_t from; + ip_set_ip_t to; + 
u_int32_t flags; +}; + +struct ip_set_req_macipmap { + ip_set_ip_t ip; + unsigned char ethernet[ETH_ALEN]; +}; + +struct ip_set_macip { + unsigned short flags; + unsigned char ethernet[ETH_ALEN]; +}; + +#endif /* __IP_SET_MACIPMAP_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_malloc.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_malloc.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_malloc.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_malloc.h 2005-03-13 22:04:00.479724008 +0100 @@ -0,0 +1,34 @@ +#ifndef _IP_SET_MALLOC_H +#define _IP_SET_MALLOC_H + +#ifdef __KERNEL__ + +/* Memory allocation and deallocation */ +static size_t max_malloc_size = 0; + +static inline void init_max_malloc_size(void) +{ +#define CACHE(x) max_malloc_size = x; +#include +#undef CACHE +} + +static inline void * ip_set_malloc(size_t bytes) +{ + if (bytes > max_malloc_size) + return vmalloc(bytes); + else + return kmalloc(bytes, GFP_KERNEL); +} + +static inline void ip_set_free(void * data, size_t bytes) +{ + if (bytes > max_malloc_size) + vfree(data); + else + kfree(data); +} + +#endif /* __KERNEL__ */ + +#endif /*_IP_SET_MALLOC_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_nethash.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_nethash.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_nethash.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_nethash.h 2005-03-13 22:04:00.480723856 +0100 @@ -0,0 +1,55 @@ +#ifndef __IP_SET_NETHASH_H +#define __IP_SET_NETHASH_H + +#include + +#define SETTYPE_NAME "nethash" +#define MAX_RANGE 0x0000FFFF + +struct ip_set_nethash { + ip_set_ip_t *members; /* the nethash proper */ + uint32_t initval; /* initval for jhash_1word */ + uint32_t prime; /* prime for double hashing */ + uint32_t hashsize; /* hash size */ + uint16_t probes; /* max number of probes */ 
+ uint16_t resize; /* resize factor in percent */ + unsigned char cidr[30]; /* CIDR sizes */ +}; + +struct ip_set_req_nethash_create { + uint32_t hashsize; + uint16_t probes; + uint16_t resize; +}; + +struct ip_set_req_nethash { + ip_set_ip_t ip; + unsigned char cidr; +}; + +static unsigned char shifts[] = {255, 253, 249, 242, 225, 193, 129, 1}; + +static inline ip_set_ip_t +pack(ip_set_ip_t ip, unsigned char cidr) +{ + ip_set_ip_t addr, *paddr = &addr; + unsigned char n, t, *a; + + addr = htonl(ip & (0xFFFFFFFF << (32 - (cidr)))); +#ifdef __KERNEL__ + DP("ip:%u.%u.%u.%u/%u", NIPQUAD(addr), cidr); +#endif + n = cidr / 8; + t = cidr % 8; + a = &((unsigned char *)paddr)[n]; + *a = *a /(1 << (8 - t)) + shifts[t]; +#ifdef __KERNEL__ + DP("n: %u, t: %u, a: %u", n, t, *a); + DP("ip:%u.%u.%u.%u/%u, %u.%u.%u.%u", + HIPQUAD(ip), cidr, NIPQUAD(addr)); +#endif + + return ntohl(addr); +} + +#endif /* __IP_SET_NETHASH_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_portmap.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_portmap.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_portmap.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_portmap.h 2005-03-13 22:04:00.482723552 +0100 @@ -0,0 +1,25 @@ +#ifndef __IP_SET_PORTMAP_H +#define __IP_SET_PORTMAP_H + +#include + +#define SETTYPE_NAME "portmap" +#define MAX_RANGE 0x0000FFFF +#define INVALID_PORT (MAX_RANGE + 1) + +struct ip_set_portmap { + void *members; /* the portmap proper */ + ip_set_ip_t first_port; /* host byte order, included in range */ + ip_set_ip_t last_port; /* host byte order, included in range */ +}; + +struct ip_set_req_portmap_create { + ip_set_ip_t from; + ip_set_ip_t to; +}; + +struct ip_set_req_portmap { + ip_set_ip_t port; +}; + +#endif /* __IP_SET_PORTMAP_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_prime.h linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_prime.h 
--- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ip_set_prime.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ip_set_prime.h 2005-03-13 22:04:00.483723400 +0100 @@ -0,0 +1,34 @@ +#ifndef __IP_SET_PRIME_H +#define __IP_SET_PRIME_H + +static inline unsigned make_prime_bound(unsigned nr) +{ + unsigned long long nr64 = nr; + unsigned long long x = 1; + nr = 1; + while (x <= nr64) { x <<= 2; nr <<= 1; } + return nr; +} + +static inline int make_prime_check(unsigned nr) +{ + unsigned x = 3; + unsigned b = make_prime_bound(nr); + while (x <= b) { + if (0 == (nr % x)) return 0; + x += 2; + } + return 1; +} + +static unsigned make_prime(unsigned nr) +{ + if (0 == (nr & 1)) nr--; + while (nr > 1) { + if (make_prime_check(nr)) return nr; + nr -= 2; + } + return 2; +} + +#endif /* __IP_SET_PRIME_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_IPMARK.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_IPMARK.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_IPMARK.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_IPMARK.h 2005-03-13 22:04:32.035926736 +0100 @@ -0,0 +1,13 @@ +#ifndef _IPT_IPMARK_H_target +#define _IPT_IPMARK_H_target + +struct ipt_ipmark_target_info { + unsigned long andmask; + unsigned long ormask; + unsigned int addr; +}; + +#define IPT_IPMARK_SRC 0 +#define IPT_IPMARK_DST 1 + +#endif /*_IPT_IPMARK_H_target*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_ROUTE.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_ROUTE.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_ROUTE.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_ROUTE.h 2005-03-13 22:04:33.119761968 +0100 @@ -0,0 +1,23 @@ +/* Header file for iptables ipt_ROUTE target + * + * (C) 2002 by Cédric de Launois + * + * This software is distributed under GNU GPL v2, 1991 + */ +#ifndef _IPT_ROUTE_H_target 
+#define _IPT_ROUTE_H_target + +#define IPT_ROUTE_IFNAMSIZ 16 + +struct ipt_route_target_info { + char oif[IPT_ROUTE_IFNAMSIZ]; /* Output Interface Name */ + char iif[IPT_ROUTE_IFNAMSIZ]; /* Input Interface Name */ + u_int32_t gw; /* IP address of gateway */ + u_int8_t flags; +}; + +/* Values for "flags" field */ +#define IPT_ROUTE_CONTINUE 0x01 +#define IPT_ROUTE_TEE 0x02 + +#endif /*_IPT_ROUTE_H_target*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_TTL.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_TTL.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_TTL.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_TTL.h 2005-03-13 22:03:52.571926176 +0100 @@ -0,0 +1,21 @@ +/* TTL modification module for IP tables + * (C) 2000 by Harald Welte */ + +#ifndef _IPT_TTL_H +#define _IPT_TTL_H + +enum { + IPT_TTL_SET = 0, + IPT_TTL_INC, + IPT_TTL_DEC +}; + +#define IPT_TTL_MAXMODE IPT_TTL_DEC + +struct ipt_TTL_info { + u_int8_t mode; + u_int8_t ttl; +}; + + +#endif diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_XOR.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_XOR.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_XOR.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_XOR.h 2005-03-13 22:05:12.803729088 +0100 @@ -0,0 +1,9 @@ +#ifndef _IPT_XOR_H +#define _IPT_XOR_H + +struct ipt_XOR_info { + char key[30]; + u_int8_t block_size; +}; + +#endif /* _IPT_XOR_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_account.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_account.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_account.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_account.h 2005-03-13 22:05:13.932557480 +0100 @@ -0,0 +1,26 @@ +/* + * accounting match (ipt_account.c) + * (C) 2003,2004 by Piotr Gasidlo 
(quaker@barbara.eu.org) + * + * Version: 0.1.7 + * + * This software is distributed under the terms of GNU GPL + */ + +#ifndef _IPT_ACCOUNT_H_ +#define _IPT_ACCOUNT_H_ + +#define IPT_ACCOUNT_NAME_LEN 64 + +#define IPT_ACCOUNT_NAME "ipt_account" +#define IPT_ACCOUNT_VERSION "0.1.7" + +struct t_ipt_account_info { + char name[IPT_ACCOUNT_NAME_LEN]; + u_int32_t network; + u_int32_t netmask; + int shortlisting:1; +}; + +#endif + diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_connlimit.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_connlimit.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_connlimit.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_connlimit.h 2005-03-13 22:03:53.554776760 +0100 @@ -0,0 +1,12 @@ +#ifndef _IPT_CONNLIMIT_H +#define _IPT_CONNLIMIT_H + +struct ipt_connlimit_data; + +struct ipt_connlimit_info { + int limit; + int inverse; + u_int32_t mask; + struct ipt_connlimit_data *data; +}; +#endif /* _IPT_CONNLIMIT_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_fuzzy.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_fuzzy.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_fuzzy.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_fuzzy.h 2005-03-13 22:03:55.522477624 +0100 @@ -0,0 +1,21 @@ +#ifndef _IPT_FUZZY_H +#define _IPT_FUZZY_H + +#include +#include + +#define MAXFUZZYRATE 10000000 +#define MINFUZZYRATE 3 + +struct ipt_fuzzy_info { + u_int32_t minimum_rate; + u_int32_t maximum_rate; + u_int32_t packets_total; + u_int32_t bytes_total; + u_int32_t previous_time; + u_int32_t present_time; + u_int32_t mean_rate; + u_int8_t acceptance_rate; +}; + +#endif /*_IPT_FUZZY_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_geoip.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_geoip.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_geoip.h 1970-01-01 
01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_geoip.h 2005-03-13 22:05:27.878437384 +0100 @@ -0,0 +1,50 @@ +/* ipt_geoip.h header file for libipt_geoip.c and ipt_geoip.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * Copyright (c) 2004 Cookinglinux + */ +#ifndef _IPT_GEOIP_H +#define _IPT_GEOIP_H + +#define IPT_GEOIP_SRC 0x01 /* Perform check on Source IP */ +#define IPT_GEOIP_DST 0x02 /* Perform check on Destination IP */ +#define IPT_GEOIP_INV 0x04 /* Negate the condition */ + +#define IPT_GEOIP_MAX 15 /* Maximum of countries */ + +struct geoip_subnet { + u_int32_t begin; + u_int32_t end; +}; + +struct geoip_info { + struct geoip_subnet *subnets; + u_int32_t count; + u_int32_t ref; + u_int16_t cc; + struct geoip_info *next; + struct geoip_info *prev; +}; + +struct ipt_geoip_info { + u_int8_t flags; + u_int8_t count; + u_int16_t cc[IPT_GEOIP_MAX]; + + /* Used internally by the kernel */ + struct geoip_info *mem[IPT_GEOIP_MAX]; + u_int8_t *refcount; + + /* not implemented yet: + void *fini; + */ +}; + +#define COUNTRY(cc) (cc >> 8), (cc & 0x00FF) + +#endif + diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_ipp2p.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_ipp2p.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_ipp2p.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_ipp2p.h 2005-03-13 22:05:51.892786648 +0100 @@ -0,0 +1,29 @@ +#ifndef __IPT_IPP2P_H +#define __IPT_IPP2P_H +#define IPP2P_VERSION "0.7.2" + +struct ipt_p2p_info { + int cmd; + int debug; +}; + +#endif //__IPT_IPP2P_H + +#define SHORT_HAND_IPP2P 1 /* --ipp2p switch*/ +#define SHORT_HAND_DATA 4 /* --ipp2p-data switch*/ +#define SHORT_HAND_NONE 5 /* no short hand*/ + +#define 
IPP2P_EDK 2 +#define IPP2P_DATA_KAZAA 8 +#define IPP2P_DATA_EDK 16 +#define IPP2P_DATA_DC 32 +#define IPP2P_DC 64 +#define IPP2P_DATA_GNU 128 +#define IPP2P_GNU 256 +#define IPP2P_KAZAA 512 +#define IPP2P_BIT 1024 +#define IPP2P_APPLE 2048 +#define IPP2P_SOUL 4096 +#define IPP2P_WINMX 8192 +#define IPP2P_ARES 16384 + diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_ipv4options.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_ipv4options.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_ipv4options.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_ipv4options.h 2005-03-13 22:03:56.287361344 +0100 @@ -0,0 +1,21 @@ +#ifndef __ipt_ipv4options_h_included__ +#define __ipt_ipv4options_h_included__ + +#define IPT_IPV4OPTION_MATCH_SSRR 0x01 /* For strict source routing */ +#define IPT_IPV4OPTION_MATCH_LSRR 0x02 /* For loose source routing */ +#define IPT_IPV4OPTION_DONT_MATCH_SRR 0x04 /* any source routing */ +#define IPT_IPV4OPTION_MATCH_RR 0x08 /* For Record route */ +#define IPT_IPV4OPTION_DONT_MATCH_RR 0x10 +#define IPT_IPV4OPTION_MATCH_TIMESTAMP 0x20 /* For timestamp request */ +#define IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP 0x40 +#define IPT_IPV4OPTION_MATCH_ROUTER_ALERT 0x80 /* For router-alert */ +#define IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT 0x100 +#define IPT_IPV4OPTION_MATCH_ANY_OPT 0x200 /* match packet with any option */ +#define IPT_IPV4OPTION_DONT_MATCH_ANY_OPT 0x400 /* match packet with no option */ + +struct ipt_ipv4options_info { + u_int16_t options; +}; + + +#endif /* __ipt_ipv4options_h_included__ */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_nth.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_nth.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_nth.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_nth.h 2005-03-13 22:03:57.029248560 +0100 @@ -0,0 +1,19 @@ +#ifndef _IPT_NTH_H +#define 
_IPT_NTH_H + +#include +#include + +#ifndef IPT_NTH_NUM_COUNTERS +#define IPT_NTH_NUM_COUNTERS 16 +#endif + +struct ipt_nth_info { + u_int8_t every; + u_int8_t not; + u_int8_t startat; + u_int8_t counter; + u_int8_t packet; +}; + +#endif /*_IPT_NTH_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_osf.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_osf.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_osf.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_osf.h 2005-03-13 22:03:57.763136992 +0100 @@ -0,0 +1,150 @@ +/* + * ipt_osf.h + * + * Copyright (c) 2003 Evgeniy Polyakov + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef _IPT_OSF_H +#define _IPT_OSF_H + +#define MAXGENRELEN 32 +#define MAXDETLEN 64 + +#define IPT_OSF_GENRE 1 +#define IPT_OSF_SMART 2 +#define IPT_OSF_LOG 4 +#define IPT_OSF_NETLINK 8 + +#define IPT_OSF_LOGLEVEL_ALL 0 +#define IPT_OSF_LOGLEVEL_FIRST 1 + +#ifndef __KERNEL__ +#include +#include + +struct list_head +{ + struct list_head *prev, *next; +}; +#endif + +struct ipt_osf_info +{ + char genre[MAXGENRELEN]; + int len; + unsigned long flags; + int loglevel; + int invert; /* UNSUPPORTED */ +}; + +struct osf_wc +{ + char wc; + unsigned long val; +}; + +/* This struct represents IANA options + * http://www.iana.org/assignments/tcp-parameters + */ +struct osf_opt +{ + unsigned char kind; + unsigned char length; + struct osf_wc wc; +}; + +struct osf_finger +{ + struct list_head flist; + struct osf_wc wss; + unsigned char ttl; + unsigned char df; + unsigned long ss; + unsigned char genre[MAXGENRELEN]; + unsigned char version[MAXGENRELEN], subtype[MAXGENRELEN]; + + /* Not needed, but for consistency with original table from Michal Zalewski */ + unsigned char details[MAXDETLEN]; + + int opt_num; + struct osf_opt opt[MAX_IPOPTLEN]; /* In case it is all NOP or EOL */ + +}; + +struct ipt_osf_nlmsg +{ + struct osf_finger f; + struct iphdr ip; + struct tcphdr tcp; +}; + +#ifdef __KERNEL__ + +#include +#include + + +/* Defines for IANA option kinds */ + +#define OSFOPT_EOL 0 /* End of options */ +#define OSFOPT_NOP 1 /* NOP */ +#define OSFOPT_MSS 2 /* Maximum segment size */ +#define OSFOPT_WSO 3 /* Window scale option */ +#define OSFOPT_SACKP 4 /* SACK permitted */ +#define OSFOPT_SACK 5 /* SACK */ +#define OSFOPT_ECHO 6 +#define OSFOPT_ECHOREPLY 7 +#define OSFOPT_TS 8 /* Timestamp option */ +#define OSFOPT_POCP 9 /* Partial Order Connection Permitted 
*/ +#define OSFOPT_POSP 10 /* Partial Order Service Profile */ +/* Others are not used in current OSF */ + +static struct osf_opt IANA_opts[] = +{ + {0, 1,}, + {1, 1,}, + {2, 4,}, + {3, 3,}, + {4, 2,}, + {5, 1 ,}, /* SACK length is not defined */ + {6, 6,}, + {7, 6,}, + {8, 10,}, + {9, 2,}, + {10, 3,}, + {11, 1,}, /* CC: Suppose 1 */ + {12, 1,}, /* the same */ + {13, 1,}, /* and here too */ + {14, 3,}, + {15, 1,}, /* TCP Alternate Checksum Data. Length is not defined */ + {16, 1,}, + {17, 1,}, + {18, 3,}, + {19, 18,}, + {20, 1,}, + {21, 1,}, + {22, 1,}, + {23, 1,}, + {24, 1,}, + {25, 1,}, + {26, 1,}, +}; + +#endif /* __KERNEL__ */ + +#endif /* _IPT_OSF_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_policy.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_policy.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_policy.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_policy.h 2005-03-13 22:06:07.321441136 +0100 @@ -0,0 +1,52 @@ +#ifndef _IPT_POLICY_H +#define _IPT_POLICY_H + +#define POLICY_MAX_ELEM 4 + +enum ipt_policy_flags +{ + POLICY_MATCH_IN = 0x1, + POLICY_MATCH_OUT = 0x2, + POLICY_MATCH_NONE = 0x4, + POLICY_MATCH_STRICT = 0x8, +}; + +enum ipt_policy_modes +{ + POLICY_MODE_TRANSPORT, + POLICY_MODE_TUNNEL +}; + +struct ipt_policy_spec +{ + u_int8_t saddr:1, + daddr:1, + proto:1, + mode:1, + spi:1, + reqid:1; +}; + +struct ipt_policy_elem +{ + u_int32_t saddr; + u_int32_t smask; + u_int32_t daddr; + u_int32_t dmask; + u_int32_t spi; + u_int32_t reqid; + u_int8_t proto; + u_int8_t mode; + + struct ipt_policy_spec match; + struct ipt_policy_spec invert; +}; + +struct ipt_policy_info +{ + struct ipt_policy_elem pol[POLICY_MAX_ELEM]; + u_int16_t flags; + u_int16_t len; +}; + +#endif /* _IPT_POLICY_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_psd.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_psd.h --- 
linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_psd.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_psd.h 2005-03-13 22:03:58.555016608 +0100 @@ -0,0 +1,40 @@ +#ifndef _IPT_PSD_H +#define _IPT_PSD_H + +#include +#include + +/* + * High port numbers have a lower weight to reduce the frequency of false + * positives, such as from passive mode FTP transfers. + */ +#define PORT_WEIGHT_PRIV 3 +#define PORT_WEIGHT_HIGH 1 + +/* + * Port scan detection thresholds: at least COUNT ports need to be scanned + * from the same source, with no longer than DELAY ticks between ports. + */ +#define SCAN_MIN_COUNT 7 +#define SCAN_MAX_COUNT (SCAN_MIN_COUNT * PORT_WEIGHT_PRIV) +#define SCAN_WEIGHT_THRESHOLD SCAN_MAX_COUNT +#define SCAN_DELAY_THRESHOLD (300) /* old usage of HZ here was erroneously and broke under uml */ + +/* + * Keep track of up to LIST_SIZE source addresses, using a hash table of + * HASH_SIZE entries for faster lookups, but limiting hash collisions to + * HASH_MAX source addresses per the same hash value. 
+ */ +#define LIST_SIZE 0x100 +#define HASH_LOG 9 +#define HASH_SIZE (1 << HASH_LOG) +#define HASH_MAX 0x10 + +struct ipt_psd_info { + unsigned int weight_threshold; + unsigned int delay_threshold; + unsigned short lo_ports_weight; + unsigned short hi_ports_weight; +}; + +#endif /*_IPT_PSD_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_quota.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_quota.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_quota.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_quota.h 2005-03-13 22:03:59.678845760 +0100 @@ -0,0 +1,11 @@ +#ifndef _IPT_QUOTA_H +#define _IPT_QUOTA_H + +/* print debug info in both kernel/netfilter module & iptable library */ +//#define DEBUG_IPT_QUOTA + +struct ipt_quota_info { + u_int64_t quota; +}; + +#endif /*_IPT_QUOTA_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_set.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_set.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_set.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_set.h 2005-03-13 22:04:00.484723248 +0100 @@ -0,0 +1,21 @@ +#ifndef _IPT_SET_H +#define _IPT_SET_H + +#include + +struct ipt_set_info { + ip_set_id_t index; + u_int32_t flags[IP_SET_MAX_BINDINGS + 1]; +}; + +/* match info */ +struct ipt_set_info_match { + struct ipt_set_info match_set; +}; + +struct ipt_set_info_target { + struct ipt_set_info add_set; + struct ipt_set_info del_set; +}; + +#endif /*_IPT_SET_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_time.h linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_time.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv4/ipt_time.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv4/ipt_time.h 2005-03-13 22:04:01.208613200 +0100 @@ -0,0 +1,15 @@ +#ifndef __ipt_time_h_included__ +#define 
__ipt_time_h_included__ + + +struct ipt_time_info { + u_int8_t days_match; /* 1 bit per day. -SMTWTFS */ + u_int16_t time_start; /* 0 < time_start < 23*60+59 = 1439 */ + u_int16_t time_stop; /* 0:0 < time_stat < 23:59 */ + u_int8_t kerneltime; /* ignore skb time (and use kerneltime) or not. */ + time_t date_start; + time_t date_stop; +}; + + +#endif /* __ipt_time_h_included__ */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_HL.h linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_HL.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_HL.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_HL.h 2005-03-13 22:03:48.782502256 +0100 @@ -0,0 +1,22 @@ +/* Hop Limit modification module for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module */ + +#ifndef _IP6T_HL_H +#define _IP6T_HL_H + +enum { + IP6T_HL_SET = 0, + IP6T_HL_INC, + IP6T_HL_DEC +}; + +#define IP6T_HL_MAXMODE IP6T_HL_DEC + +struct ip6t_HL_info { + u_int8_t mode; + u_int8_t hop_limit; +}; + + +#endif diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_ROUTE.h linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_ROUTE.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_ROUTE.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_ROUTE.h 2005-03-13 22:04:33.119761968 +0100 @@ -0,0 +1,23 @@ +/* Header file for iptables ip6t_ROUTE target + * + * (C) 2003 by Cédric de Launois + * + * This software is distributed under GNU GPL v2, 1991 + */ +#ifndef _IPT_ROUTE_H_target +#define _IPT_ROUTE_H_target + +#define IP6T_ROUTE_IFNAMSIZ 16 + +struct ip6t_route_target_info { + char oif[IP6T_ROUTE_IFNAMSIZ]; /* Output Interface Name */ + char iif[IP6T_ROUTE_IFNAMSIZ]; /* Input Interface Name */ + u_int32_t gw[4]; /* IPv6 address of gateway */ + u_int8_t flags; +}; + +/* Values for "flags" field */ +#define IP6T_ROUTE_CONTINUE 0x01 +#define IP6T_ROUTE_TEE 0x02 + +#endif 
/*_IP6T_ROUTE_H_target*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_fuzzy.h linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_fuzzy.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_fuzzy.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_fuzzy.h 2005-03-13 22:03:55.523477472 +0100 @@ -0,0 +1,21 @@ +#ifndef _IP6T_FUZZY_H +#define _IP6T_FUZZY_H + +#include +#include + +#define MAXFUZZYRATE 10000000 +#define MINFUZZYRATE 3 + +struct ip6t_fuzzy_info { + u_int32_t minimum_rate; + u_int32_t maximum_rate; + u_int32_t packets_total; + u_int32_t bytes_total; + u_int32_t previous_time; + u_int32_t present_time; + u_int32_t mean_rate; + u_int8_t acceptance_rate; +}; + +#endif /*_IP6T_FUZZY_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_nth.h linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_nth.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_nth.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_nth.h 2005-03-13 22:03:57.030248408 +0100 @@ -0,0 +1,19 @@ +#ifndef _IP6T_NTH_H +#define _IP6T_NTH_H + +#include +#include + +#ifndef IP6T_NTH_NUM_COUNTERS +#define IP6T_NTH_NUM_COUNTERS 16 +#endif + +struct ip6t_nth_info { + u_int8_t every; + u_int8_t not; + u_int8_t startat; + u_int8_t counter; + u_int8_t packet; +}; + +#endif /*_IP6T_NTH_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_owner.h linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_owner.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_owner.h 2005-03-13 07:44:29.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_owner.h 2005-03-13 22:03:36.642347840 +0100 @@ -6,12 +6,14 @@ #define IP6T_OWNER_GID 0x02 #define IP6T_OWNER_PID 0x04 #define IP6T_OWNER_SID 0x08 +#define IP6T_OWNER_COMM 0x10 struct ip6t_owner_info { uid_t uid; gid_t gid; pid_t pid; pid_t sid; + char comm[16]; u_int8_t 
match, invert; /* flags */ }; diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_policy.h linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_policy.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv6/ip6t_policy.h 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv6/ip6t_policy.h 2005-03-13 22:06:07.322440984 +0100 @@ -0,0 +1,52 @@ +#ifndef _IP6T_POLICY_H +#define _IP6T_POLICY_H + +#define POLICY_MAX_ELEM 4 + +enum ip6t_policy_flags +{ + POLICY_MATCH_IN = 0x1, + POLICY_MATCH_OUT = 0x2, + POLICY_MATCH_NONE = 0x4, + POLICY_MATCH_STRICT = 0x8, +}; + +enum ip6t_policy_modes +{ + POLICY_MODE_TRANSPORT, + POLICY_MODE_TUNNEL +}; + +struct ip6t_policy_spec +{ + u_int8_t saddr:1, + daddr:1, + proto:1, + mode:1, + spi:1, + reqid:1; +}; + +struct ip6t_policy_elem +{ + struct in6_addr saddr; + struct in6_addr smask; + struct in6_addr daddr; + struct in6_addr dmask; + u_int32_t spi; + u_int32_t reqid; + u_int8_t proto; + u_int8_t mode; + + struct ip6t_policy_spec match; + struct ip6t_policy_spec invert; +}; + +struct ip6t_policy_info +{ + struct ip6t_policy_elem pol[POLICY_MAX_ELEM]; + u_int16_t flags; + u_int16_t len; +}; + +#endif /* _IP6T_POLICY_H */ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/netfilter_ipv6.h linux-2.6.11.3/include/linux/netfilter_ipv6.h --- linux-2.6.11.3.org/include/linux/netfilter_ipv6.h 2005-03-13 07:44:54.000000000 +0100 +++ linux-2.6.11.3/include/linux/netfilter_ipv6.h 2005-03-13 22:06:02.260210560 +0100 @@ -56,6 +56,7 @@ enum nf_ip6_hook_priorities { NF_IP6_PRI_FIRST = INT_MIN, + NF_IP6_PRI_CONNTRACK_DEFRAG = -400, NF_IP6_PRI_SELINUX_FIRST = -225, NF_IP6_PRI_CONNTRACK = -200, NF_IP6_PRI_BRIDGE_SABOTAGE_FORWARD = -175, @@ -68,4 +69,6 @@ NF_IP6_PRI_LAST = INT_MAX, }; +#define SO_ORIGINAL_DST 80 + #endif /*__LINUX_IP6_NETFILTER_H*/ diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/skbuff.h linux-2.6.11.3/include/linux/skbuff.h --- 
linux-2.6.11.3.org/include/linux/skbuff.h 2005-03-13 07:44:50.000000000 +0100 +++ linux-2.6.11.3/include/linux/skbuff.h 2005-03-13 22:06:02.259210712 +0100 @@ -251,6 +251,9 @@ __u32 nfcache; __u32 nfctinfo; struct nf_conntrack *nfct; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + struct sk_buff *nfct_reasm; +#endif #ifdef CONFIG_NETFILTER_DEBUG unsigned int nf_debug; #endif @@ -1145,10 +1148,26 @@ if (nfct) atomic_inc(&nfct->use); } +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +static inline void nf_conntrack_get_reasm(struct sk_buff *skb) +{ + if (skb) + atomic_inc(&skb->users); +} +static inline void nf_conntrack_put_reasm(struct sk_buff *skb) +{ + if (skb) + kfree_skb(skb); +} +#endif static inline void nf_reset(struct sk_buff *skb) { nf_conntrack_put(skb->nfct); skb->nfct = NULL; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put_reasm(skb->nfct_reasm); + skb->nfct_reasm = NULL; +#endif #ifdef CONFIG_NETFILTER_DEBUG skb->nf_debug = 0; #endif diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/linux/sysctl.h linux-2.6.11.3/include/linux/sysctl.h --- linux-2.6.11.3.org/include/linux/sysctl.h 2005-03-13 07:44:27.000000000 +0100 +++ linux-2.6.11.3/include/linux/sysctl.h 2005-03-13 22:06:02.258210864 +0100 @@ -192,6 +192,7 @@ NET_DECNET=15, NET_ECONET=16, NET_SCTP=17, + NET_NETFILTER=18, }; /* /proc/sys/kernel/random */ @@ -256,6 +257,42 @@ NET_UNIX_MAX_DGRAM_QLEN=3, }; +/* /proc/sys/net/netfilter */ +enum +{ + NET_NF_CONNTRACK_MAX=1, + NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT=2, + NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV=3, + NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED=4, + NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT=5, + NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT=6, + NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK=7, + NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT=8, + NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE=9, + NET_NF_CONNTRACK_UDP_TIMEOUT=10, + NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM=11, + 
NET_NF_CONNTRACK_ICMP_TIMEOUT=12, + NET_NF_CONNTRACK_GENERIC_TIMEOUT=13, + NET_NF_CONNTRACK_BUCKETS=14, + NET_NF_CONNTRACK_LOG_INVALID=15, + NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS=16, + NET_NF_CONNTRACK_TCP_LOOSE=17, + NET_NF_CONNTRACK_TCP_BE_LIBERAL=18, + NET_NF_CONNTRACK_TCP_MAX_RETRANS=19, + NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED=20, + NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT=21, + NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED=22, + NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED=23, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT=24, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD=25, + NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT=26, + NET_NF_CONNTRACK_COUNT=27, + NET_NF_CONNTRACK_ICMPV6_TIMEOUT=28, + NET_NF_CONNTRACK_FRAG6_TIMEOUT=29, + NET_NF_CONNTRACK_FRAG6_LOW_THRESH=30, + NET_NF_CONNTRACK_FRAG6_HIGH_THRESH=31, +}; + /* /proc/sys/net/ipv4 */ enum { diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/net/tcp.h linux-2.6.11.3/include/net/tcp.h --- linux-2.6.11.3.org/include/net/tcp.h 2005-03-13 07:44:07.000000000 +0100 +++ linux-2.6.11.3/include/net/tcp.h 2005-03-13 22:06:04.899809280 +0100 @@ -159,6 +159,7 @@ extern void tcp_bucket_destroy(struct tcp_bind_bucket *tb); extern void tcp_bucket_unlock(struct sock *sk); extern int tcp_port_rover; +extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 hnum, int dif); /* These are AF independent. 
*/ static __inline__ int tcp_bhashfn(__u16 lport) diff -Nur --exclude '*.orig' linux-2.6.11.3.org/include/net/udp.h linux-2.6.11.3/include/net/udp.h --- linux-2.6.11.3.org/include/net/udp.h 2005-03-13 07:44:38.000000000 +0100 +++ linux-2.6.11.3/include/net/udp.h 2005-03-13 22:06:04.900809128 +0100 @@ -74,6 +74,8 @@ extern unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait); +extern struct sock *udp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif); + DECLARE_SNMP_STAT(struct udp_mib, udp_statistics); #define UDP_INC_STATS(field) SNMP_INC_STATS(udp_statistics, field) #define UDP_INC_STATS_BH(field) SNMP_INC_STATS_BH(udp_statistics, field) diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/Kconfig linux-2.6.11.3/net/Kconfig --- linux-2.6.11.3.org/net/Kconfig 2005-03-13 07:44:45.000000000 +0100 +++ linux-2.6.11.3/net/Kconfig 2005-03-13 22:06:02.257211016 +0100 @@ -216,6 +216,7 @@ source "net/ipv6/netfilter/Kconfig" source "net/decnet/netfilter/Kconfig" source "net/bridge/netfilter/Kconfig" +source "net/netfilter/Kconfig" endif diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/Makefile linux-2.6.11.3/net/Makefile --- linux-2.6.11.3.org/net/Makefile 2005-03-13 07:44:24.000000000 +0100 +++ linux-2.6.11.3/net/Makefile 2005-03-13 22:06:02.256211168 +0100 @@ -42,6 +42,7 @@ obj-$(CONFIG_ECONET) += econet/ obj-$(CONFIG_VLAN_8021Q) += 8021q/ obj-$(CONFIG_IP_SCTP) += sctp/ +obj-$(CONFIG_NETFILTER) += netfilter/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/core/skbuff.c linux-2.6.11.3/net/core/skbuff.c --- linux-2.6.11.3.org/net/core/skbuff.c 2005-03-13 07:44:30.000000000 +0100 +++ linux-2.6.11.3/net/core/skbuff.c 2005-03-13 22:06:02.255211320 +0100 @@ -293,6 +293,9 @@ } #ifdef CONFIG_NETFILTER nf_conntrack_put(skb->nfct); +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put_reasm(skb->nfct_reasm); +#endif #ifdef 
CONFIG_BRIDGE_NETFILTER nf_bridge_put(skb->nf_bridge); #endif @@ -365,6 +368,10 @@ C(nfct); nf_conntrack_get(skb->nfct); C(nfctinfo); +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + C(nfct_reasm); + nf_conntrack_get_reasm(skb->nfct_reasm); +#endif #ifdef CONFIG_NETFILTER_DEBUG C(nf_debug); #endif @@ -432,6 +439,10 @@ new->nfct = old->nfct; nf_conntrack_get(old->nfct); new->nfctinfo = old->nfctinfo; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + new->nfct_reasm = old->nfct_reasm; + nf_conntrack_get_reasm(old->nfct_reasm); +#endif #ifdef CONFIG_NETFILTER_DEBUG new->nf_debug = old->nf_debug; #endif diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/Kconfig linux-2.6.11.3/net/ipv4/netfilter/Kconfig --- linux-2.6.11.3.org/net/ipv4/netfilter/Kconfig 2005-03-13 07:44:38.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/Kconfig 2005-03-13 22:06:37.025925368 +0100 @@ -692,5 +692,416 @@ Allows altering the ARP packet payload: source and destination hardware and network addresses. +config IP_NF_TARGET_IPV4OPTSSTRIP + tristate 'IPV4OPTSSTRIP target support' + depends on IP_NF_MANGLE + help + This option adds an IPV4OPTSSTRIP target. + This target allows you to strip all IP options in a packet. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_TARGET_TTL + tristate 'TTL target support' + depends on IP_NF_MANGLE + help + This option adds a `TTL' target, which enables the user to set + the TTL value or increment / decrement the TTL value by a given + amount. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_CONNLIMIT + tristate 'Connections/IP limit match support' + depends on IP_NF_IPTABLES + help + This match allows you to restrict the number of parallel TCP + connections to a server per client IP address (or address block). 
+ + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_FUZZY + tristate 'fuzzy match support' + depends on IP_NF_IPTABLES + help + This option adds a `fuzzy' match, which allows you to match + packets according to a fuzzy logic based law. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_IPV4OPTIONS + tristate 'IPV4OPTIONS match support' + depends on IP_NF_IPTABLES + help + This option adds a IPV4OPTIONS match. + It allows you to filter options like source routing, + record route, timestamp and router-altert. + + If you say Y here, try iptables -m ipv4options --help for more information. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_NTH + tristate 'Nth match support' + depends on IP_NF_IPTABLES + help + This option adds a `Nth' match, which allow you to make + rules that match every Nth packet. By default there are + 16 different counters. + + [options] + --every Nth Match every Nth packet + [--counter] num Use counter 0-15 (default:0) + [--start] num Initialize the counter at the number 'num' + instead of 0. Must be between 0 and Nth-1 + [--packet] num Match on 'num' packet. Must be between 0 + and Nth-1. + + If --packet is used for a counter than + there must be Nth number of --packet + rules, covering all values between 0 and + Nth-1 inclusively. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_OSF + tristate 'OSF match support' + depends on IP_NF_IPTABLES + help + + The idea of passive OS fingerprint matching exists for quite a long time, + but was created as extension fo OpenBSD pf only some weeks ago. + Original idea was lurked in some OpenBSD mailing list (thanks + grange@open...) 
and than adopted for Linux netfilter in form of this code. + + Original table was created by Michal Zalewski for + his excellent p0f and than changed a bit for more convenience. + + This module compares some data(WS, MSS, options and it's order, ttl, + df and others) from first SYN packet (actually from packets with SYN + bit set) with hardcoded in fingers[] table ones. + + If you say Y here, try iptables -m osf --help for more information. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_PSD + tristate 'psd match support' + depends on IP_NF_IPTABLES + help + This option adds a `psd' match, which allows you to create rules in + any iptables table wich will detect TCP and UDP port scans. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_QUOTA + tristate 'quota match support' + depends on IP_NF_IPTABLES + help + This match implements network quotas. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + + +config IP_NF_SET + tristate "IP set support" + depends on INET && NETFILTER + help + This option adds IP set support to the kernel. + In order to define and use sets, you need the userspace utility + ipset(8). + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_SET_MAX + int "Maximum number of IP sets" + default 256 + range 2 65534 + depends on IP_NF_SET + help + You can define here default value of the maximum number + of IP sets for the kernel. + + The value can be overriden by the 'max_sets' module + parameter of the 'ip_set' module. + +config IP_NF_SET_HASHSIZE + int "Hash size for bindings of IP sets" + default 1024 + depends on IP_NF_SET + help + You can define here default value of the hash size for + bindings of IP sets. 
+ + The value can be overriden by the 'hash_size' module + parameter of the 'ip_set' module. + +config IP_NF_SET_IPMAP + tristate "ipmap set support" + depends on IP_NF_SET + help + This option adds the ipmap set type support. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_SET_MACIPMAP + tristate "macipmap set support" + depends on IP_NF_SET + help + This option adds the macipmap set type support. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_SET_PORTMAP + tristate "portmap set support" + depends on IP_NF_SET + help + This option adds the portmap set type support. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_SET_IPHASH + tristate "iphash set support" + depends on IP_NF_SET + help + This option adds the iphash set type support. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_SET_NETHASH + tristate "nethash set support" + depends on IP_NF_SET + help + This option adds the nethash set type support. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_SET + tristate "set match support" + depends on IP_NF_SET + help + Set matching matches against given IP sets. + You need the ipset utility to create and set up the sets. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_TARGET_SET + tristate "SET target support" + depends on IP_NF_SET + help + The SET target makes possible to add/delete entries + in IP sets. + You need the ipset utility to create and set up the sets. + + To compile it as a module, choose M here. If unsure, say N. + + +config IP_NF_MATCH_TIME + tristate 'TIME match support' + depends on IP_NF_IPTABLES + help + This option adds a `time' match, which allows you + to match based on the packet arrival time/date + (arrival time/date at the machine which netfilter is running on) or + departure time/date (for locally generated packets). 
+ + If you say Y here, try iptables -m time --help for more information. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_TARGET_IPMARK + tristate 'IPMARK target support' + depends on IP_NF_MANGLE + help + This option adds a `IPMARK' target, which allows you to create rules + in the `mangle' table which alter the netfilter mark (nfmark) field + basing on the source or destination ip address of the packet. + This is very useful for very fast massive mangling and marking. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +config IP_NF_TARGET_ROUTE + tristate 'ROUTE target support' + depends on IP_NF_MANGLE + help + This option adds a `ROUTE' target, which enables you to setup unusual + routes. For example, the ROUTE lets you route a received packet through + an interface or towards a host, even if the regular destination of the + packet is the router itself. The ROUTE target is also able to change the + incoming interface of a packet. + + The target can be or not a final target. It has to be used inside the + mangle table. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. The module will be called ipt_ROUTE.o. + If unsure, say `N'. + +config IP_NF_TARGET_TARPIT + tristate 'TARPIT target support' + depends on IP_NF_FILTER + help + Adds a TARPIT target to iptables, which captures and holds + incoming TCP connections using no local per-connection resources. + Connections are accepted, but immediately switched to the persist + state (0 byte window), in which the remote side stops sending data + and asks to continue every 60-240 seconds. Attempts to close the + connection are ignored, forcing the remote side to time out the + connection in 12-24 minutes. + + This offers similar functionality to LaBrea + but doesn't require dedicated + hardware or IPs. 
Any TCP port that you would normally DROP or REJECT + can instead become a tarpit. + +config IP_NF_TARGET_XOR + tristate 'XOR target support' + depends on IP_NF_MANGLE + help + This option adds a `XOR' target, which can encrypt TCP and + UDP traffic using a simple XOR encryption. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP_NF_MATCH_ACCOUNT + tristate "account match support" + depends on IP_NF_IPTABLES && PROC_FS + help + This match is used for accounting traffic for all hosts in + defined network/netmask. + + Features: + - long (one counter per protocol TCP/UDP/ICMP/Other) and short statistics + - one iptables rule for all hosts in network/netmask + - loading/saving counters (by reading/writing to procfs entries) + + Example usage: + + account traffic for/to 192.168.0.0/24 network into table mynetwork: + + # iptables -A FORWARD -m account --aname mynetwork --aaddr 192.168.0.0/24 + + account traffic for/to WWW server for 192.168.0.0/24 network into table + mywwwserver: + + # iptables -A INPUT -p tcp --dport 80 + -m account --aname mywwwserver --aaddr 192.168.0.0/24 --ashort + # iptables -A OUTPUT -p tcp --sport 80 + -m account --aname mywwwserver --aaddr 192.168.0.0/24 --ashort + + read counters: + + # cat /proc/net/ipt_account/mynetwork + # cat /proc/net/ipt_account/mywwwserver + + set counters: + + # echo "ip = 192.168.0.1 packets_src = 0" > /proc/net/ipt_account/mywwwserver + + Webpage: + http://www.barbara.eu.org/~quaker/ipt_account/ + +config IP_NF_MATCH_ACCOUNT_DEBUG + bool "account debugging output" + depends on IP_NF_MATCH_ACCOUNT + help + Say Y to get lots of debugging output. + + + +config IP_NF_MATCH_GEOIP + tristate 'geoip match support' + depends on IP_NF_IPTABLES + help + This option allows you to match a packet by its source or + destination country. Basically, you need a country's + database containing all subnets and associated countries. 
+ + For the complete procedure and understanding, read : + http://www.cookinglinux.org/geoip/howto/geoip-HOWTO.html + + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + + +config IP_NF_MATCH_IPP2P + tristate 'IPP2P match support' + depends on IP_NF_IPTABLES + help + This option makes possible to match some P2P packets + therefore helps controlling such traffic. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +config NF_CONNTRACK_IPV4 + tristate "IPv4 support on new connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK && IP_NF_CONNTRACK!=y + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is IPv4 support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. + +config IP_NF_NAT_QUAKE3 + tristate + depends on IP_NF_CONNTRACK!=n && IP_NF_NAT !=n + default IP_NF_NAT if IP_NF_QUAKE3=y + default m if IP_NF_QUAKE3=m + +config IP_NF_QUAKE3 + tristate "Quake3 protocol support" + depends on IP_NF_CONNTRACK + help + Quake III Arena connection tracking helper. This module allows for a + stricter firewall rulebase if one only allows traffic to a master + server. Connections to Quake III server IP addresses and ports returned + by the master server will be tracked automatically. + + If you want to compile it as a module, say M here and read + . 
If unsure, say `Y'. + +config IP_NF_MATCH_UNCLEAN + tristate 'Unclean match support (DANGEROUS)' + depends on EXPERIMENTAL && IP_NF_IPTABLES + help + Unclean packet matching matches any strange or invalid packets, by + looking at a series of fields in the IP, TCP, UDP and ICMP headers. + + Please note that this kind of matching is considered dangerous and + might harm the future compatibility of your packet filter. + + It has happened before, search on the net for ECN blackholes :( + + + endmenu diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/Makefile linux-2.6.11.3/net/ipv4/netfilter/Makefile --- linux-2.6.11.3.org/net/ipv4/netfilter/Makefile 2005-03-13 07:44:14.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/Makefile 2005-03-13 22:06:37.027925064 +0100 @@ -13,6 +13,10 @@ obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o # connection tracking helpers +obj-$(CONFIG_IP_NF_QUAKE3) += ip_conntrack_quake3.o +ifdef CONFIG_IP_NF_NAT_QUAKE3 + export-objs += ip_conntrack_quake3.o +endif obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o @@ -23,6 +27,7 @@ obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o +obj-$(CONFIG_IP_NF_NAT_QUAKE3) += ip_nat_quake3.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o @@ -36,47 +41,86 @@ # matches obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o +obj-$(CONFIG_IP_NF_MATCH_IPP2P) += ipt_ipp2p.o +obj-$(CONFIG_IP_NF_MATCH_GEOIP) += ipt_geoip.o + +obj-$(CONFIG_IP_NF_MATCH_QUOTA) += ipt_quota.o obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o +obj-$(CONFIG_IP_NF_MATCH_SET) += ipt_set.o obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o 
obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o +obj-$(CONFIG_IP_NF_MATCH_ACCOUNT) += ipt_account.o + +obj-$(CONFIG_IP_NF_MATCH_TIME) += ipt_time.o + + +obj-$(CONFIG_IP_NF_MATCH_PSD) += ipt_psd.o + +obj-$(CONFIG_IP_NF_MATCH_OSF) += ipt_osf.o + + +obj-$(CONFIG_IP_NF_MATCH_NTH) += ipt_nth.o + +obj-$(CONFIG_IP_NF_MATCH_IPV4OPTIONS) += ipt_ipv4options.o + + +obj-$(CONFIG_IP_NF_MATCH_FUZZY) += ipt_fuzzy.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o +obj-$(CONFIG_IP_NF_MATCH_UNCLEAN) += ipt_unclean.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o +obj-$(CONFIG_IP_NF_MATCH_CONNLIMIT) += ipt_connlimit.o obj-$(CONFIG_IP_NF_MATCH_CONNMARK) += ipt_connmark.o obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o +obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o obj-$(CONFIG_IP_NF_MATCH_COMMENT) += ipt_comment.o # targets obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o +obj-$(CONFIG_IP_NF_TARGET_TARPIT) += ipt_TARPIT.o obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MARK) += ipt_MARK.o +obj-$(CONFIG_IP_NF_TARGET_IPMARK) += ipt_IPMARK.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o +obj-$(CONFIG_IP_NF_TARGET_ROUTE) += ipt_ROUTE.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o 
obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o +obj-$(CONFIG_IP_NF_TARGET_XOR) += ipt_XOR.o +obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o +obj-$(CONFIG_IP_NF_TARGET_IPV4OPTSSTRIP) += ipt_IPV4OPTSSTRIP.o obj-$(CONFIG_IP_NF_TARGET_CONNMARK) += ipt_CONNMARK.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o +obj-$(CONFIG_IP_NF_TARGET_SET) += ipt_SET.o + +# sets +obj-$(CONFIG_IP_NF_SET) += ip_set.o +obj-$(CONFIG_IP_NF_SET_IPMAP) += ip_set_ipmap.o +obj-$(CONFIG_IP_NF_SET_PORTMAP) += ip_set_portmap.o +obj-$(CONFIG_IP_NF_SET_MACIPMAP) += ip_set_macipmap.o +obj-$(CONFIG_IP_NF_SET_IPHASH) += ip_set_iphash.o +obj-$(CONFIG_IP_NF_SET_NETHASH) += ip_set_nethash.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o # generic ARP tables @@ -87,3 +131,9 @@ obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o + +# objects for l3 independent conntrack +nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o + +# l3 independent conntrack +obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_conntrack_quake3.c linux-2.6.11.3/net/ipv4/netfilter/ip_conntrack_quake3.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_conntrack_quake3.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_conntrack_quake3.c 2005-03-13 22:06:14.549342328 +0100 @@ -0,0 +1,194 @@ +/* Quake3 extension for IP connection tracking + * (C) 2002 by Filip Sneppe + * based on ip_conntrack_ftp.c and ip_conntrack_tftp.c + * + * ip_conntrack_quake3.c v0.04 2002-08-31 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 
+ * 2 of the License, or (at your option) any later version. + * + * Module load syntax: + * insmod ip_conntrack_quake3.o ports=port1,port2,...port + * + * please give the ports of all Quake3 master servers You wish to + * connect to. If you don't specify ports, the default will be UDP + * port 27950. + * + * Thanks to the Ethereal folks for their analysis of the Quake3 protocol. + */ + +#include +#include +#include + +#include +#include +#include +#include + +MODULE_AUTHOR("Filip Sneppe "); +MODULE_DESCRIPTION("Netfilter connection tracking module for Quake III Arena"); +MODULE_LICENSE("GPL"); + +#define MAX_PORTS 8 +static int ports[MAX_PORTS]; +static int ports_c = 0; +module_param_array(ports, int, &ports_c, 0400); +MODULE_PARM_DESC(ports, "port numbers of Quake III master servers"); + +/* Quake3 master server reply will add > 100 expectations per reply packet; when + doing lots of printk's, klogd may not be able to read /proc/kmsg fast enough */ +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +struct quake3_search quake3s_conntrack = { "****", "getserversResponse", sizeof("getserversResponse") - 1 }; + +static int quake3_help(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_conntrack_info ctinfo) +{ + struct udphdr _udph, *uh; + int dir = CTINFO2DIR(ctinfo); + struct ip_conntrack_expect *exp; + int i; + int ret = NF_ACCEPT; + + /* Until there's been traffic both ways, don't look in packets. note: + * it's UDP ! */ + if (ctinfo != IP_CT_ESTABLISHED + && ctinfo != IP_CT_IS_REPLY) { + DEBUGP("ip_conntrack_quake3: not ok ! Conntrackinfo = %u\n", + ctinfo); + return NF_ACCEPT; + } else { + DEBUGP("ip_conntrack_quake3: it's ok ! Conntrackinfo = %u\n", + ctinfo); + } + + /* Valid UDP header? */ + uh = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, + sizeof(_udph), &_udph); + if (!uh) + return NF_ACCEPT; + + /* Any data? 
*/ + dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); + if (dataoff >= (*pskb)->len) + return NF_ACCEPT; + + LOCK_BH(&quake3_buffer_lock); + qb_ptr = skb_header_pointer(*pskb, dataoff, + (*pskb)->len - dataoff, quake3_buffer); + BUG_ON(dp_ptr == NULL); + data = qb_ptr; + + + if (strnicmp(data + 4, quake3s_conntrack.pattern, + quake3s_conntrack.plen) == 0) { + for(i=23; /* 4 bytes filler, 18 bytes "getserversResponse", + 1 byte "\" */ + i+6 < ntohs(uh->len); + i+=7) { +#if 0 + DEBUGP("ip_conntrack_quake3: adding server at offset " + "%u/%u %u.%u.%u.%u:%u\n", i, ntohs(uh->len), + NIPQUAD( (u_int32_t) *( (u_int32_t *)( (int)udph + i ) ) ), + ntohs((__u16) *( (__u16 *)( (int)udph + i + 4 ) ) ) ); +#endif + + exp = ip_conntrack_expect_alloc(); + if (!exp) { + ret = NF_DROP; + goto out; + } + + memset(exp, 0, sizeof(*exp)); + + exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; + exp->tuple.dst.ip = + (u_int32_t) *((u_int32_t *)((int)data+i)); + exp->tuple.dst.u.udp.port = + (__u16) *((__u16 *)((int)data+i+4)); + exp->tuple.dst.protonum = IPPROTO_UDP; + + exp->mask.src.ip = 0xffffffff; + exp->mask.dst.ip = 0xffffffff; + exp->mask.dst.u.port = 0xffff; + exp->mask.dst.protonum = 0xff; + + if (ip_nat_quake3_hook) + ret = ip_nat_quake3_hook(pskb, ctinfo, + ..., exp); + else if (ip_conntrack_expect_related(exp) != 0) { + ip_conntrack_expect_free(exp); + ret = NF_DROP; + } + goto out; + } + } + +out: + return ret; +} + +static struct ip_conntrack_helper quake3[MAX_PORTS]; +static char quake3_names[MAX_PORTS][13]; /* quake3-65535 */ + +static void fini(void) +{ + int i; + + for(i = 0 ; (i < ports_c); i++) { + DEBUGP("ip_conntrack_quake3: unregistering helper for port %d\n", + ports[i]); + ip_conntrack_helper_unregister(&quake3[i]); + } +} + +static int __init init(void) +{ + int i, ret; + char *tmpname; + + if(!ports[0]) + ports[0]=QUAKE3_MASTER_PORT; + + for(i = 0 ; (i < MAX_PORTS) && ports[i] ; i++) { + /* Create helper structure */ + memset(&quake3[i], 0, 
sizeof(struct ip_conntrack_helper)); + + quake3[i].tuple.dst.protonum = IPPROTO_UDP; + quake3[i].tuple.src.u.udp.port = htons(ports[i]); + quake3[i].mask.dst.protonum = 0xFFFF; + quake3[i].mask.src.u.udp.port = 0xFFFF; + quake3[i].help = quake3_help; + quake3[i].me = THIS_MODULE; + + tmpname = &quake3_names[i][0]; + if (ports[i] == QUAKE3_MASTER_PORT) + sprintf(tmpname, "quake3"); + else + sprintf(tmpname, "quake3-%d", i); + quake3[i].name = tmpname; + + DEBUGP("ip_conntrack_quake3: registering helper for port %d\n", + ports[i]); + + ret=ip_conntrack_helper_register(&quake3[i]); + if(ret) { + fini(); + return(ret); + } + ports_c++; + } + + return(0); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_nat_quake3.c linux-2.6.11.3/net/ipv4/netfilter/ip_nat_quake3.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_nat_quake3.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_nat_quake3.c 2005-03-13 22:06:14.550342176 +0100 @@ -0,0 +1,199 @@ +/* Quake3 extension for UDP NAT alteration. + * (C) 2002 by Filip Sneppe + * based on ip_nat_ftp.c and ip_nat_tftp.c + * + * ip_nat_quake3.c v0.0.3 2002-08-31 + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Module load syntax: + * insmod ip_nat_quake3.o ports=port1,port2,...port + * + * please give the ports of all Quake3 master servers You wish to + * connect to. If you don't specify ports, the default will be UDP + * port 27950. + * + * Thanks to the Ethereal folks for their analysis of the Quake3 protocol. 
+ * + * Notes: + * - If you're one of those people who would try anything to lower + * latency while playing Quake (and who isn't :-) ), you may want to + * consider not loading ip_nat_quake3 at all and just MASQUERADE all + * outgoing UDP traffic. + * This will make ip_conntrack_quake3 add the necessary expectations, + * but there will be no overhead for client->server UDP streams. If + * ip_nat_quake3 is loaded, quake3_nat_expected will be called per NAT + * hook for every packet in the client->server UDP stream. + * - Only SNAT/MASQUEARDE targets are useful for ip_nat_quake3. + * The IP addresses in the master connection payload (=IP addresses + * of Quake servers) have no relation with the master server so + * DNAT'ing the master connection to a server should not change the + * expected connections. + * - Not tested due to lack of equipment: + * - multiple Quake3 clients behind one MASQUERADE gateway + * - what if Quake3 client is running on router too + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Filip Sneppe "); +MODULE_DESCRIPTION("Netfilter NAT helper for Quake III Arena"); +MODULE_LICENSE("GPL"); + +/* Quake3 master server reply will add > 100 expectations per reply packet; when + doing lots of printk's, klogd may not be able to read /proc/kmsg fast enough */ +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +static struct quake3_search quake3s_nat = { "****", "getserversResponse", sizeof("getserversResponse") - 1 }; + +static unsigned int +quake3_nat_help(struct sk_buff **pskb, + enum ip_conntrack_info ctinfo, + + struct ip_conntrack_expect *exp, + //struct ip_nat_info *info, + //unsigned int hooknum, + ) +{ + struct iphdr *iph = (*pskb)->nh.iph; + struct udphdr *udph = (void *)iph + iph->ihl * 4; + struct ip_conntrack_tuple repl; + int dir = CTINFO2DIR(ctinfo); + int i; + + DEBUGP("ip_nat_quake3: quake3_nat_help, direction: %s hook: %s\n", + dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", + hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" + : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" + : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "???" + ); + DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + + /* Only mangle things once: original direction in POST_ROUTING + and reply direction on PRE_ROUTING. */ + if (!((hooknum == NF_IP_POST_ROUTING && dir == IP_CT_DIR_ORIGINAL) + || (hooknum == NF_IP_PRE_ROUTING && dir == IP_CT_DIR_REPLY))) { + DEBUGP("ip_nat_quake3: Not touching dir %s at hook %s\n", + dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY", + hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" + : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" + : hooknum == NF_IP_LOCAL_OUT ? 
"OUTPUT" : "????"); + return NF_ACCEPT; + } + + if (!exp) { + DEBUGP("no conntrack expectation to modify\n"); + return NF_ACCEPT; + } + + if (strnicmp((const char *)udph + 12, quake3s_nat.pattern, quake3s_nat.plen) == 0) { + for(i=31; /* 8 bytes UDP hdr, 4 bytes filler, 18 bytes "getserversResponse", 1 byte "\" */ + i+6 < ntohs(udph->len); + i+=7) { + DEBUGP("ip_nat_quake3: adding server at offset %u/%u %u.%u.%u.%u:%u\n", + i, ntohs(udph->len), + NIPQUAD( (u_int32_t) *( (u_int32_t *)( (int)udph + i ) ) ), + ntohs((__u16) *( (__u16 *)( (int)udph + i + 4 ) ) ) ); + + memset(&repl, 0, sizeof(repl)); + + repl.dst.protonum = IPPROTO_UDP; + repl.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; + repl.dst.ip = *( (u_int32_t *)( (int)udph + i ) ); + repl.dst.u.udp.port = (__u16) *( (__u16 *)( (int)udph + i + 4 ) ); + + ip_conntrack_change_expect(exp, &repl); + } + } + return NF_ACCEPT; +} + +static unsigned int +quake3_nat_expected(struct sk_buff **pskb, + unsigned int hooknum, + struct ip_conntrack *ct, + struct ip_nat_info *info) +{ + const struct ip_conntrack *master = ct->master->expectant; + struct ip_nat_multi_range mr; + u_int32_t newsrcip, newdstip, newip; +#if 0 + const struct ip_conntrack_tuple *repl = + &master->tuplehash[IP_CT_DIR_REPLY].tuple; + struct iphdr *iph = (*pskb)->nh.iph; + struct udphdr *udph = (void *)iph + iph->ihl*4; +#endif + + DEBUGP("ip_nat_quake3: quake3_nat_expected: here we are\n"); + DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + + IP_NF_ASSERT(info); + IP_NF_ASSERT(master); + IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum)))); + + newdstip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; + newsrcip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; + + if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) { + newip = newsrcip; + DEBUGP("hook: %s orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " + "newsrc: %u.%u.%u.%u\n", + hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" + : hooknum == NF_IP_PRE_ROUTING ? 
"PREROUTING" + : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "????", + NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph->source), + NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph->dest), + NIPQUAD(newip)); + + } else { + newip = newdstip; + DEBUGP("hook: %s orig: %u.%u.%u.%u:%u <-> %u.%u.%u.%u:%u " + "newdst: %u.%u.%u.%u\n", + hooknum == NF_IP_POST_ROUTING ? "POSTROUTING" + : hooknum == NF_IP_PRE_ROUTING ? "PREROUTING" + : hooknum == NF_IP_LOCAL_OUT ? "OUTPUT" : "????", + NIPQUAD((*pskb)->nh.iph->saddr), ntohs(udph->source), + NIPQUAD((*pskb)->nh.iph->daddr), ntohs(udph->dest), + NIPQUAD(newip)); + } + + mr.rangesize = 1; + mr.range[0].flags = IP_NAT_RANGE_MAP_IPS; + mr.range[0].min_ip = mr.range[0].max_ip = newip; + + return ip_nat_setup_info(ct,&mr,hooknum); +} + +static void fini(void) +{ + ip_nat_quake3_hook = NULL; + synchronize_net(); +} + +static int __init init(void) +{ + BUG_ON(ip_nat_quake3_hook); + ip_nat_quake3_hook = help; + return 0; +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_queue.c linux-2.6.11.3/net/ipv4/netfilter/ip_queue.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_queue.c 2005-03-13 07:44:45.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_queue.c 2005-03-13 22:05:50.830948072 +0100 @@ -425,6 +425,33 @@ } static int +ipq_set_vwmark(struct ipq_vwmark_msg *vmsg, unsigned int len) +{ + struct ipq_queue_entry *entry; + + if (vmsg->value > NF_MAX_VERDICT) + return -EINVAL; + + entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); + if (entry == NULL) + return -ENOENT; + else { + int verdict = vmsg->value; + + if (vmsg->data_len && vmsg->data_len == len) + if (ipq_mangle_ipv4((ipq_verdict_msg_t *)vmsg, entry) < 0) + verdict = NF_DROP; + + /* set mark of associated skb */ + entry->skb->nfmark = vmsg->nfmark; + + ipq_issue_verdict(entry, verdict); + return 0; + } +} + + +static int ipq_receive_peer(struct ipq_peer_msg *pmsg, unsigned char type, unsigned int len) { @@ -446,6 +473,14 @@ 
status = ipq_set_verdict(&pmsg->msg.verdict, len - sizeof(*pmsg)); break; + case IPQM_VWMARK: + if (pmsg->msg.verdict.value > NF_MAX_VERDICT) + status = -EINVAL; + else + status = ipq_set_vwmark(&pmsg->msg.vwmark, + len - sizeof(*pmsg)); + break; + default: status = -EINVAL; } diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_set.c linux-2.6.11.3/net/ipv4/netfilter/ip_set.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_set.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_set.c 2005-03-13 22:04:00.485723096 +0100 @@ -0,0 +1,1986 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module for IP set management */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define ASSERT_READ_LOCK(x) /* dont use that */ +#define ASSERT_WRITE_LOCK(x) +#include +#include + +static struct list_head set_type_list; /* all registered sets */ +static struct ip_set **ip_set_list; /* all individual sets */ +static DECLARE_RWLOCK(ip_set_lock); /* protects the lists and the hash */ +static DECLARE_MUTEX(ip_set_app_mutex); /* serializes user access */ +static ip_set_id_t ip_set_max = CONFIG_IP_NF_SET_MAX; +static ip_set_id_t ip_set_bindings_hash_size = CONFIG_IP_NF_SET_HASHSIZE; +static struct list_head *ip_set_hash; /* hash of bindings */ +static unsigned int ip_set_hash_random; /* random seed */ + +/* + * Sets are identified either by the index in ip_set_list or by id. + * The id never changes and is used to find a key in the hash. + * The index may change by swapping and used at all other places + * (set/SET netfilter modules, binding value, etc.) 
+ * + * Userspace requests are serialized by ip_set_mutex and sets can + * be deleted only from userspace. Therefore ip_set_list locking + * must obey the following rules: + * + * - kernel requests: read and write locking mandatory + * - user requests: read locking optional, write locking mandatory + */ + +static inline void +__ip_set_get(ip_set_id_t index) +{ + atomic_inc(&ip_set_list[index]->ref); +} + +static inline void +__ip_set_put(ip_set_id_t index) +{ + atomic_dec(&ip_set_list[index]->ref); +} + +/* + * Binding routines + */ + +static inline int +ip_hash_cmp(const struct ip_set_hash *set_hash, + ip_set_id_t id, ip_set_ip_t ip) +{ + return set_hash->id == id && set_hash->ip == ip; +} + +static ip_set_id_t +ip_set_find_in_hash(ip_set_id_t id, ip_set_ip_t ip) +{ + u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) + % ip_set_bindings_hash_size; + struct ip_set_hash *set_hash; + + MUST_BE_READ_LOCKED(&ip_set_lock); + IP_SET_ASSERT(ip_set_list[id]); + DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip)); + + set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp, + struct ip_set_hash *, id, ip); + + DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, + HIPQUAD(ip), + set_hash != NULL ? ip_set_list[set_hash->binding]->name : ""); + + return (set_hash != NULL ? 
set_hash->binding : IP_SET_INVALID_ID); +} + +static inline void +__set_hash_del(struct ip_set_hash *set_hash) +{ + MUST_BE_WRITE_LOCKED(&ip_set_lock); + IP_SET_ASSERT(ip_set_list[set_hash->binding]); + + __ip_set_put(set_hash->binding); + list_del(&set_hash->list); + kfree(set_hash); +} + +static int +ip_set_hash_del(ip_set_id_t id, ip_set_ip_t ip) +{ + u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) + % ip_set_bindings_hash_size; + struct ip_set_hash *set_hash; + + IP_SET_ASSERT(ip_set_list[id]); + DP("set: %s, ip: %u.%u.%u.%u", ip_set_list[id]->name, HIPQUAD(ip)); + WRITE_LOCK(&ip_set_lock); + set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp, + struct ip_set_hash *, id, ip); + DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, + HIPQUAD(ip), + set_hash != NULL ? ip_set_list[set_hash->binding]->name : ""); + + if (set_hash != NULL) + __set_hash_del(set_hash); + WRITE_UNLOCK(&ip_set_lock); + return 0; +} + +static int +ip_set_hash_add(ip_set_id_t id, ip_set_ip_t ip, ip_set_id_t binding) +{ + u_int32_t key = jhash_2words(id, ip, ip_set_hash_random) + % ip_set_bindings_hash_size; + struct ip_set_hash *set_hash; + int ret = 0; + + IP_SET_ASSERT(ip_set_list[id]); + IP_SET_ASSERT(ip_set_list[binding]); + DP("set: %s, ip: %u.%u.%u.%u, binding: %s", ip_set_list[id]->name, + HIPQUAD(ip), ip_set_list[binding]->name); + WRITE_LOCK(&ip_set_lock); + set_hash = LIST_FIND(&ip_set_hash[key], ip_hash_cmp, + struct ip_set_hash *, id, ip); + if (!set_hash) { + set_hash = kmalloc(sizeof(struct ip_set_hash), GFP_KERNEL); + if (!set_hash) { + ret = -ENOMEM; + goto unlock; + } + INIT_LIST_HEAD(&set_hash->list); + set_hash->id = id; + set_hash->ip = ip; + list_add(&ip_set_hash[key], &set_hash->list); + } else { + IP_SET_ASSERT(ip_set_list[set_hash->binding]); + DP("overwrite binding: %s", + ip_set_list[set_hash->binding]->name); + __ip_set_put(set_hash->binding); + } + set_hash->binding = binding; + __ip_set_get(set_hash->binding); + unlock: + 
WRITE_UNLOCK(&ip_set_lock); + return ret; +} + +#define FOREACH_HASH_DO(fn, args...) \ +({ \ + ip_set_id_t __key; \ + struct ip_set_hash *__set_hash; \ + \ + for (__key = 0; __key < ip_set_bindings_hash_size; __key++) { \ + list_for_each_entry(__set_hash, &ip_set_hash[__key], list) \ + fn(__set_hash , ## args); \ + } \ +}) + +#define FOREACH_HASH_RW_DO(fn, args...) \ +({ \ + ip_set_id_t __key; \ + struct ip_set_hash *__set_hash, *__n; \ + \ + MUST_BE_WRITE_LOCKED(&ip_set_lock); \ + for (__key = 0; __key < ip_set_bindings_hash_size; __key++) { \ + list_for_each_entry_safe(__set_hash, __n, &ip_set_hash[__key], list)\ + fn(__set_hash , ## args); \ + } \ +}) + +/* Add, del and test set entries from kernel */ + +#define follow_bindings(index, set, ip) \ +((index = ip_set_find_in_hash((set)->id, ip)) != IP_SET_INVALID_ID \ + || (index = (set)->binding) != IP_SET_INVALID_ID) + +int +ip_set_testip_kernel(ip_set_id_t index, + const struct sk_buff *skb, + const u_int32_t *flags) +{ + struct ip_set *set; + ip_set_ip_t ip; + int res, i = 0; + + IP_SET_ASSERT(flags[i]); + READ_LOCK(&ip_set_lock); + do { + set = ip_set_list[index]; + IP_SET_ASSERT(set); + DP("set %s, index %u", set->name, index); + read_lock_bh(&set->lock); + res = set->type->testip_kernel(set, skb, flags[i], &ip); + read_unlock_bh(&set->lock); + } while (res > 0 + && flags[++i] + && follow_bindings(index, set, ip)); + READ_UNLOCK(&ip_set_lock); + + return res; +} + +void +ip_set_addip_kernel(ip_set_id_t index, + const struct sk_buff *skb, + const u_int32_t *flags) +{ + struct ip_set *set; + ip_set_ip_t ip; + int res, i= 0; + + IP_SET_ASSERT(flags[i]); + READ_LOCK(&ip_set_lock); + do { + set = ip_set_list[index]; + IP_SET_ASSERT(set); + DP("set %s, index %u", set->name, index); + write_lock_bh(&set->lock); + res = set->type->addip_kernel(set, skb, flags[i], &ip); + write_unlock_bh(&set->lock); + } while ((res == -EAGAIN + && set->type->retry + && (res = set->type->retry(set)) == 0) + || ((res == 0 || res == 
-EEXIST) + && flags[++i] + && follow_bindings(index, set, ip))); + READ_UNLOCK(&ip_set_lock); +} + +void +ip_set_delip_kernel(ip_set_id_t index, + const struct sk_buff *skb, + const u_int32_t *flags) +{ + struct ip_set *set; + ip_set_ip_t ip; + int res, i = 0; + + IP_SET_ASSERT(flags[i]); + READ_LOCK(&ip_set_lock); + do { + set = ip_set_list[index]; + IP_SET_ASSERT(set); + DP("set %s, index %u", set->name, index); + write_lock_bh(&set->lock); + res = set->type->delip_kernel(set, skb, flags[i], &ip); + write_unlock_bh(&set->lock); + } while ((res == 0 || res == -EEXIST) + && flags[++i] + && follow_bindings(index, set, ip)); + READ_UNLOCK(&ip_set_lock); +} + +/* Register and deregister settype */ + +static inline int +set_type_equal(const struct ip_set_type *set_type, const char *str2) +{ + return !strncmp(set_type->typename, str2, IP_SET_MAXNAMELEN - 1); +} + +static inline struct ip_set_type * +find_set_type(const char *name) +{ + return LIST_FIND(&set_type_list, + set_type_equal, + struct ip_set_type *, + name); +} + +int +ip_set_register_set_type(struct ip_set_type *set_type) +{ + int ret = 0; + + if (set_type->protocol_version != IP_SET_PROTOCOL_VERSION) { + ip_set_printk("'%s' uses wrong protocol version %u (want %u)", + set_type->typename, + set_type->protocol_version, + IP_SET_PROTOCOL_VERSION); + return -EINVAL; + } + + WRITE_LOCK(&ip_set_lock); + if (find_set_type(set_type->typename)) { + /* Duplicate! 
*/ + ip_set_printk("'%s' already registered!", + set_type->typename); + ret = -EINVAL; + goto unlock; + } + if (!try_module_get(THIS_MODULE)) { + ret = -EFAULT; + goto unlock; + } + list_append(&set_type_list, set_type); + DP("'%s' registered.", set_type->typename); + unlock: + WRITE_UNLOCK(&ip_set_lock); + return ret; +} + +void +ip_set_unregister_set_type(struct ip_set_type *set_type) +{ + WRITE_LOCK(&ip_set_lock); + if (!find_set_type(set_type->typename)) { + ip_set_printk("'%s' not registered?", + set_type->typename); + goto unlock; + } + LIST_DELETE(&set_type_list, set_type); + module_put(THIS_MODULE); + DP("'%s' unregistered.", set_type->typename); + unlock: + WRITE_UNLOCK(&ip_set_lock); + +} + +/* + * Userspace routines + */ + +/* + * Find set by name, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. Drop the reference + * later, using ip_set_put(). + */ +ip_set_id_t +ip_set_get_byname(const char *name) +{ + ip_set_id_t i, index = IP_SET_INVALID_ID; + + down(&ip_set_app_mutex); + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL + && strcmp(ip_set_list[i]->name, name) == 0) { + __ip_set_get(i); + index = i; + break; + } + } + up(&ip_set_app_mutex); + return index; +} + +/* + * Find set by index, reference it once. The reference makes sure the + * thing pointed to, does not go away under our feet. Drop the reference + * later, using ip_set_put(). + */ +ip_set_id_t +ip_set_get_byindex(ip_set_id_t index) +{ + down(&ip_set_app_mutex); + + if (index >= ip_set_max) + return IP_SET_INVALID_ID; + + if (ip_set_list[index]) + __ip_set_get(index); + else + index = IP_SET_INVALID_ID; + + up(&ip_set_app_mutex); + return index; +} + +/* + * If the given set pointer points to a valid set, decrement + * reference count by 1. The caller shall not assume the index + * to be valid, after calling this function. 
+ */ +void ip_set_put(ip_set_id_t index) +{ + down(&ip_set_app_mutex); + if (ip_set_list[index]) + __ip_set_put(index); + up(&ip_set_app_mutex); +} + +/* Find a set by name or index */ +static ip_set_id_t +ip_set_find_byname(const char *name) +{ + ip_set_id_t i, index = IP_SET_INVALID_ID; + + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL + && strcmp(ip_set_list[i]->name, name) == 0) { + index = i; + break; + } + } + return index; +} + +static ip_set_id_t +ip_set_find_byindex(ip_set_id_t index) +{ + if (index >= ip_set_max || ip_set_list[index] == NULL) + index = IP_SET_INVALID_ID; + + return index; +} + +/* + * Add, del, test, bind and unbind + */ + +static inline int +__ip_set_testip(struct ip_set *set, + const void *data, + size_t size, + ip_set_ip_t *ip) +{ + int res; + + read_lock_bh(&set->lock); + res = set->type->testip(set, data, size, ip); + read_unlock_bh(&set->lock); + + return res; +} + +static int +__ip_set_addip(ip_set_id_t index, + const void *data, + size_t size) +{ + struct ip_set *set = ip_set_list[index]; + ip_set_ip_t ip; + int res; + + IP_SET_ASSERT(set); + do { + write_lock_bh(&set->lock); + res = set->type->addip(set, data, size, &ip); + write_unlock_bh(&set->lock); + } while (res == -EAGAIN + && set->type->retry + && (res = set->type->retry(set)) == 0); + + return res; +} + +static int +ip_set_addip(ip_set_id_t index, + const void *data, + size_t size) +{ + + return __ip_set_addip(index, + data + sizeof(struct ip_set_req_adt), + size - sizeof(struct ip_set_req_adt)); +} + +static int +ip_set_delip(ip_set_id_t index, + const void *data, + size_t size) +{ + struct ip_set *set = ip_set_list[index]; + ip_set_ip_t ip; + int res; + + IP_SET_ASSERT(set); + write_lock_bh(&set->lock); + res = set->type->delip(set, + data + sizeof(struct ip_set_req_adt), + size - sizeof(struct ip_set_req_adt), + &ip); + write_unlock_bh(&set->lock); + + return res; +} + +static int +ip_set_testip(ip_set_id_t index, + const void *data, + size_t size) +{ 
+ struct ip_set *set = ip_set_list[index]; + ip_set_ip_t ip; + int res; + + IP_SET_ASSERT(set); + res = __ip_set_testip(set, + data + sizeof(struct ip_set_req_adt), + size - sizeof(struct ip_set_req_adt), + &ip); + + return (res > 0 ? -EEXIST : res); +} + +static int +ip_set_bindip(ip_set_id_t index, + const void *data, + size_t size) +{ + struct ip_set *set = ip_set_list[index]; + struct ip_set_req_bind *req_bind; + ip_set_id_t binding; + ip_set_ip_t ip; + int res; + + IP_SET_ASSERT(set); + if (size < sizeof(struct ip_set_req_bind)) + return -EINVAL; + + req_bind = (struct ip_set_req_bind *) data; + req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0'; + + if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { + /* Default binding of a set */ + char *binding_name; + + if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN) + return -EINVAL; + + binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); + binding_name[IP_SET_MAXNAMELEN - 1] = '\0'; + + binding = ip_set_find_byname(binding_name); + if (binding == IP_SET_INVALID_ID) + return -ENOENT; + + WRITE_LOCK(&ip_set_lock); + /* Sets as binding values are referenced */ + if (set->binding != IP_SET_INVALID_ID) + __ip_set_put(set->binding); + set->binding = binding; + __ip_set_get(set->binding); + WRITE_UNLOCK(&ip_set_lock); + + return 0; + } + binding = ip_set_find_byname(req_bind->binding); + if (binding == IP_SET_INVALID_ID) + return -ENOENT; + + res = __ip_set_testip(set, + data + sizeof(struct ip_set_req_bind), + size - sizeof(struct ip_set_req_bind), + &ip); + DP("set %s, ip: %u.%u.%u.%u, binding %s", + set->name, HIPQUAD(ip), ip_set_list[binding]->name); + + if (res >= 0) + res = ip_set_hash_add(set->id, ip, binding); + + return res; +} + +#define FOREACH_SET_DO(fn, args...) 
\ +({ \ + ip_set_id_t __i; \ + struct ip_set *__set; \ + \ + for (__i = 0; __i < ip_set_max; __i++) { \ + __set = ip_set_list[__i]; \ + if (__set != NULL) \ + fn(__set , ##args); \ + } \ +}) + +static inline void +__set_hash_del_byid(struct ip_set_hash *set_hash, ip_set_id_t id) +{ + if (set_hash->id == id) + __set_hash_del(set_hash); +} + +static inline void +__unbind_default(struct ip_set *set) +{ + if (set->binding != IP_SET_INVALID_ID) { + /* Sets as binding values are referenced */ + __ip_set_put(set->binding); + set->binding = IP_SET_INVALID_ID; + } +} + +static int +ip_set_unbindip(ip_set_id_t index, + const void *data, + size_t size) +{ + struct ip_set *set; + struct ip_set_req_bind *req_bind; + ip_set_ip_t ip; + int res; + + DP(""); + if (size < sizeof(struct ip_set_req_bind)) + return -EINVAL; + + req_bind = (struct ip_set_req_bind *) data; + req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0'; + + DP("%u %s", index, req_bind->binding); + if (index == IP_SET_INVALID_ID) { + /* unbind :all: */ + if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { + /* Default binding of sets */ + WRITE_LOCK(&ip_set_lock); + FOREACH_SET_DO(__unbind_default); + WRITE_UNLOCK(&ip_set_lock); + return 0; + } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) { + /* Flush all bindings of all sets*/ + WRITE_LOCK(&ip_set_lock); + FOREACH_HASH_RW_DO(__set_hash_del); + WRITE_UNLOCK(&ip_set_lock); + return 0; + } + DP("unreachable reached!"); + return -EINVAL; + } + + set = ip_set_list[index]; + IP_SET_ASSERT(set); + if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { + /* Default binding of set */ + ip_set_id_t binding = ip_set_find_byindex(set->binding); + + if (binding == IP_SET_INVALID_ID) + return -ENOENT; + + WRITE_LOCK(&ip_set_lock); + /* Sets in hash values are referenced */ + __ip_set_put(set->binding); + set->binding = IP_SET_INVALID_ID; + WRITE_UNLOCK(&ip_set_lock); + + return 0; + } else if (strcmp(req_bind->binding, IPSET_TOKEN_ALL) == 0) { + /* Flush all 
bindings */ + + WRITE_LOCK(&ip_set_lock); + FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id); + WRITE_UNLOCK(&ip_set_lock); + return 0; + } + + res = __ip_set_testip(set, + data + sizeof(struct ip_set_req_bind), + size - sizeof(struct ip_set_req_bind), + &ip); + + DP("set %s, ip: %u.%u.%u.%u", set->name, HIPQUAD(ip)); + if (res >= 0) + res = ip_set_hash_del(set->id, ip); + + return res; +} + +static int +ip_set_testbind(ip_set_id_t index, + const void *data, + size_t size) +{ + struct ip_set *set = ip_set_list[index]; + struct ip_set_req_bind *req_bind; + ip_set_id_t binding; + ip_set_ip_t ip; + int res; + + IP_SET_ASSERT(set); + if (size < sizeof(struct ip_set_req_bind)) + return -EINVAL; + + req_bind = (struct ip_set_req_bind *) data; + req_bind->binding[IP_SET_MAXNAMELEN - 1] = '\0'; + + if (strcmp(req_bind->binding, IPSET_TOKEN_DEFAULT) == 0) { + /* Default binding of set */ + char *binding_name; + + if (size != sizeof(struct ip_set_req_bind) + IP_SET_MAXNAMELEN) + return -EINVAL; + + binding_name = (char *)(data + sizeof(struct ip_set_req_bind)); + binding_name[IP_SET_MAXNAMELEN - 1] = '\0'; + + binding = ip_set_find_byname(binding_name); + if (binding == IP_SET_INVALID_ID) + return -ENOENT; + + res = (set->binding == binding) ? -EEXIST : 0; + + return res; + } + binding = ip_set_find_byname(req_bind->binding); + if (binding == IP_SET_INVALID_ID) + return -ENOENT; + + + res = __ip_set_testip(set, + data + sizeof(struct ip_set_req_bind), + size - sizeof(struct ip_set_req_bind), + &ip); + DP("set %s, ip: %u.%u.%u.%u, binding %s", + set->name, HIPQUAD(ip), ip_set_list[binding]->name); + + if (res >= 0) + res = (ip_set_find_in_hash(set->id, ip) == binding) + ? 
-EEXIST : 0; + + return res; +} + +static struct ip_set_type * +find_set_type_rlock(const char *typename) +{ + struct ip_set_type *type; + + READ_LOCK(&ip_set_lock); + type = find_set_type(typename); + if (type == NULL) + READ_UNLOCK(&ip_set_lock); + + return type; +} + +static int +find_free_id(const char *name, + ip_set_id_t *index, + ip_set_id_t *id) +{ + ip_set_id_t i; + + *id = IP_SET_INVALID_ID; + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] == NULL) { + if (*id == IP_SET_INVALID_ID) + *id = *index = i; + } else if (strcmp(name, ip_set_list[i]->name) == 0) + /* Name clash */ + return -EEXIST; + } + if (*id == IP_SET_INVALID_ID) + /* No free slot remained */ + return -ERANGE; + /* Check that index is usable as id (swapping) */ + check: + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL + && ip_set_list[i]->id == *id) { + *id = i; + goto check; + } + } + return 0; +} + +/* + * Create a set + */ +static int +ip_set_create(const char *name, + const char *typename, + ip_set_id_t restore, + const void *data, + size_t size) +{ + struct ip_set *set; + ip_set_id_t index, id; + int res = 0; + + DP("setname: %s, typename: %s, id: %u", name, typename, restore); + /* + * First, and without any locks, allocate and initialize + * a normal base set structure. + */ + set = kmalloc(sizeof(struct ip_set), GFP_KERNEL); + if (!set) + return -ENOMEM; + set->lock = RW_LOCK_UNLOCKED; + strncpy(set->name, name, IP_SET_MAXNAMELEN); + set->binding = IP_SET_INVALID_ID; + atomic_set(&set->ref, 0); + + /* + * Next, take the &ip_set_lock, check that we know the type, + * and take a reference on the type, to make sure it + * stays available while constructing our new set. + * + * After referencing the type, we drop the &ip_set_lock, + * and let the new set construction run without locks. 
+ */ + set->type = find_set_type_rlock(typename); + if (set->type == NULL) { + /* Try loading the module */ + char modulename[IP_SET_MAXNAMELEN + strlen("ip_set_") + 1]; + strcpy(modulename, "ip_set_"); + strcat(modulename, typename); + DP("try to load %s", modulename); + request_module(modulename); + set->type = find_set_type_rlock(typename); + } + if (set->type == NULL) { + ip_set_printk("no set type '%s', set '%s' not created", + typename, name); + res = -ENOENT; + goto out; + } + if (!try_module_get(set->type->me)) { + READ_UNLOCK(&ip_set_lock); + res = -EFAULT; + goto out; + } + READ_UNLOCK(&ip_set_lock); + + /* + * Without holding any locks, create private part. + */ + res = set->type->create(set, data, size); + if (res != 0) + goto put_out; + + /* BTW, res==0 here. */ + + /* + * Here, we have a valid, constructed set. &ip_set_lock again, + * find free id/index and check that it is not already in + * ip_set_list. + */ + WRITE_LOCK(&ip_set_lock); + if ((res = find_free_id(set->name, &index, &id)) != 0) { + DP("no free id!"); + goto cleanup; + } + + /* Make sure restore gets the same index */ + if (restore != IP_SET_INVALID_ID && index != restore) { + DP("Can't restore, sets are screwed up"); + res = -ERANGE; + goto cleanup; + } + + /* + * Finally! Add our shiny new set to the list, and be done. 
+ */ + DP("create: '%s' created with index %u, id %u!", set->name, index, id); + set->id = id; + ip_set_list[index] = set; + WRITE_UNLOCK(&ip_set_lock); + return res; + + cleanup: + WRITE_UNLOCK(&ip_set_lock); + set->type->destroy(set); + put_out: + module_put(set->type->me); + out: + kfree(set); + return res; +} + +/* + * Destroy a given existing set + */ +static void +ip_set_destroy_set(ip_set_id_t index) +{ + struct ip_set *set = ip_set_list[index]; + + IP_SET_ASSERT(set); + DP("set: %s", set->name); + WRITE_LOCK(&ip_set_lock); + FOREACH_HASH_RW_DO(__set_hash_del_byid, set->id); + if (set->binding != IP_SET_INVALID_ID) + __ip_set_put(set->binding); + ip_set_list[index] = NULL; + WRITE_UNLOCK(&ip_set_lock); + + /* Must call it without holding any lock */ + set->type->destroy(set); + module_put(set->type->me); + kfree(set); +} + +/* + * Destroy a set - or all sets + * Sets must not be referenced/used. + */ +static int +ip_set_destroy(ip_set_id_t index) +{ + ip_set_id_t i; + + /* ref modification always protected by the mutex */ + if (index != IP_SET_INVALID_ID) { + if (atomic_read(&ip_set_list[index]->ref)) + return -EBUSY; + ip_set_destroy_set(index); + } else { + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL + && (atomic_read(&ip_set_list[i]->ref))) + return -EBUSY; + } + + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL) + ip_set_destroy_set(i); + } + } + return 0; +} + +static void +ip_set_flush_set(struct ip_set *set) +{ + DP("set: %s %u", set->name, set->id); + + write_lock_bh(&set->lock); + set->type->flush(set); + write_unlock_bh(&set->lock); +} + +/* + * Flush data in a set - or in all sets + */ +static int +ip_set_flush(ip_set_id_t index) +{ + if (index != IP_SET_INVALID_ID) { + IP_SET_ASSERT(ip_set_list[index]); + ip_set_flush_set(ip_set_list[index]); + } else + FOREACH_SET_DO(ip_set_flush_set); + + return 0; +} + +/* Rename a set */ +static int +ip_set_rename(ip_set_id_t index, const char *name) +{ + struct ip_set *set 
= ip_set_list[index]; + ip_set_id_t i; + int res = 0; + + DP("set: %s to %s", set->name, name); + WRITE_LOCK(&ip_set_lock); + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL + && strncmp(ip_set_list[i]->name, + name, + IP_SET_MAXNAMELEN - 1) == 0) { + res = -EEXIST; + goto unlock; + } + } + strncpy(set->name, name, IP_SET_MAXNAMELEN); + unlock: + WRITE_UNLOCK(&ip_set_lock); + return res; +} + +/* + * Swap two sets so that name/index points to the other. + * References are also swapped. + */ +static int +ip_set_swap(ip_set_id_t from_index, ip_set_id_t to_index) +{ + struct ip_set *from = ip_set_list[from_index]; + struct ip_set *to = ip_set_list[to_index]; + char from_name[IP_SET_MAXNAMELEN]; + u_int32_t from_ref; + + DP("set: %s to %s", from->name, to->name); + /* Type can't be changed. Artifical restriction. */ + if (from->type->typecode != to->type->typecode) + return -ENOEXEC; + + /* No magic here: ref munging protected by the mutex */ + WRITE_LOCK(&ip_set_lock); + strncpy(from_name, from->name, IP_SET_MAXNAMELEN); + from_ref = atomic_read(&from->ref); + + strncpy(from->name, to->name, IP_SET_MAXNAMELEN); + atomic_set(&from->ref, atomic_read(&to->ref)); + strncpy(to->name, from_name, IP_SET_MAXNAMELEN); + atomic_set(&to->ref, from_ref); + + ip_set_list[from_index] = to; + ip_set_list[to_index] = from; + + WRITE_UNLOCK(&ip_set_lock); + return 0; +} + +/* + * List set data + */ + +static inline void +__set_hash_bindings_size_list(struct ip_set_hash *set_hash, + ip_set_id_t id, size_t *size) +{ + if (set_hash->id == id) + *size += sizeof(struct ip_set_hash_list); +} + +static inline void +__set_hash_bindings_size_save(struct ip_set_hash *set_hash, + ip_set_id_t id, size_t *size) +{ + if (set_hash->id == id) + *size += sizeof(struct ip_set_hash_save); +} + +static inline void +__set_hash_bindings(struct ip_set_hash *set_hash, + ip_set_id_t id, void *data, int *used) +{ + if (set_hash->id == id) { + struct ip_set_hash_list *hash_list = + (struct 
ip_set_hash_list *)(data + *used); + + hash_list->ip = set_hash->ip; + hash_list->binding = set_hash->binding; + *used += sizeof(struct ip_set_hash_list); + } +} + +static int ip_set_list_set(ip_set_id_t index, + void *data, + int *used, + int len) +{ + struct ip_set *set = ip_set_list[index]; + struct ip_set_list *set_list; + + /* Pointer to our header */ + set_list = (struct ip_set_list *) (data + *used); + + DP("set: %s, used: %d %p %p", set->name, *used, data, data + *used); + + /* Get and ensure header size */ + if (*used + sizeof(struct ip_set_list) > len) + goto not_enough_mem; + *used += sizeof(struct ip_set_list); + + read_lock_bh(&set->lock); + /* Get and ensure set specific header size */ + set_list->header_size = set->type->header_size; + if (*used + set_list->header_size > len) + goto unlock_set; + + /* Fill in the header */ + set_list->index = index; + set_list->binding = set->binding; + set_list->ref = atomic_read(&set->ref); + + /* Fill in set spefific header data */ + set->type->list_header(set, data + *used); + *used += set_list->header_size; + + /* Get and ensure set specific members size */ + set_list->members_size = set->type->list_members_size(set); + if (*used + set_list->members_size > len) + goto unlock_set; + + /* Fill in set spefific members data */ + set->type->list_members(set, data + *used); + *used += set_list->members_size; + read_unlock_bh(&set->lock); + + /* Bindings */ + + /* Get and ensure set specific bindings size */ + set_list->bindings_size = 0; + FOREACH_HASH_DO(__set_hash_bindings_size_list, + set->id, &set_list->bindings_size); + if (*used + set_list->bindings_size > len) + goto not_enough_mem; + + /* Fill in set spefific bindings data */ + FOREACH_HASH_DO(__set_hash_bindings, set->id, data, used); + + return 0; + + unlock_set: + read_unlock_bh(&set->lock); + not_enough_mem: + DP("not enough mem, try again"); + return -EAGAIN; +} + +/* + * Save sets + */ +static int ip_set_save_set(ip_set_id_t index, + void *data, + int 
*used, + int len) +{ + struct ip_set *set; + struct ip_set_save *set_save; + + /* Pointer to our header */ + set_save = (struct ip_set_save *) (data + *used); + + /* Get and ensure header size */ + if (*used + sizeof(struct ip_set_save) > len) + goto not_enough_mem; + *used += sizeof(struct ip_set_save); + + set = ip_set_list[index]; + DP("set: %s, used: %u(%u) %p %p", set->name, *used, len, + data, data + *used); + + read_lock_bh(&set->lock); + /* Get and ensure set specific header size */ + set_save->header_size = set->type->header_size; + if (*used + set_save->header_size > len) + goto unlock_set; + + /* Fill in the header */ + set_save->index = index; + set_save->binding = set->binding; + + /* Fill in set spefific header data */ + set->type->list_header(set, data + *used); + *used += set_save->header_size; + + DP("set header filled: %s, used: %u %p %p", set->name, *used, + data, data + *used); + /* Get and ensure set specific members size */ + set_save->members_size = set->type->list_members_size(set); + if (*used + set_save->members_size > len) + goto unlock_set; + + /* Fill in set spefific members data */ + set->type->list_members(set, data + *used); + *used += set_save->members_size; + read_unlock_bh(&set->lock); + DP("set members filled: %s, used: %u %p %p", set->name, *used, + data, data + *used); + return 0; + + unlock_set: + read_unlock_bh(&set->lock); + not_enough_mem: + DP("not enough mem, try again"); + return -EAGAIN; +} + +static inline void +__set_hash_save_bindings(struct ip_set_hash *set_hash, + ip_set_id_t id, + void *data, + int *used, + int len, + int *res) +{ + if (*res == 0 + && (id == IP_SET_INVALID_ID || set_hash->id == id)) { + struct ip_set_hash_save *hash_save = + (struct ip_set_hash_save *)(data + *used); + /* Ensure bindings size */ + if (*used + sizeof(struct ip_set_hash_save) > len) { + *res = -ENOMEM; + return; + } + hash_save->id = set_hash->id; + hash_save->ip = set_hash->ip; + hash_save->binding = set_hash->binding; + *used += 
sizeof(struct ip_set_hash_save); + } +} + +static int ip_set_save_bindings(ip_set_id_t index, + void *data, + int *used, + int len) +{ + int res = 0; + struct ip_set_save *set_save; + + DP("used %u, len %u", *used, len); + /* Get and ensure header size */ + if (*used + sizeof(struct ip_set_save) > len) + return -ENOMEM; + + /* Marker */ + set_save = (struct ip_set_save *) (data + *used); + set_save->index = IP_SET_INVALID_ID; + *used += sizeof(struct ip_set_save); + + DP("marker added used %u, len %u", *used, len); + /* Fill in bindings data */ + if (index != IP_SET_INVALID_ID) + /* Sets are identified by id in hash */ + index = ip_set_list[index]->id; + FOREACH_HASH_DO(__set_hash_save_bindings, index, data, used, len, &res); + + return res; +} + +/* + * Restore sets + */ +static int ip_set_restore(void *data, + int len) +{ + int res = 0; + int line = 0, used = 0, members_size; + struct ip_set *set; + struct ip_set_hash_save *hash_save; + struct ip_set_restore *set_restore; + ip_set_id_t index; + + /* Loop to restore sets */ + while (1) { + line++; + + DP("%u %u %u", used, sizeof(struct ip_set_restore), len); + /* Get and ensure header size */ + if (used + sizeof(struct ip_set_restore) > len) + return line; + set_restore = (struct ip_set_restore *) (data + used); + used += sizeof(struct ip_set_restore); + + /* Ensure data size */ + if (used + + set_restore->header_size + + set_restore->members_size > len) + return line; + + /* Check marker */ + if (set_restore->index == IP_SET_INVALID_ID) { + line--; + goto bindings; + } + + /* Try to create the set */ + DP("restore %s %s", set_restore->name, set_restore->typename); + res = ip_set_create(set_restore->name, + set_restore->typename, + set_restore->index, + data + used, + set_restore->header_size); + + if (res != 0) + return line; + used += set_restore->header_size; + + index = ip_set_find_byindex(set_restore->index); + DP("index %u, restore_index %u", index, set_restore->index); + if (index != set_restore->index) + 
return line; + /* Try to restore members data */ + set = ip_set_list[index]; + members_size = 0; + DP("members_size %u reqsize %u", + set_restore->members_size, set->type->reqsize); + while (members_size + set->type->reqsize <= + set_restore->members_size) { + line++; + DP("members: %u, line %u", members_size, line); + if (__ip_set_addip(index, + data + used + members_size, + set->type->reqsize)) { + return line; + } + members_size += set->type->reqsize; + } + + DP("members_size %u %u", + set_restore->members_size, members_size); + if (members_size != set_restore->members_size) + return line++; + used += set_restore->members_size; + } + + bindings: + /* Loop to restore bindings */ + while (used < len) { + line++; + + DP("restore binding, line %u", line); + /* Get and ensure size */ + if (used + sizeof(struct ip_set_hash_save) > len) + return line; + hash_save = (struct ip_set_hash_save *) (data + used); + used += sizeof(struct ip_set_hash_save); + + /* hash_save->id is used to store the index */ + index = ip_set_find_byindex(hash_save->id); + DP("restore binding index %u, id %u, %u -> %u", + index, hash_save->id, hash_save->ip, hash_save->binding); + if (index != hash_save->id) + return line; + + set = ip_set_list[hash_save->id]; + /* Null valued IP means default binding */ + if (hash_save->ip) + res = ip_set_hash_add(set->id, + hash_save->ip, + hash_save->binding); + else { + IP_SET_ASSERT(set->binding == IP_SET_INVALID_ID); + WRITE_LOCK(&ip_set_lock); + set->binding = hash_save->binding; + __ip_set_get(set->binding); + WRITE_UNLOCK(&ip_set_lock); + DP("default binding: %u", set->binding); + } + if (res != 0) + return line; + } + if (used != len) + return line; + + return 0; +} + +static int +ip_set_sockfn_set(struct sock *sk, int optval, void *user, unsigned int len) +{ + void *data; + int res = 0; /* Assume OK */ + unsigned *op; + struct ip_set_req_adt *req_adt; + ip_set_id_t index = IP_SET_INVALID_ID; + int (*adtfn)(ip_set_id_t index, + const void *data, size_t 
size); + struct fn_table { + int (*fn)(ip_set_id_t index, + const void *data, size_t size); + } adtfn_table[] = + { { ip_set_addip }, { ip_set_delip }, { ip_set_testip}, + { ip_set_bindip}, { ip_set_unbindip }, { ip_set_testbind }, + }; + + DP("optval=%d, user=%p, len=%d", optval, user, len); + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optval != SO_IP_SET) + return -EBADF; + if (len <= sizeof(unsigned)) { + ip_set_printk("short userdata (want >%d, got %d)", + sizeof(unsigned), len); + return -EINVAL; + } + data = vmalloc(len); + if (!data) { + DP("out of mem for %d bytes", len); + return -ENOMEM; + } + if (copy_from_user(data, user, len) != 0) { + res = -EFAULT; + goto done; + } + if (down_interruptible(&ip_set_app_mutex)) { + res = -EINTR; + goto done; + } + + op = (unsigned *)data; + DP("op=%x", *op); + + if (*op < IP_SET_OP_VERSION) { + /* Check the version at the beginning of operations */ + struct ip_set_req_version *req_version = + (struct ip_set_req_version *) data; + if (req_version->version != IP_SET_PROTOCOL_VERSION) { + res = -EPROTO; + goto done; + } + } + + switch (*op) { + case IP_SET_OP_CREATE:{ + struct ip_set_req_create *req_create + = (struct ip_set_req_create *) data; + + if (len <= sizeof(struct ip_set_req_create)) { + ip_set_printk("short CREATE data (want >%d, got %d)", + sizeof(struct ip_set_req_create), len); + res = -EINVAL; + goto done; + } + req_create->name[IP_SET_MAXNAMELEN - 1] = '\0'; + req_create->typename[IP_SET_MAXNAMELEN - 1] = '\0'; + res = ip_set_create(req_create->name, + req_create->typename, + IP_SET_INVALID_ID, + data + sizeof(struct ip_set_req_create), + len - sizeof(struct ip_set_req_create)); + goto done; + } + case IP_SET_OP_DESTROY:{ + struct ip_set_req_std *req_destroy + = (struct ip_set_req_std *) data; + + if (len != sizeof(struct ip_set_req_std)) { + ip_set_printk("invalid DESTROY data (want %d, got %d)", + sizeof(struct ip_set_req_std), len); + res = -EINVAL; + goto done; + } + if 
(strcmp(req_destroy->name, IPSET_TOKEN_ALL) == 0) { + /* Destroy all sets */ + index = IP_SET_INVALID_ID; + } else { + req_destroy->name[IP_SET_MAXNAMELEN - 1] = '\0'; + index = ip_set_find_byname(req_destroy->name); + + if (index == IP_SET_INVALID_ID) { + res = -ENOENT; + goto done; + } + } + + res = ip_set_destroy(index); + goto done; + } + case IP_SET_OP_FLUSH:{ + struct ip_set_req_std *req_flush = + (struct ip_set_req_std *) data; + + if (len != sizeof(struct ip_set_req_std)) { + ip_set_printk("invalid FLUSH data (want %d, got %d)", + sizeof(struct ip_set_req_std), len); + res = -EINVAL; + goto done; + } + if (strcmp(req_flush->name, IPSET_TOKEN_ALL) == 0) { + /* Flush all sets */ + index = IP_SET_INVALID_ID; + } else { + req_flush->name[IP_SET_MAXNAMELEN - 1] = '\0'; + index = ip_set_find_byname(req_flush->name); + + if (index == IP_SET_INVALID_ID) { + res = -ENOENT; + goto done; + } + } + res = ip_set_flush(index); + goto done; + } + case IP_SET_OP_RENAME:{ + struct ip_set_req_create *req_rename + = (struct ip_set_req_create *) data; + + if (len != sizeof(struct ip_set_req_create)) { + ip_set_printk("invalid RENAME data (want %d, got %d)", + sizeof(struct ip_set_req_create), len); + res = -EINVAL; + goto done; + } + + req_rename->name[IP_SET_MAXNAMELEN - 1] = '\0'; + req_rename->typename[IP_SET_MAXNAMELEN - 1] = '\0'; + + index = ip_set_find_byname(req_rename->name); + if (index == IP_SET_INVALID_ID) { + res = -ENOENT; + goto done; + } + res = ip_set_rename(index, req_rename->typename); + goto done; + } + case IP_SET_OP_SWAP:{ + struct ip_set_req_create *req_swap + = (struct ip_set_req_create *) data; + ip_set_id_t to_index; + + if (len != sizeof(struct ip_set_req_create)) { + ip_set_printk("invalid SWAP data (want %d, got %d)", + sizeof(struct ip_set_req_create), len); + res = -EINVAL; + goto done; + } + + req_swap->name[IP_SET_MAXNAMELEN - 1] = '\0'; + req_swap->typename[IP_SET_MAXNAMELEN - 1] = '\0'; + + index = ip_set_find_byname(req_swap->name); + if 
(index == IP_SET_INVALID_ID) { + res = -ENOENT; + goto done; + } + to_index = ip_set_find_byname(req_swap->typename); + if (to_index == IP_SET_INVALID_ID) { + res = -ENOENT; + goto done; + } + res = ip_set_swap(index, to_index); + goto done; + } + default: + break; /* Set identified by id */ + } + + /* There we may have add/del/test/bind/unbind/test_bind operations */ + if (*op < IP_SET_OP_ADD_IP || *op > IP_SET_OP_TEST_BIND_SET) { + res = -EBADMSG; + goto done; + } + adtfn = adtfn_table[*op - IP_SET_OP_ADD_IP].fn; + + if (len < sizeof(struct ip_set_req_adt)) { + ip_set_printk("short data in adt request (want >=%d, got %d)", + sizeof(struct ip_set_req_adt), len); + res = -EINVAL; + goto done; + } + req_adt = (struct ip_set_req_adt *) data; + + /* -U :all: :all:|:default: uses IP_SET_INVALID_ID */ + if (!(*op == IP_SET_OP_UNBIND_SET + && req_adt->index == IP_SET_INVALID_ID)) { + index = ip_set_find_byindex(req_adt->index); + if (index == IP_SET_INVALID_ID) { + res = -ENOENT; + goto done; + } + } + res = adtfn(index, data, len); + + done: + up(&ip_set_app_mutex); + vfree(data); + if (res > 0) + res = 0; + DP("final result %d", res); + return res; +} + +static int +ip_set_sockfn_get(struct sock *sk, int optval, void *user, int *len) +{ + int res = 0; + unsigned *op; + ip_set_id_t index = IP_SET_INVALID_ID; + void *data; + int copylen = *len; + + DP("optval=%d, user=%p, len=%d", optval, user, *len); + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (optval != SO_IP_SET) + return -EBADF; + if (*len < sizeof(unsigned)) { + ip_set_printk("short userdata (want >=%d, got %d)", + sizeof(unsigned), *len); + return -EINVAL; + } + data = vmalloc(*len); + if (!data) { + DP("out of mem for %d bytes", *len); + return -ENOMEM; + } + if (copy_from_user(data, user, *len) != 0) { + res = -EFAULT; + goto done; + } + if (down_interruptible(&ip_set_app_mutex)) { + res = -EINTR; + goto done; + } + + op = (unsigned *) data; + DP("op=%x", *op); + + if (*op < IP_SET_OP_VERSION) { + /* 
Check the version at the beginning of operations */ + struct ip_set_req_version *req_version = + (struct ip_set_req_version *) data; + if (req_version->version != IP_SET_PROTOCOL_VERSION) { + res = -EPROTO; + goto done; + } + } + + switch (*op) { + case IP_SET_OP_VERSION: { + struct ip_set_req_version *req_version = + (struct ip_set_req_version *) data; + + if (*len != sizeof(struct ip_set_req_version)) { + ip_set_printk("invalid VERSION (want %d, got %d)", + sizeof(struct ip_set_req_version), + *len); + res = -EINVAL; + goto done; + } + + req_version->version = IP_SET_PROTOCOL_VERSION; + res = copy_to_user(user, req_version, + sizeof(struct ip_set_req_version)); + goto done; + } + case IP_SET_OP_GET_BYNAME: { + struct ip_set_req_get_set *req_get + = (struct ip_set_req_get_set *) data; + + if (*len != sizeof(struct ip_set_req_get_set)) { + ip_set_printk("invalid GET_BYNAME (want %d, got %d)", + sizeof(struct ip_set_req_get_set), *len); + res = -EINVAL; + goto done; + } + req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; + index = ip_set_find_byname(req_get->set.name); + req_get->set.index = index; + goto copy; + } + case IP_SET_OP_GET_BYINDEX: { + struct ip_set_req_get_set *req_get + = (struct ip_set_req_get_set *) data; + + if (*len != sizeof(struct ip_set_req_get_set)) { + ip_set_printk("invalid GET_BYINDEX (want %d, got %d)", + sizeof(struct ip_set_req_get_set), *len); + res = -EINVAL; + goto done; + } + req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; + index = ip_set_find_byindex(req_get->set.index); + strncpy(req_get->set.name, + index == IP_SET_INVALID_ID ? 
"" + : ip_set_list[index]->name, IP_SET_MAXNAMELEN); + goto copy; + } + case IP_SET_OP_ADT_GET: { + struct ip_set_req_adt_get *req_get + = (struct ip_set_req_adt_get *) data; + + if (*len != sizeof(struct ip_set_req_adt_get)) { + ip_set_printk("invalid ADT_GET (want %d, got %d)", + sizeof(struct ip_set_req_adt_get), *len); + res = -EINVAL; + goto done; + } + req_get->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; + index = ip_set_find_byname(req_get->set.name); + if (index != IP_SET_INVALID_ID) { + req_get->set.index = index; + strncpy(req_get->typename, + ip_set_list[index]->type->typename, + IP_SET_MAXNAMELEN - 1); + } else { + res = -ENOENT; + goto done; + } + goto copy; + } + case IP_SET_OP_MAX_SETS: { + struct ip_set_req_max_sets *req_max_sets + = (struct ip_set_req_max_sets *) data; + ip_set_id_t i; + + if (*len != sizeof(struct ip_set_req_max_sets)) { + ip_set_printk("invalid MAX_SETS (want %d, got %d)", + sizeof(struct ip_set_req_max_sets), *len); + res = -EINVAL; + goto done; + } + + if (strcmp(req_max_sets->set.name, IPSET_TOKEN_ALL) == 0) { + req_max_sets->set.index = IP_SET_INVALID_ID; + } else { + req_max_sets->set.name[IP_SET_MAXNAMELEN - 1] = '\0'; + req_max_sets->set.index = + ip_set_find_byname(req_max_sets->set.name); + if (req_max_sets->set.index == IP_SET_INVALID_ID) { + res = -ENOENT; + goto done; + } + } + req_max_sets->max_sets = ip_set_max; + req_max_sets->sets = 0; + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] != NULL) + req_max_sets->sets++; + } + goto copy; + } + case IP_SET_OP_LIST_SIZE: + case IP_SET_OP_SAVE_SIZE: { + struct ip_set_req_setnames *req_setnames + = (struct ip_set_req_setnames *) data; + struct ip_set_name_list *name_list; + struct ip_set *set; + ip_set_id_t i; + int used; + + if (*len < sizeof(struct ip_set_req_setnames)) { + ip_set_printk("short LIST_SIZE (want >=%d, got %d)", + sizeof(struct ip_set_req_setnames), *len); + res = -EINVAL; + goto done; + } + + req_setnames->size = 0; + used = sizeof(struct 
ip_set_req_setnames); + for (i = 0; i < ip_set_max; i++) { + if (ip_set_list[i] == NULL) + continue; + name_list = (struct ip_set_name_list *) + (data + used); + used += sizeof(struct ip_set_name_list); + if (used > copylen) { + res = -EAGAIN; + goto done; + } + set = ip_set_list[i]; + /* Fill in index, name, etc. */ + name_list->index = i; + name_list->id = set->id; + strncpy(name_list->name, + set->name, + IP_SET_MAXNAMELEN - 1); + strncpy(name_list->typename, + set->type->typename, + IP_SET_MAXNAMELEN - 1); + DP("filled %s of type %s, index %u\n", + name_list->name, name_list->typename, + name_list->index); + if (!(req_setnames->index == IP_SET_INVALID_ID + || req_setnames->index == i)) + continue; + /* Update size */ + switch (*op) { + case IP_SET_OP_LIST_SIZE: { + req_setnames->size += sizeof(struct ip_set_list) + + set->type->header_size + + set->type->list_members_size(set); + FOREACH_HASH_DO(__set_hash_bindings_size_list, + i, &req_setnames->size); + break; + } + case IP_SET_OP_SAVE_SIZE: { + req_setnames->size += sizeof(struct ip_set_save) + + set->type->header_size + + set->type->list_members_size(set); + FOREACH_HASH_DO(__set_hash_bindings_size_save, + i, &req_setnames->size); + break; + } + default: + break; + } + } + if (copylen != used) { + res = -EAGAIN; + goto done; + } + goto copy; + } + case IP_SET_OP_LIST: { + struct ip_set_req_list *req_list + = (struct ip_set_req_list *) data; + ip_set_id_t i; + int used; + + if (*len < sizeof(struct ip_set_req_list)) { + ip_set_printk("short LIST (want >=%d, got %d)", + sizeof(struct ip_set_req_list), *len); + res = -EINVAL; + goto done; + } + index = req_list->index; + if (index != IP_SET_INVALID_ID + && ip_set_find_byindex(index) != index) { + res = -ENOENT; + goto done; + } + used = 0; + if (index == IP_SET_INVALID_ID) { + /* List all sets */ + for (i = 0; i < ip_set_max && res == 0; i++) { + if (ip_set_list[i] != NULL) + res = ip_set_list_set(i, data, &used, *len); + } + } else { + /* List an individual 
set */ + res = ip_set_list_set(index, data, &used, *len); + } + if (res != 0) + goto done; + else if (copylen != used) { + res = -EAGAIN; + goto done; + } + goto copy; + } + case IP_SET_OP_SAVE: { + struct ip_set_req_list *req_save + = (struct ip_set_req_list *) data; + ip_set_id_t i; + int used; + + if (*len < sizeof(struct ip_set_req_list)) { + ip_set_printk("short SAVE (want >=%d, got %d)", + sizeof(struct ip_set_req_list), *len); + res = -EINVAL; + goto done; + } + index = req_save->index; + if (index != IP_SET_INVALID_ID + && ip_set_find_byindex(index) != index) { + res = -ENOENT; + goto done; + } + used = 0; + if (index == IP_SET_INVALID_ID) { + /* Save all sets */ + for (i = 0; i < ip_set_max && res == 0; i++) { + if (ip_set_list[i] != NULL) + res = ip_set_save_set(i, data, &used, *len); + } + } else { + /* Save an individual set */ + res = ip_set_save_set(index, data, &used, *len); + } + if (res == 0) + res = ip_set_save_bindings(index, data, &used, *len); + + if (res != 0) + goto done; + else if (copylen != used) { + res = -EAGAIN; + goto done; + } + goto copy; + } + case IP_SET_OP_RESTORE: { + struct ip_set_req_setnames *req_restore + = (struct ip_set_req_setnames *) data; + int line; + + if (*len < sizeof(struct ip_set_req_setnames) + || *len != req_restore->size) { + ip_set_printk("invalid RESTORE (want =%d, got %d)", + req_restore->size, *len); + res = -EINVAL; + goto done; + } + line = ip_set_restore(data + sizeof(struct ip_set_req_setnames), + req_restore->size - sizeof(struct ip_set_req_setnames)); + DP("ip_set_restore: %u", line); + if (line != 0) { + res = -EAGAIN; + req_restore->size = line; + copylen = sizeof(struct ip_set_req_setnames); + goto copy; + } + goto done; + } + default: + res = -EBADMSG; + goto done; + } /* end of switch(op) */ + + copy: + DP("set %s, copylen %u", index != IP_SET_INVALID_ID + && ip_set_list[index] + ? 
ip_set_list[index]->name + : ":all:", copylen); + if (res == 0) + res = copy_to_user(user, data, copylen); + else + copy_to_user(user, data, copylen); + + done: + up(&ip_set_app_mutex); + vfree(data); + if (res > 0) + res = 0; + DP("final result %d", res); + return res; +} + +static struct nf_sockopt_ops so_set = { + .pf = PF_INET, + .set_optmin = SO_IP_SET, + .set_optmax = SO_IP_SET + 1, + .set = &ip_set_sockfn_set, + .get_optmin = SO_IP_SET, + .get_optmax = SO_IP_SET + 1, + .get = &ip_set_sockfn_get, + .use = 0 +}; + +static int max_sets, hash_size; +module_param(max_sets, int, 0600); +MODULE_PARM_DESC(max_sets, "maximal number of sets"); +module_param(hash_size, int, 0600); +MODULE_PARM_DESC(hash_size, "hash size for bindings"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("module implementing core IP set support"); + +static int __init init(void) +{ + int res; + ip_set_id_t i; + + get_random_bytes(&ip_set_hash_random, 4); + if (max_sets) + ip_set_max = max_sets; + ip_set_list = vmalloc(sizeof(struct ip_set *) * ip_set_max); + if (!ip_set_list) { + printk(KERN_ERR "Unable to create ip_set_list\n"); + return -ENOMEM; + } + memset(ip_set_list, 0, sizeof(struct ip_set *) * ip_set_max); + if (hash_size) + ip_set_bindings_hash_size = hash_size; + ip_set_hash = vmalloc(sizeof(struct list_head) * ip_set_bindings_hash_size); + if (!ip_set_hash) { + printk(KERN_ERR "Unable to create ip_set_hash\n"); + vfree(ip_set_list); + return -ENOMEM; + } + for (i = 0; i < ip_set_bindings_hash_size; i++) + INIT_LIST_HEAD(&ip_set_hash[i]); + + INIT_LIST_HEAD(&set_type_list); + + res = nf_register_sockopt(&so_set); + if (res != 0) { + ip_set_printk("SO_SET registry failed: %d", res); + vfree(ip_set_list); + vfree(ip_set_hash); + return res; + } + return 0; +} + +static void __exit fini(void) +{ + /* There can't be any existing set or binding */ + nf_unregister_sockopt(&so_set); + vfree(ip_set_list); + vfree(ip_set_hash); + DP("these are the famous 
last words"); +} + +EXPORT_SYMBOL(ip_set_register_set_type); +EXPORT_SYMBOL(ip_set_unregister_set_type); + +EXPORT_SYMBOL(ip_set_get_byname); +EXPORT_SYMBOL(ip_set_get_byindex); +EXPORT_SYMBOL(ip_set_put); + +EXPORT_SYMBOL(ip_set_addip_kernel); +EXPORT_SYMBOL(ip_set_delip_kernel); +EXPORT_SYMBOL(ip_set_testip_kernel); + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_iphash.c linux-2.6.11.3/net/ipv4/netfilter/ip_set_iphash.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_iphash.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_set_iphash.c 2005-03-13 22:04:00.486722944 +0100 @@ -0,0 +1,379 @@ +/* Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing an ip hash set */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +static inline __u32 +jhash_ip(const struct ip_set_iphash *map, ip_set_ip_t ip) +{ + return jhash_1word(ip, map->initval); +} + +static inline __u32 +randhash_ip(const struct ip_set_iphash *map, ip_set_ip_t ip) +{ + return (1 + ip % map->prime); +} + +static inline __u32 +hash_id(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + __u32 jhash, randhash, id; + u_int16_t i; + + *hash_ip = ip & map->netmask; + jhash = jhash_ip(map, *hash_ip); + randhash = randhash_ip(map, *hash_ip); + DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u, %u.%u.%u.%u", + set->name, HIPQUAD(ip), HIPQUAD(*hash_ip), HIPQUAD(map->netmask)); + + for (i = 0; i < map->probes; i++) { + id = (jhash + i * randhash) % map->hashsize; + DP("hash key: %u", id); + if (map->members[id] == *hash_ip) + 
return id; + /* No shortcut at testing - there can be deleted + * entries. */ + } + return UINT_MAX; +} + +static inline int +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + return (hash_id(set, ip, hash_ip) != UINT_MAX); +} + +static int +testip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_iphash *req = + (struct ip_set_req_iphash *) data; + + if (size != sizeof(struct ip_set_req_iphash)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_iphash), + size); + return -EINVAL; + } + return __testip(set, req->ip, hash_ip); +} + +static int +testip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + return __testip(set, + ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); +} + +static inline int +__addip(struct ip_set_iphash *map, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + __u32 jhash, randhash, probe; + u_int16_t i; + + *hash_ip = ip & map->netmask; + jhash = jhash_ip(map, *hash_ip); + randhash = randhash_ip(map, *hash_ip); + + for (i = 0; i < map->probes; i++) { + probe = (jhash + i * randhash) % map->hashsize; + if (map->members[probe] == *hash_ip) + return -EEXIST; + if (!map->members[probe]) { + map->members[probe] = *hash_ip; + return 0; + } + } + /* Trigger rehashing */ + return -EAGAIN; +} + +static int +addip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_iphash *req = + (struct ip_set_req_iphash *) data; + + if (size != sizeof(struct ip_set_req_iphash)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_iphash), + size); + return -EINVAL; + } + return __addip((struct ip_set_iphash *) set->data, req->ip, hash_ip); +} + +static int +addip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + return __addip((struct ip_set_iphash *) set->data, + 
ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); +} + +static int retry(struct ip_set *set) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + ip_set_ip_t hash_ip, *members; + u_int32_t i, hashsize; + unsigned newbytes; + int res; + struct ip_set_iphash tmp = { + .hashsize = map->hashsize, + .probes = map->probes, + .resize = map->resize, + .netmask = map->netmask, + }; + + if (map->resize == 0) + return -ERANGE; + + again: + res = 0; + + /* Calculate new parameters */ + get_random_bytes(&tmp.initval, 4); + hashsize = tmp.hashsize + (tmp.hashsize * map->resize)/100; + if (hashsize == tmp.hashsize) + hashsize++; + tmp.prime = make_prime(hashsize); + + ip_set_printk("rehashing of set %s triggered: " + "hashsize grows from %u to %u", + set->name, tmp.hashsize, hashsize); + tmp.hashsize = hashsize; + + newbytes = hashsize * sizeof(ip_set_ip_t); + tmp.members = ip_set_malloc(newbytes); + if (!tmp.members) { + DP("out of memory for %d bytes", newbytes); + return -ENOMEM; + } + memset(tmp.members, 0, newbytes); + + write_lock_bh(&set->lock); + map = (struct ip_set_iphash *) set->data; /* Play safe */ + for (i = 0; i < map->hashsize && res == 0; i++) { + if (map->members[i]) + res = __addip(&tmp, map->members[i], &hash_ip); + } + if (res) { + /* Failure, try again */ + write_unlock_bh(&set->lock); + ip_set_free(tmp.members, newbytes); + goto again; + } + + /* Success at resizing! 
*/ + members = map->members; + hashsize = map->hashsize; + + map->initval = tmp.initval; + map->prime = tmp.prime; + map->hashsize = tmp.hashsize; + map->members = tmp.members; + write_unlock_bh(&set->lock); + + ip_set_free(members, hashsize * sizeof(ip_set_ip_t)); + + return 0; +} + +static inline int +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + ip_set_ip_t id = hash_id(set, ip, hash_ip); + + if (id == UINT_MAX) + return -EEXIST; + + map->members[id] = 0; + return 0; +} + +static int +delip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_iphash *req = + (struct ip_set_req_iphash *) data; + + if (size != sizeof(struct ip_set_req_iphash)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_iphash), + size); + return -EINVAL; + } + return __delip(set, req->ip, hash_ip); +} + +static int +delip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + return __delip(set, + ntohl(flags & IPSET_SRC ? 
skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); +} + +static int create(struct ip_set *set, const void *data, size_t size) +{ + unsigned newbytes; + struct ip_set_req_iphash_create *req = + (struct ip_set_req_iphash_create *) data; + struct ip_set_iphash *map; + + if (size != sizeof(struct ip_set_req_iphash_create)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_iphash_create), + size); + return -EINVAL; + } + + if (req->hashsize < 1) { + ip_set_printk("hashsize too small"); + return -ENOEXEC; + } + + map = kmalloc(sizeof(struct ip_set_iphash), GFP_KERNEL); + if (!map) { + DP("out of memory for %d bytes", + sizeof(struct ip_set_iphash)); + return -ENOMEM; + } + get_random_bytes(&map->initval, 4); + map->prime = make_prime(req->hashsize); + map->hashsize = req->hashsize; + map->probes = req->probes; + map->resize = req->resize; + map->netmask = req->netmask; + newbytes = map->hashsize * sizeof(ip_set_ip_t); + map->members = ip_set_malloc(newbytes); + if (!map->members) { + DP("out of memory for %d bytes", newbytes); + kfree(map); + return -ENOMEM; + } + memset(map->members, 0, newbytes); + + set->data = map; + return 0; +} + +static void destroy(struct ip_set *set) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + + ip_set_free(map->members, map->hashsize * sizeof(ip_set_ip_t)); + kfree(map); + + set->data = NULL; +} + +static void flush(struct ip_set *set) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + memset(map->members, 0, map->hashsize * sizeof(ip_set_ip_t)); +} + +static void list_header(const struct ip_set *set, void *data) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + struct ip_set_req_iphash_create *header = + (struct ip_set_req_iphash_create *) data; + + header->hashsize = map->hashsize; + header->probes = map->probes; + header->resize = map->resize; + header->netmask = map->netmask; +} + +static int list_members_size(const struct ip_set 
*set) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + + return (map->hashsize * sizeof(ip_set_ip_t)); +} + +static void list_members(const struct ip_set *set, void *data) +{ + struct ip_set_iphash *map = (struct ip_set_iphash *) set->data; + int bytes = map->hashsize * sizeof(ip_set_ip_t); + + memcpy(data, map->members, bytes); +} + +static struct ip_set_type ip_set_iphash = { + .typename = SETTYPE_NAME, + .typecode = IPSET_TYPE_IP, + .protocol_version = IP_SET_PROTOCOL_VERSION, + .create = &create, + .destroy = &destroy, + .flush = &flush, + .reqsize = sizeof(struct ip_set_req_iphash), + .addip = &addip, + .addip_kernel = &addip_kernel, + .retry = &retry, + .delip = &delip, + .delip_kernel = &delip_kernel, + .testip = &testip, + .testip_kernel = &testip_kernel, + .header_size = sizeof(struct ip_set_req_iphash_create), + .list_header = &list_header, + .list_members_size = &list_members_size, + .list_members = &list_members, + .me = THIS_MODULE, +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("iphash type of IP sets"); + +static int __init init(void) +{ + init_max_malloc_size(); + return ip_set_register_set_type(&ip_set_iphash); +} + +static void __exit fini(void) +{ + /* FIXME: possible race with ip_set_create() */ + ip_set_unregister_set_type(&ip_set_iphash); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_ipmap.c linux-2.6.11.3/net/ipv4/netfilter/ip_set_ipmap.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_ipmap.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_set_ipmap.c 2005-03-13 22:04:00.486722944 +0100 @@ -0,0 +1,313 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free 
Software Foundation. + */ + +/* Kernel module implementing an IP set type: the single bitmap type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +static inline ip_set_ip_t +ip_to_id(const struct ip_set_ipmap *map, ip_set_ip_t ip) +{ + return (ip - map->first_ip)/map->hosts; +} + +static inline int +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + + if (ip < map->first_ip || ip > map->last_ip) + return -ERANGE; + + *hash_ip = ip & map->netmask; + DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u", + set->name, HIPQUAD(ip), HIPQUAD(*hash_ip)); + return !!test_bit(ip_to_id(map, *hash_ip), map->members); +} + +static int +testip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_ipmap *req = + (struct ip_set_req_ipmap *) data; + + if (size != sizeof(struct ip_set_req_ipmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_ipmap), + size); + return -EINVAL; + } + return __testip(set, req->ip, hash_ip); +} + +static int +testip_kernel(struct ip_set *set, + const struct sk_buff *skb, + u_int32_t flags, + ip_set_ip_t *hash_ip) +{ + int res; + + DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u", + flags & IPSET_SRC ? "SRC" : "DST", + NIPQUAD(skb->nh.iph->saddr), + NIPQUAD(skb->nh.iph->daddr)); + + res = __testip(set, + ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); + return (res < 0 ? 
0 : res); +} + +static inline int +__addip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + + if (ip < map->first_ip || ip > map->last_ip) + return -ERANGE; + + *hash_ip = ip & map->netmask; + DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); + if (test_and_set_bit(ip_to_id(map, *hash_ip), map->members)) + return -EEXIST; + + return 0; +} + +static int +addip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_ipmap *req = + (struct ip_set_req_ipmap *) data; + + if (size != sizeof(struct ip_set_req_ipmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_ipmap), + size); + return -EINVAL; + } + DP("%u.%u.%u.%u", HIPQUAD(req->ip)); + return __addip(set, req->ip, hash_ip); +} + +static int +addip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + return __addip(set, + ntohl(flags & IPSET_SRC ? 
skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); +} + +static inline int +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + + if (ip < map->first_ip || ip > map->last_ip) + return -ERANGE; + + *hash_ip = ip & map->netmask; + DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); + if (!test_and_clear_bit(ip_to_id(map, *hash_ip), map->members)) + return -EEXIST; + + return 0; +} + +static int +delip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_ipmap *req = + (struct ip_set_req_ipmap *) data; + + if (size != sizeof(struct ip_set_req_ipmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_ipmap), + size); + return -EINVAL; + } + return __delip(set, req->ip, hash_ip); +} + +static int +delip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + return __delip(set, + ntohl(flags & IPSET_SRC ? 
skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); +} + +static int create(struct ip_set *set, const void *data, size_t size) +{ + int newbytes; + struct ip_set_req_ipmap_create *req = + (struct ip_set_req_ipmap_create *) data; + struct ip_set_ipmap *map; + + if (size != sizeof(struct ip_set_req_ipmap_create)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_ipmap_create), + size); + return -EINVAL; + } + + DP("from %u.%u.%u.%u to %u.%u.%u.%u", + HIPQUAD(req->from), HIPQUAD(req->to)); + + if (req->from > req->to) { + DP("bad ip range"); + return -ENOEXEC; + } + + if (req->to - req->from > MAX_RANGE) { + ip_set_printk("range too big (max %d addresses)", + MAX_RANGE); + return -ENOEXEC; + } + + map = kmalloc(sizeof(struct ip_set_ipmap), GFP_KERNEL); + if (!map) { + DP("out of memory for %d bytes", + sizeof(struct ip_set_ipmap)); + return -ENOMEM; + } + map->first_ip = req->from; + map->last_ip = req->to; + map->netmask = req->netmask; + + if (req->netmask == 0xFFFFFFFF) { + map->hosts = 1; + map->sizeid = map->last_ip - map->first_ip + 1; + } else { + unsigned int mask_bits, netmask_bits; + ip_set_ip_t mask; + + map->first_ip &= map->netmask; /* Should we better bark? 
*/ + + mask = range_to_mask(map->first_ip, map->last_ip, &mask_bits); + netmask_bits = mask_to_bits(map->netmask); + + if (!mask || netmask_bits <= mask_bits) + return -ENOEXEC; + + map->hosts = 2 << (32 - netmask_bits - 1); + map->sizeid = 2 << (netmask_bits - mask_bits - 1); + } + newbytes = bitmap_bytes(0, map->sizeid - 1); + map->members = kmalloc(newbytes, GFP_KERNEL); + if (!map->members) { + DP("out of memory for %d bytes", newbytes); + kfree(map); + return -ENOMEM; + } + memset(map->members, 0, newbytes); + + set->data = map; + return 0; +} + +static void destroy(struct ip_set *set) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + + kfree(map->members); + kfree(map); + + set->data = NULL; +} + +static void flush(struct ip_set *set) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + memset(map->members, 0, bitmap_bytes(0, map->sizeid - 1)); +} + +static void list_header(const struct ip_set *set, void *data) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + struct ip_set_req_ipmap_create *header = + (struct ip_set_req_ipmap_create *) data; + + header->from = map->first_ip; + header->to = map->last_ip; + header->netmask = map->netmask; +} + +static int list_members_size(const struct ip_set *set) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + + return bitmap_bytes(0, map->sizeid - 1); +} + +static void list_members(const struct ip_set *set, void *data) +{ + struct ip_set_ipmap *map = (struct ip_set_ipmap *) set->data; + int bytes = bitmap_bytes(0, map->sizeid - 1); + + memcpy(data, map->members, bytes); +} + +static struct ip_set_type ip_set_ipmap = { + .typename = SETTYPE_NAME, + .typecode = IPSET_TYPE_IP, + .protocol_version = IP_SET_PROTOCOL_VERSION, + .create = &create, + .destroy = &destroy, + .flush = &flush, + .reqsize = sizeof(struct ip_set_req_ipmap), + .addip = &addip, + .addip_kernel = &addip_kernel, + .delip = &delip, + .delip_kernel = &delip_kernel, + .testip = &testip, 
+ .testip_kernel = &testip_kernel, + .header_size = sizeof(struct ip_set_req_ipmap_create), + .list_header = &list_header, + .list_members_size = &list_members_size, + .list_members = &list_members, + .me = THIS_MODULE, +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("ipmap type of IP sets"); + +static int __init init(void) +{ + return ip_set_register_set_type(&ip_set_ipmap); +} + +static void __exit fini(void) +{ + /* FIXME: possible race with ip_set_create() */ + ip_set_unregister_set_type(&ip_set_ipmap); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_macipmap.c linux-2.6.11.3/net/ipv4/netfilter/ip_set_macipmap.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_macipmap.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_set_macipmap.c 2005-03-13 22:04:00.487722792 +0100 @@ -0,0 +1,338 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/* Kernel module implementing an IP set type: the macipmap type */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +static int +testip(struct ip_set *set, const void *data, size_t size, ip_set_ip_t *hash_ip) +{ + struct ip_set_macipmap *map = (struct ip_set_macipmap *) set->data; + struct ip_set_macip *table = (struct ip_set_macip *) map->members; + struct ip_set_req_macipmap *req = (struct ip_set_req_macipmap *) data; + + if (size != sizeof(struct ip_set_req_macipmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_macipmap), + size); + return -EINVAL; + } + + if (req->ip < map->first_ip || req->ip > map->last_ip) + return -ERANGE; + + *hash_ip = req->ip; + DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u", + set->name, HIPQUAD(req->ip), HIPQUAD(*hash_ip)); + if (test_bit(IPSET_MACIP_ISSET, + (void *) &table[req->ip - map->first_ip].flags)) { + return (memcmp(req->ethernet, + &table[req->ip - map->first_ip].ethernet, + ETH_ALEN) == 0); + } else { + return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0); + } +} + +static int +testip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + struct ip_set_macip *table = + (struct ip_set_macip *) map->members; + ip_set_ip_t ip; + + ip = ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr + : skb->nh.iph->daddr); + DP("flag: %s src: %u.%u.%u.%u dst: %u.%u.%u.%u", + flags & IPSET_SRC ? "SRC" : "DST", + NIPQUAD(skb->nh.iph->saddr), + NIPQUAD(skb->nh.iph->daddr)); + + if (ip < map->first_ip || ip > map->last_ip) + return 0; + + *hash_ip = ip; + DP("set: %s, ip:%u.%u.%u.%u, %u.%u.%u.%u", + set->name, HIPQUAD(ip), HIPQUAD(*hash_ip)); + if (test_bit(IPSET_MACIP_ISSET, + (void *) &table[ip - map->first_ip].flags)) { + /* Is mac pointer valid? + * If so, compare... 
*/ + return (skb->mac.raw >= skb->head + && (skb->mac.raw + ETH_HLEN) <= skb->data + && (memcmp(eth_hdr(skb)->h_source, + &table[ip - map->first_ip].ethernet, + ETH_ALEN) == 0)); + } else { + return (map->flags & IPSET_MACIP_MATCHUNSET ? 1 : 0); + } +} + +/* returns 0 on success */ +static inline int +__addip(struct ip_set *set, + ip_set_ip_t ip, unsigned char *ethernet, ip_set_ip_t *hash_ip) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + struct ip_set_macip *table = + (struct ip_set_macip *) map->members; + + if (ip < map->first_ip || ip > map->last_ip) + return -ERANGE; + if (test_and_set_bit(IPSET_MACIP_ISSET, + (void *) &table[ip - map->first_ip].flags)) + return -EEXIST; + + *hash_ip = ip; + DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); + memcpy(&table[ip - map->first_ip].ethernet, ethernet, ETH_ALEN); + return 0; +} + +static int +addip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_macipmap *req = + (struct ip_set_req_macipmap *) data; + + if (size != sizeof(struct ip_set_req_macipmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_macipmap), + size); + return -EINVAL; + } + return __addip(set, req->ip, req->ethernet, hash_ip); +} + +static int +addip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + ip_set_ip_t ip; + + ip = ntohl(flags & IPSET_SRC ? 
skb->nh.iph->saddr + : skb->nh.iph->daddr); + + if (!(skb->mac.raw >= skb->head + && (skb->mac.raw + ETH_HLEN) <= skb->data)) + return -EINVAL; + + return __addip(set, ip, eth_hdr(skb)->h_source, hash_ip); +} + +static inline int +__delip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + struct ip_set_macip *table = + (struct ip_set_macip *) map->members; + + if (ip < map->first_ip || ip > map->last_ip) + return -ERANGE; + if (!test_and_clear_bit(IPSET_MACIP_ISSET, + (void *)&table[ip - map->first_ip].flags)) + return -EEXIST; + + *hash_ip = ip; + DP("%u.%u.%u.%u, %u.%u.%u.%u", HIPQUAD(ip), HIPQUAD(*hash_ip)); + return 0; +} + +static int +delip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_macipmap *req = + (struct ip_set_req_macipmap *) data; + + if (size != sizeof(struct ip_set_req_macipmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_macipmap), + size); + return -EINVAL; + } + return __delip(set, req->ip, hash_ip); +} + +static int +delip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + return __delip(set, + ntohl(flags & IPSET_SRC ? 
skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); +} + +static inline size_t members_size(ip_set_id_t from, ip_set_id_t to) +{ + return (size_t)((to - from + 1) * sizeof(struct ip_set_macip)); +} + +static int create(struct ip_set *set, const void *data, size_t size) +{ + int newbytes; + struct ip_set_req_macipmap_create *req = + (struct ip_set_req_macipmap_create *) data; + struct ip_set_macipmap *map; + + if (size != sizeof(struct ip_set_req_macipmap_create)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_macipmap_create), + size); + return -EINVAL; + } + + DP("from %u.%u.%u.%u to %u.%u.%u.%u", + HIPQUAD(req->from), HIPQUAD(req->to)); + + if (req->from > req->to) { + DP("bad ip range"); + return -ENOEXEC; + } + + if (req->to - req->from > MAX_RANGE) { + ip_set_printk("range too big (max %d addresses)", + MAX_RANGE); + return -ENOEXEC; + } + + map = kmalloc(sizeof(struct ip_set_macipmap), GFP_KERNEL); + if (!map) { + DP("out of memory for %d bytes", + sizeof(struct ip_set_macipmap)); + return -ENOMEM; + } + map->flags = req->flags; + map->first_ip = req->from; + map->last_ip = req->to; + newbytes = members_size(map->first_ip, map->last_ip); + map->members = ip_set_malloc(newbytes); + if (!map->members) { + DP("out of memory for %d bytes", newbytes); + kfree(map); + return -ENOMEM; + } + memset(map->members, 0, newbytes); + + set->data = map; + return 0; +} + +static void destroy(struct ip_set *set) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + + ip_set_free(map->members, members_size(map->first_ip, map->last_ip)); + kfree(map); + + set->data = NULL; +} + +static void flush(struct ip_set *set) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + memset(map->members, 0, members_size(map->first_ip, map->last_ip)); +} + +static void list_header(const struct ip_set *set, void *data) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + struct 
ip_set_req_macipmap_create *header = + (struct ip_set_req_macipmap_create *) data; + + DP("list_header %x %x %u", map->first_ip, map->last_ip, + map->flags); + + header->from = map->first_ip; + header->to = map->last_ip; + header->flags = map->flags; +} + +static int list_members_size(const struct ip_set *set) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + + return members_size(map->first_ip, map->last_ip); +} + +static void list_members(const struct ip_set *set, void *data) +{ + struct ip_set_macipmap *map = + (struct ip_set_macipmap *) set->data; + + int bytes = members_size(map->first_ip, map->last_ip); + + memcpy(data, map->members, bytes); +} + +static struct ip_set_type ip_set_macipmap = { + .typename = SETTYPE_NAME, + .typecode = IPSET_TYPE_IP, + .protocol_version = IP_SET_PROTOCOL_VERSION, + .create = &create, + .destroy = &destroy, + .flush = &flush, + .reqsize = sizeof(struct ip_set_req_macipmap), + .addip = &addip, + .addip_kernel = &addip_kernel, + .delip = &delip, + .delip_kernel = &delip_kernel, + .testip = &testip, + .testip_kernel = &testip_kernel, + .header_size = sizeof(struct ip_set_req_macipmap_create), + .list_header = &list_header, + .list_members_size = &list_members_size, + .list_members = &list_members, + .me = THIS_MODULE, +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("macipmap type of IP sets"); + +static int __init init(void) +{ + init_max_malloc_size(); + return ip_set_register_set_type(&ip_set_macipmap); +} + +static void __exit fini(void) +{ + /* FIXME: possible race with ip_set_create() */ + ip_set_unregister_set_type(&ip_set_macipmap); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_nethash.c linux-2.6.11.3/net/ipv4/netfilter/ip_set_nethash.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_nethash.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_set_nethash.c 
2005-03-13 22:04:00.489722488 +0100 @@ -0,0 +1,448 @@ +/* Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing a cidr nethash set */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +static inline __u32 +jhash_ip(const struct ip_set_nethash *map, ip_set_ip_t ip) +{ + return jhash_1word(ip, map->initval); +} + +static inline __u32 +randhash_ip(const struct ip_set_nethash *map, ip_set_ip_t ip) +{ + return (1 + ip % map->prime); +} + +static inline __u32 +hash_id_cidr(struct ip_set_nethash *map, + ip_set_ip_t ip, + unsigned char cidr, + ip_set_ip_t *hash_ip) +{ + __u32 jhash, randhash, id; + u_int16_t i; + + *hash_ip = pack(ip, cidr); + jhash = jhash_ip(map, *hash_ip); + randhash = randhash_ip(map, *hash_ip); + + for (i = 0; i < map->probes; i++) { + id = (jhash + i * randhash) % map->hashsize; + DP("hash key: %u", id); + if (map->members[id] == *hash_ip) + return id; + } + return UINT_MAX; +} + +static inline __u32 +hash_id(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + __u32 id = UINT_MAX; + int i; + + for (i = 0; i < 30 && map->cidr[i]; i++) { + id = hash_id_cidr(map, ip, map->cidr[i], hash_ip); + if (id != UINT_MAX) + break; + } + return id; +} + +static inline int +__testip_cidr(struct ip_set *set, ip_set_ip_t ip, unsigned char cidr, + ip_set_ip_t *hash_ip) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + + return (hash_id_cidr(map, ip, cidr, hash_ip) != UINT_MAX); +} + +static inline int +__testip(struct ip_set *set, ip_set_ip_t ip, ip_set_ip_t *hash_ip) +{ + return (hash_id(set, ip, hash_ip) != UINT_MAX); +} + +static int 
+testip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_nethash *req = + (struct ip_set_req_nethash *) data; + + if (size != sizeof(struct ip_set_req_nethash)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_nethash), + size); + return -EINVAL; + } + return __testip_cidr(set, req->ip, req->cidr, hash_ip); +} + +static int +testip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + return __testip(set, + ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr + : skb->nh.iph->daddr), + hash_ip); +} + +static inline int +__addip_base(struct ip_set_nethash *map, ip_set_ip_t ip) +{ + __u32 jhash, randhash, probe; + u_int16_t i; + + jhash = jhash_ip(map, ip); + randhash = randhash_ip(map, ip); + + for (i = 0; i < map->probes; i++) { + probe = (jhash + i * randhash) % map->hashsize; + if (map->members[probe] == ip) + return -EEXIST; + if (!map->members[probe]) { + map->members[probe] = ip; + return 0; + } + } + /* Trigger rehashing */ + return -EAGAIN; +} + +static inline int +__addip(struct ip_set_nethash *map, ip_set_ip_t ip, unsigned char cidr, + ip_set_ip_t *hash_ip) +{ + *hash_ip = pack(ip, cidr); + DP("%u.%u.%u.%u/%u, %u.%u.%u.%u", HIPQUAD(ip), cidr, HIPQUAD(*hash_ip)); + + return __addip_base(map, *hash_ip); +} + +static void +update_cidr_sizes(struct ip_set_nethash *map, unsigned char cidr) +{ + unsigned char next; + int i; + + for (i = 0; i < 30 && map->cidr[i]; i++) { + if (map->cidr[i] == cidr) { + return; + } else if (map->cidr[i] < cidr) { + next = map->cidr[i]; + map->cidr[i] = cidr; + cidr = next; + } + } + if (i < 30) + map->cidr[i] = cidr; +} + +static int +addip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_nethash *req = + (struct ip_set_req_nethash *) data; + int ret; + + if (size != sizeof(struct ip_set_req_nethash)) { + ip_set_printk("data length wrong (want %d, have %d)", + 
sizeof(struct ip_set_req_nethash), + size); + return -EINVAL; + } + ret = __addip((struct ip_set_nethash *) set->data, + req->ip, req->cidr, hash_ip); + + if (ret == 0) + update_cidr_sizes((struct ip_set_nethash *) set->data, + req->cidr); + + return ret; +} + +static int +addip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + int ret = -ERANGE; + ip_set_ip_t ip = ntohl(flags & IPSET_SRC ? skb->nh.iph->saddr + : skb->nh.iph->daddr); + + if (map->cidr[0]) + ret = __addip(map, ip, map->cidr[0], hash_ip); + + return ret; +} + +static int retry(struct ip_set *set) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + ip_set_ip_t *members; + u_int32_t i, hashsize; + unsigned newbytes; + int res; + struct ip_set_nethash tmp = { + .hashsize = map->hashsize, + .probes = map->probes, + .resize = map->resize + }; + + if (map->resize == 0) + return -ERANGE; + + memcpy(tmp.cidr, map->cidr, 30 * sizeof(unsigned char)); + again: + res = 0; + + /* Calculate new parameters */ + get_random_bytes(&tmp.initval, 4); + hashsize = tmp.hashsize + (tmp.hashsize * map->resize)/100; + if (hashsize == tmp.hashsize) + hashsize++; + tmp.prime = make_prime(hashsize); + + ip_set_printk("rehashing of set %s triggered: " + "hashsize grows from %u to %u", + set->name, tmp.hashsize, hashsize); + tmp.hashsize = hashsize; + + newbytes = hashsize * sizeof(ip_set_ip_t); + tmp.members = ip_set_malloc(newbytes); + if (!tmp.members) { + DP("out of memory for %d bytes", newbytes); + return -ENOMEM; + } + memset(tmp.members, 0, newbytes); + + write_lock_bh(&set->lock); + map = (struct ip_set_nethash *) set->data; /* Play safe */ + for (i = 0; i < map->hashsize && res == 0; i++) { + if (map->members[i]) + res = __addip_base(&tmp, map->members[i]); + } + if (res) { + /* Failure, try again */ + ip_set_free(tmp.members, newbytes); + write_unlock_bh(&set->lock); + goto again; + 
} + + /* Success at resizing! */ + members = map->members; + hashsize = map->hashsize; + + map->initval = tmp.initval; + map->prime = tmp.prime; + map->hashsize = tmp.hashsize; + map->members = tmp.members; + write_unlock_bh(&set->lock); + + ip_set_free(members, hashsize * sizeof(ip_set_ip_t)); + + return 0; +} + +static inline int +__delip(struct ip_set_nethash *map, ip_set_ip_t ip, unsigned char cidr, + ip_set_ip_t *hash_ip) +{ + ip_set_ip_t id = hash_id_cidr(map, ip, cidr, hash_ip); + + if (id == UINT_MAX) + return -EEXIST; + + map->members[id] = 0; + return 0; +} + +static int +delip(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_ip) +{ + struct ip_set_req_nethash *req = + (struct ip_set_req_nethash *) data; + + if (size != sizeof(struct ip_set_req_nethash)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_nethash), + size); + return -EINVAL; + } + /* TODO: no garbage collection in map->cidr */ + return __delip((struct ip_set_nethash *) set->data, + req->ip, req->cidr, hash_ip); +} + +static int +delip_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_ip) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + int ret = -ERANGE; + ip_set_ip_t ip = ntohl(flags & IPSET_SRC ? 
skb->nh.iph->saddr + : skb->nh.iph->daddr); + + if (map->cidr[0]) + ret = __delip(map, ip, map->cidr[0], hash_ip); + + return ret; +} + +static int create(struct ip_set *set, const void *data, size_t size) +{ + unsigned newbytes; + struct ip_set_req_nethash_create *req = + (struct ip_set_req_nethash_create *) data; + struct ip_set_nethash *map; + + if (size != sizeof(struct ip_set_req_nethash_create)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_nethash_create), + size); + return -EINVAL; + } + + if (req->hashsize < 1) { + ip_set_printk("hashsize too small"); + return -ENOEXEC; + } + + map = kmalloc(sizeof(struct ip_set_nethash), GFP_KERNEL); + if (!map) { + DP("out of memory for %d bytes", + sizeof(struct ip_set_nethash)); + return -ENOMEM; + } + get_random_bytes(&map->initval, 4); + map->prime = make_prime(req->hashsize); + map->hashsize = req->hashsize; + map->probes = req->probes; + map->resize = req->resize; + memset(map->cidr, 0, 30 * sizeof(unsigned char)); + newbytes = map->hashsize * sizeof(ip_set_ip_t); + map->members = ip_set_malloc(newbytes); + if (!map->members) { + DP("out of memory for %d bytes", newbytes); + kfree(map); + return -ENOMEM; + } + memset(map->members, 0, newbytes); + + set->data = map; + return 0; +} + +static void destroy(struct ip_set *set) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + + ip_set_free(map->members, map->hashsize * sizeof(ip_set_ip_t)); + kfree(map); + + set->data = NULL; +} + +static void flush(struct ip_set *set) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + memset(map->members, 0, map->hashsize * sizeof(ip_set_ip_t)); + memset(map->cidr, 0, 30 * sizeof(unsigned char)); +} + +static void list_header(const struct ip_set *set, void *data) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + struct ip_set_req_nethash_create *header = + (struct ip_set_req_nethash_create *) data; + + header->hashsize = 
map->hashsize; + header->probes = map->probes; + header->resize = map->resize; +} + +static int list_members_size(const struct ip_set *set) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + + return (map->hashsize * sizeof(ip_set_ip_t)); +} + +static void list_members(const struct ip_set *set, void *data) +{ + struct ip_set_nethash *map = (struct ip_set_nethash *) set->data; + int bytes = map->hashsize * sizeof(ip_set_ip_t); + + memcpy(data, map->members, bytes); +} + +static struct ip_set_type ip_set_nethash = { + .typename = SETTYPE_NAME, + .typecode = IPSET_TYPE_IP, + .protocol_version = IP_SET_PROTOCOL_VERSION, + .create = &create, + .destroy = &destroy, + .flush = &flush, + .reqsize = sizeof(struct ip_set_req_nethash), + .addip = &addip, + .addip_kernel = &addip_kernel, + .retry = &retry, + .delip = &delip, + .delip_kernel = &delip_kernel, + .testip = &testip, + .testip_kernel = &testip_kernel, + .header_size = sizeof(struct ip_set_req_nethash_create), + .list_header = &list_header, + .list_members_size = &list_members_size, + .list_members = &list_members, + .me = THIS_MODULE, +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("nethash type of IP sets"); + +static int __init init(void) +{ + return ip_set_register_set_type(&ip_set_nethash); +} + +static void __exit fini(void) +{ + /* FIXME: possible race with ip_set_create() */ + ip_set_unregister_set_type(&ip_set_nethash); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_portmap.c linux-2.6.11.3/net/ipv4/netfilter/ip_set_portmap.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ip_set_portmap.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ip_set_portmap.c 2005-03-13 22:04:00.490722336 +0100 @@ -0,0 +1,325 @@ +/* Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the 
GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module implementing a port set type as a bitmap */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +/* We must handle non-linear skbs */ +static inline ip_set_ip_t +get_port(const struct sk_buff *skb, u_int32_t flags) +{ + struct iphdr *iph = skb->nh.iph; + u_int16_t offset = ntohs(iph->frag_off) & IP_OFFSET; + + switch (iph->protocol) { + case IPPROTO_TCP: { + struct tcphdr tcph; + + /* See comments at tcp_match in ip_tables.c */ + if (offset) + return INVALID_PORT; + + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &tcph, sizeof(tcph)) < 0) + /* No choice either */ + return INVALID_PORT; + + return ntohs(flags & IPSET_SRC ? + tcph.source : tcph.dest); + } + case IPPROTO_UDP: { + struct udphdr udph; + + if (offset) + return INVALID_PORT; + + if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &udph, sizeof(udph)) < 0) + /* No choice either */ + return INVALID_PORT; + + return ntohs(flags & IPSET_SRC ? 
+ udph.source : udph.dest); + } + default: + return INVALID_PORT; + } +} + +static inline int +__testport(struct ip_set *set, ip_set_ip_t port, ip_set_ip_t *hash_port) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + + if (port < map->first_port || port > map->last_port) + return -ERANGE; + + *hash_port = port; + DP("set: %s, port:%u, %u", set->name, port, *hash_port); + return !!test_bit(port - map->first_port, map->members); +} + +static int +testport(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_port) +{ + struct ip_set_req_portmap *req = + (struct ip_set_req_portmap *) data; + + if (size != sizeof(struct ip_set_req_portmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_portmap), + size); + return -EINVAL; + } + return __testport(set, req->port, hash_port); +} + +static int +testport_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_port) +{ + int res; + ip_set_ip_t port = get_port(skb, flags); + + DP("flag %s port %u", flags & IPSET_SRC ? "SRC" : "DST", port); + if (port == INVALID_PORT) + return 0; + + res = __testport(set, port, hash_port); + + return (res < 0 ? 
0 : res); +} + +static inline int +__addport(struct ip_set *set, ip_set_ip_t port, ip_set_ip_t *hash_port) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + + if (port < map->first_port || port > map->last_port) + return -ERANGE; + if (test_and_set_bit(port - map->first_port, map->members)) + return -EEXIST; + + *hash_port = port; + DP("port %u", port); + return 0; +} + +static int +addport(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_port) +{ + struct ip_set_req_portmap *req = + (struct ip_set_req_portmap *) data; + + if (size != sizeof(struct ip_set_req_portmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_portmap), + size); + return -EINVAL; + } + return __addport(set, req->port, hash_port); +} + +static int +addport_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_port) +{ + ip_set_ip_t port = get_port(skb, flags); + + if (port == INVALID_PORT) + return -EINVAL; + + return __addport(set, port, hash_port); +} + +static inline int +__delport(struct ip_set *set, ip_set_ip_t port, ip_set_ip_t *hash_port) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + + if (port < map->first_port || port > map->last_port) + return -ERANGE; + if (!test_and_clear_bit(port - map->first_port, map->members)) + return -EEXIST; + + *hash_port = port; + DP("port %u", port); + return 0; +} + +static int +delport(struct ip_set *set, const void *data, size_t size, + ip_set_ip_t *hash_port) +{ + struct ip_set_req_portmap *req = + (struct ip_set_req_portmap *) data; + + if (size != sizeof(struct ip_set_req_portmap)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_portmap), + size); + return -EINVAL; + } + return __delport(set, req->port, hash_port); +} + +static int +delport_kernel(struct ip_set *set, const struct sk_buff *skb, + u_int32_t flags, ip_set_ip_t *hash_port) +{ + ip_set_ip_t port = 
get_port(skb, flags); + + if (port == INVALID_PORT) + return -EINVAL; + + return __delport(set, port, hash_port); +} + +static int create(struct ip_set *set, const void *data, size_t size) +{ + int newbytes; + struct ip_set_req_portmap_create *req = + (struct ip_set_req_portmap_create *) data; + struct ip_set_portmap *map; + + if (size != sizeof(struct ip_set_req_portmap_create)) { + ip_set_printk("data length wrong (want %d, have %d)", + sizeof(struct ip_set_req_portmap_create), + size); + return -EINVAL; + } + + DP("from %u to %u", req->from, req->to); + + if (req->from > req->to) { + DP("bad port range"); + return -ENOEXEC; + } + + if (req->to - req->from > MAX_RANGE) { + ip_set_printk("range too big (max %d ports)", + MAX_RANGE); + return -ENOEXEC; + } + + map = kmalloc(sizeof(struct ip_set_portmap), GFP_KERNEL); + if (!map) { + DP("out of memory for %d bytes", + sizeof(struct ip_set_portmap)); + return -ENOMEM; + } + map->first_port = req->from; + map->last_port = req->to; + newbytes = bitmap_bytes(req->from, req->to); + map->members = kmalloc(newbytes, GFP_KERNEL); + if (!map->members) { + DP("out of memory for %d bytes", newbytes); + kfree(map); + return -ENOMEM; + } + memset(map->members, 0, newbytes); + + set->data = map; + return 0; +} + +static void destroy(struct ip_set *set) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + + kfree(map->members); + kfree(map); + + set->data = NULL; +} + +static void flush(struct ip_set *set) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + memset(map->members, 0, bitmap_bytes(map->first_port, map->last_port)); +} + +static void list_header(const struct ip_set *set, void *data) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + struct ip_set_req_portmap_create *header = + (struct ip_set_req_portmap_create *) data; + + DP("list_header %u %u", map->first_port, map->last_port); + + header->from = map->first_port; + header->to = map->last_port; +} + 
+static int list_members_size(const struct ip_set *set) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + + return bitmap_bytes(map->first_port, map->last_port); +} + +static void list_members(const struct ip_set *set, void *data) +{ + struct ip_set_portmap *map = (struct ip_set_portmap *) set->data; + int bytes = bitmap_bytes(map->first_port, map->last_port); + + memcpy(data, map->members, bytes); +} + +static struct ip_set_type ip_set_portmap = { + .typename = SETTYPE_NAME, + .typecode = IPSET_TYPE_PORT, + .protocol_version = IP_SET_PROTOCOL_VERSION, + .create = &create, + .destroy = &destroy, + .flush = &flush, + .reqsize = sizeof(struct ip_set_req_portmap), + .addip = &addport, + .addip_kernel = &addport_kernel, + .delip = &delport, + .delip_kernel = &delport_kernel, + .testip = &testport, + .testip_kernel = &testport_kernel, + .header_size = sizeof(struct ip_set_req_portmap_create), + .list_header = &list_header, + .list_members_size = &list_members_size, + .list_members = &list_members, + .me = THIS_MODULE, +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("portmap type of IP sets"); + +static int __init init(void) +{ + return ip_set_register_set_type(&ip_set_portmap); +} + +static void __exit fini(void) +{ + /* FIXME: possible race with ip_set_create() */ + ip_set_unregister_set_type(&ip_set_portmap); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_IPMARK.c linux-2.6.11.3/net/ipv4/netfilter/ipt_IPMARK.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_IPMARK.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_IPMARK.c 2005-03-13 22:04:32.036926584 +0100 @@ -0,0 +1,81 @@ +/* This is a module which is used for setting the NFMARK field of an skb. 
*/ +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Grzegorz Janoszka "); +MODULE_DESCRIPTION("IP tables IPMARK: mark based on ip address"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_ipmark_target_info *ipmarkinfo = targinfo; + struct iphdr *iph = (*pskb)->nh.iph; + unsigned long mark; + + if (ipmarkinfo->addr == IPT_IPMARK_SRC) + mark = (unsigned long) ntohl(iph->saddr); + else + mark = (unsigned long) ntohl(iph->daddr); + + mark &= ipmarkinfo->andmask; + mark |= ipmarkinfo->ormask; + + if ((*pskb)->nfmark != mark) { + (*pskb)->nfmark = mark; + (*pskb)->nfcache |= NFC_ALTERED; + } + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ipmark_target_info))) { + printk(KERN_WARNING "IPMARK: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_ipmark_target_info))); + return 0; + } + + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "IPMARK: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_ipmark_reg = { + .name = "IPMARK", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_ipmark_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_ipmark_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c linux-2.6.11.3/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c 1970-01-01 01:00:00.000000000 +0100 +++ 
linux-2.6.11.3/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c 2005-03-13 22:03:49.792348736 +0100 @@ -0,0 +1,89 @@ +/** + * Strip all IP options in the IP packet header. + * + * (C) 2001 by Fabrice MARIE + * This software is distributed under GNU GPL v2, 1991 + */ + +#include +#include +#include +#include + +#include + +MODULE_AUTHOR("Fabrice MARIE "); +MODULE_DESCRIPTION("Strip all options in IPv4 packets"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + struct iphdr *iph; + struct sk_buff *skb; + struct ip_options *opt; + unsigned char *optiph; + int l; + + if (!skb_ip_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + skb = (*pskb); + iph = (*pskb)->nh.iph; + optiph = skb->nh.raw; + l = ((struct ip_options *)(&(IPCB(skb)->opt)))->optlen; + + /* if no options in packet then nothing to clear. */ + if (iph->ihl * 4 == sizeof(struct iphdr)) + return IPT_CONTINUE; + + /* else clear all options */ + memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options)); + memset(optiph+sizeof(struct iphdr), IPOPT_NOOP, l); + opt = &(IPCB(skb)->opt); + opt->is_data = 0; + opt->optlen = l; + + skb->nfcache |= NFC_ALTERED; + + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "IPV4OPTSSTRIP: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + /* nothing else to check because no parameters */ + return 1; +} + +static struct ipt_target ipt_ipv4optsstrip_reg = { + .name = "IPV4OPTSSTRIP", + .target = target, + .checkentry = checkentry, + .me = THIS_MODULE }; + +static int __init init(void) +{ + return ipt_register_target(&ipt_ipv4optsstrip_reg); +} + +static void __exit fini(void) +{ + 
ipt_unregister_target(&ipt_ipv4optsstrip_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ROUTE.c linux-2.6.11.3/net/ipv4/netfilter/ipt_ROUTE.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ROUTE.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_ROUTE.c 2005-03-13 22:04:33.120761816 +0100 @@ -0,0 +1,465 @@ +/* + * This implements the ROUTE target, which enables you to setup unusual + * routes not supported by the standard kernel routing table. + * + * Copyright (C) 2002 Cedric de Launois + * + * v 1.11 2004/11/23 + * + * This software is distributed under GNU GPL v2, 1991 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Cedric de Launois "); +MODULE_DESCRIPTION("iptables ROUTE target module"); + +/* Try to route the packet according to the routing keys specified in + * route_info. 
Keys are : + * - ifindex : + * 0 if no oif preferred, + * otherwise set to the index of the desired oif + * - route_info->gw : + * 0 if no gateway specified, + * otherwise set to the next host to which the pkt must be routed + * If success, skb->dev is the output device to which the packet must + * be sent and skb->dst is not NULL + * + * RETURN: -1 if an error occured + * 1 if the packet was succesfully routed to the + * destination desired + * 0 if the kernel routing table could not route the packet + * according to the keys specified + */ +static int route(struct sk_buff *skb, + unsigned int ifindex, + const struct ipt_route_target_info *route_info) +{ + int err; + struct rtable *rt; + struct iphdr *iph = skb->nh.iph; + struct flowi fl = { + .oif = ifindex, + .nl_u = { + .ip4_u = { + .daddr = iph->daddr, + .saddr = 0, + .tos = RT_TOS(iph->tos), + .scope = RT_SCOPE_UNIVERSE, + } + } + }; + + /* The destination address may be overloaded by the target */ + if (route_info->gw) + fl.fl4_dst = route_info->gw; + + /* Trying to route the packet using the standard routing table. */ + if ((err = ip_route_output_key(&rt, &fl))) { + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: couldn't route pkt (err: %i)",err); + return -1; + } + + /* Drop old route. */ + dst_release(skb->dst); + skb->dst = NULL; + + /* Success if no oif specified or if the oif correspond to the + * one desired */ + if (!ifindex || rt->u.dst.dev->ifindex == ifindex) { + skb->dst = &rt->u.dst; + skb->dev = skb->dst->dev; + skb->protocol = htons(ETH_P_IP); + return 1; + } + + /* The interface selected by the routing table is not the one + * specified by the user. This may happen because the dst address + * is one of our own addresses. 
+ */ + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: failed to route as desired gw=%u.%u.%u.%u oif=%i (got oif=%i)\n", + NIPQUAD(route_info->gw), ifindex, rt->u.dst.dev->ifindex); + + return 0; +} + + +/* Stolen from ip_finish_output2 + * PRE : skb->dev is set to the device we are leaving by + * skb->dst is not NULL + * POST: the packet is sent with the link layer header pushed + * the packet is destroyed + */ +static void ip_direct_send(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct hh_cache *hh = dst->hh; + struct net_device *dev = dst->dev; + int hh_len = LL_RESERVED_SPACE(dev); + + /* Be paranoid, rather than too clever. */ + if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) { + struct sk_buff *skb2; + + skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev)); + if (skb2 == NULL) { + kfree_skb(skb); + return; + } + if (skb->sk) + skb_set_owner_w(skb2, skb->sk); + kfree_skb(skb); + skb = skb2; + } + + if (hh) { + int hh_alen; + + read_lock_bh(&hh->hh_lock); + hh_alen = HH_DATA_ALIGN(hh->hh_len); + memcpy(skb->data - hh_alen, hh->hh_data, hh_alen); + read_unlock_bh(&hh->hh_lock); + skb_push(skb, hh->hh_len); + hh->hh_output(skb); + } else if (dst->neighbour) + dst->neighbour->output(skb); + else { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ipt_ROUTE: no hdr & no neighbour cache!\n"); + kfree_skb(skb); + } +} + + +/* PRE : skb->dev is set to the device we are leaving by + * POST: - the packet is directly sent to the skb->dev device, without + * pushing the link layer header. + * - the packet is destroyed + */ +static inline int dev_direct_send(struct sk_buff *skb) +{ + return dev_queue_xmit(skb); +} + + +static unsigned int route_oif(const struct ipt_route_target_info *route_info, + struct sk_buff *skb) +{ + unsigned int ifindex = 0; + struct net_device *dev_out = NULL; + + /* The user set the interface name to use. + * Getting the current interface index. 
+ */ + if ((dev_out = dev_get_by_name(route_info->oif))) { + ifindex = dev_out->ifindex; + } else { + /* Unknown interface name : packet dropped */ + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: oif interface %s not found\n", route_info->oif); + return NF_DROP; + } + + /* Trying the standard way of routing packets */ + switch (route(skb, ifindex, route_info)) { + case 1: + dev_put(dev_out); + if (route_info->flags & IPT_ROUTE_CONTINUE) + return IPT_CONTINUE; + + ip_direct_send(skb); + return NF_STOLEN; + + case 0: + /* Failed to send to oif. Trying the hard way */ + if (route_info->flags & IPT_ROUTE_CONTINUE) + return NF_DROP; + + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: forcing the use of %i\n", + ifindex); + + /* We have to force the use of an interface. + * This interface must be a tunnel interface since + * otherwise we can't guess the hw address for + * the packet. For a tunnel interface, no hw address + * is needed. + */ + if ((dev_out->type != ARPHRD_TUNNEL) + && (dev_out->type != ARPHRD_IPGRE)) { + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: can't guess the hw addr !\n"); + dev_put(dev_out); + return NF_DROP; + } + + /* Send the packet. This will also free skb + * Do not go through the POST_ROUTING hook because + * skb->dst is not set and because it will probably + * get confused by the destination IP address. + */ + skb->dev = dev_out; + dev_direct_send(skb); + dev_put(dev_out); + return NF_STOLEN; + + default: + /* Unexpected error */ + dev_put(dev_out); + return NF_DROP; + } +} + + +static unsigned int route_iif(const struct ipt_route_target_info *route_info, + struct sk_buff *skb) +{ + struct net_device *dev_in = NULL; + + /* Getting the current interface index. 
*/ + if (!(dev_in = dev_get_by_name(route_info->iif))) { + if (net_ratelimit()) + DEBUGP("ipt_ROUTE: iif interface %s not found\n", route_info->iif); + return NF_DROP; + } + + skb->dev = dev_in; + dst_release(skb->dst); + skb->dst = NULL; + + netif_rx(skb); + dev_put(dev_in); + return NF_STOLEN; +} + + +static unsigned int route_gw(const struct ipt_route_target_info *route_info, + struct sk_buff *skb) +{ + if (route(skb, 0, route_info)!=1) + return NF_DROP; + + if (route_info->flags & IPT_ROUTE_CONTINUE) + return IPT_CONTINUE; + + ip_direct_send(skb); + return NF_STOLEN; +} + + +/* To detect and deter routed packet loopback when using the --tee option, + * we take a page out of the raw.patch book: on the copied skb, we set up + * a fake ->nfct entry, pointing to the local &route_tee_track. We skip + * routing packets when we see they already have that ->nfct. + */ + +static struct ip_conntrack route_tee_track; + +static unsigned int ipt_route_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_route_target_info *route_info = targinfo; + struct sk_buff *skb = *pskb; + unsigned int res; + + if (skb->nfct == &route_tee_track.ct_general) { + /* Loopback - a packet we already routed, is to be + * routed another time. Avoid that, now. + */ + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ipt_ROUTE: loopback - DROP!\n"); + return NF_DROP; + } + + /* If we are at PREROUTING or INPUT hook + * the TTL isn't decreased by the IP stack + */ + if (hooknum == NF_IP_PRE_ROUTING || + hooknum == NF_IP_LOCAL_IN) { + + struct iphdr *iph = skb->nh.iph; + + if (iph->ttl <= 1) { + struct rtable *rt; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .tos = RT_TOS(iph->tos), + .scope = ((iph->tos & RTO_ONLINK) ? 
+ RT_SCOPE_LINK : + RT_SCOPE_UNIVERSE) + } + } + }; + + if (ip_route_output_key(&rt, &fl)) { + return NF_DROP; + } + + if (skb->dev == rt->u.dst.dev) { + /* Drop old route. */ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + + /* this will traverse normal stack, and + * thus call conntrack on the icmp packet */ + icmp_send(skb, ICMP_TIME_EXCEEDED, + ICMP_EXC_TTL, 0); + } + + return NF_DROP; + } + + /* + * If we are at INPUT the checksum must be recalculated since + * the length could change as the result of a defragmentation. + */ + if(hooknum == NF_IP_LOCAL_IN) { + iph->ttl = iph->ttl - 1; + iph->check = 0; + iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl); + } else { + ip_decrease_ttl(iph); + } + } + + if ((route_info->flags & IPT_ROUTE_TEE)) { + /* + * Copy the *pskb, and route the copy. Will later return + * IPT_CONTINUE for the original skb, which should continue + * on its way as if nothing happened. The copy should be + * independently delivered to the ROUTE --gw. + */ + skb = skb_copy(*pskb, GFP_ATOMIC); + if (!skb) { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ipt_ROUTE: copy failed!\n"); + return IPT_CONTINUE; + } + } + + /* Tell conntrack to forget this packet since it may get confused + * when a packet is leaving with dst address == our address. + * Good idea ? Dunno. Need advice. + * + * NEW: mark the skb with our &route_tee_track, so we avoid looping + * on any already routed packet. 
+ */ + if (!(route_info->flags & IPT_ROUTE_CONTINUE)) { + nf_conntrack_put(skb->nfct); + skb->nfct = &route_tee_track.ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + skb->nfcache = 0; +#ifdef CONFIG_NETFILTER_DEBUG + skb->nf_debug = 0; +#endif + } + + if (route_info->oif[0] != '\0') { + res = route_oif(route_info, skb); + } else if (route_info->iif[0] != '\0') { + res = route_iif(route_info, skb); + } else if (route_info->gw) { + res = route_gw(route_info, skb); + } else { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ipt_ROUTE: no parameter !\n"); + res = IPT_CONTINUE; + } + + if ((route_info->flags & IPT_ROUTE_TEE)) + res = IPT_CONTINUE; + + return res; +} + + +static int ipt_route_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (strcmp(tablename, "mangle") != 0) { + printk("ipt_ROUTE: bad table `%s', use the `mangle' table.\n", + tablename); + return 0; + } + + if (hook_mask & ~( (1 << NF_IP_PRE_ROUTING) + | (1 << NF_IP_LOCAL_IN) + | (1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) { + printk("ipt_ROUTE: bad hook\n"); + return 0; + } + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_route_target_info))) { + printk(KERN_WARNING "ipt_ROUTE: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_route_target_info))); + return 0; + } + + return 1; +} + + +static struct ipt_target ipt_route_reg = { + .name = "ROUTE", + .target = ipt_route_target, + .checkentry = ipt_route_checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + /* Set up fake conntrack (stolen from raw.patch): + - to never be deleted, not in any hashes */ + atomic_set(&route_tee_track.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(IPS_CONFIRMED_BIT, &route_tee_track.status); + /* Initialize fake conntrack so that NAT will skip it */ + route_tee_track.nat.info.initialized |= + (1 
<< IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST); + + return ipt_register_target(&ipt_route_reg); +} + + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_route_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_SET.c linux-2.6.11.3/net/ipv4/netfilter/ipt_SET.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_SET.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_SET.c 2005-03-13 22:04:00.491722184 +0100 @@ -0,0 +1,128 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* ipt_SET.c - netfilter target to manipulate IP sets */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + const struct ipt_set_info_target *info = targinfo; + + if (info->add_set.index != IP_SET_INVALID_ID) + ip_set_addip_kernel(info->add_set.index, + *pskb, + info->add_set.flags); + if (info->del_set.index != IP_SET_INVALID_ID) + ip_set_delip_kernel(info->del_set.index, + *pskb, + info->del_set.flags); + + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) +{ + struct ipt_set_info_target *info = + (struct ipt_set_info_target *) targinfo; + ip_set_id_t index; + + if (targinfosize != IPT_ALIGN(sizeof(*info))) { + DP("bad target info size %u", targinfosize); + return 0; + } + + if (info->add_set.index != IP_SET_INVALID_ID) { + index 
= ip_set_get_byindex(info->add_set.index); + if (index == IP_SET_INVALID_ID) { + ip_set_printk("cannot find add_set index %u as target", + info->add_set.index); + return 0; /* error */ + } + } + + if (info->del_set.index != IP_SET_INVALID_ID) { + index = ip_set_get_byindex(info->del_set.index); + if (index == IP_SET_INVALID_ID) { + ip_set_printk("cannot find del_set index %u as target", + info->del_set.index); + return 0; /* error */ + } + } + if (info->add_set.flags[IP_SET_MAX_BINDINGS] != 0 + || info->del_set.flags[IP_SET_MAX_BINDINGS] != 0) { + ip_set_printk("That's nasty!"); + return 0; /* error */ + } + + return 1; +} + +static void destroy(void *targetinfo, unsigned int targetsize) +{ + struct ipt_set_info_target *info = targetinfo; + + if (targetsize != IPT_ALIGN(sizeof(struct ipt_set_info_target))) { + ip_set_printk("invalid targetsize %d", targetsize); + return; + } + + if (info->add_set.index != IP_SET_INVALID_ID) + ip_set_put(info->add_set.index); + if (info->del_set.index != IP_SET_INVALID_ID) + ip_set_put(info->del_set.index); +} + +static struct ipt_target SET_target = { + .name = "SET", + .target = target, + .checkentry = checkentry, + .destroy = destroy, + .me = THIS_MODULE +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("iptables IP set target module"); + +static int __init init(void) +{ + return ipt_register_target(&SET_target); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&SET_target); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_TARPIT.c linux-2.6.11.3/net/ipv4/netfilter/ipt_TARPIT.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_TARPIT.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_TARPIT.c 2005-03-13 22:04:37.261132384 +0100 @@ -0,0 +1,290 @@ +/* + * Kernel module to capture and hold incoming TCP connections using + * no local per-connection resources. 
+ * + * Based on ipt_REJECT.c and offering functionality similar to + * LaBrea . + * + * Copyright (c) 2002 Aaron Hopkins + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Goal: + * - Allow incoming TCP connections to be established. + * - Passing data should result in the connection being switched to the + * persist state (0 byte window), in which the remote side stops sending + * data and asks to continue every 60 seconds. + * - Attempts to shut down the connection should be ignored completely, so + * the remote side ends up having to time it out. + * + * This means: + * - Reply to TCP SYN,!ACK,!RST,!FIN with SYN-ACK, window 5 bytes + * - Reply to TCP SYN,ACK,!RST,!FIN with RST to prevent spoofing + * - Reply to TCP !SYN,!RST,!FIN with ACK, window 0 bytes, rate-limited + */ + +#include +#include +#include +#include +#include +#include +#include +struct in_device; +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Aaron Hopkins "); + +/* Stolen from ip_finish_output2 */ +static int ip_direct_send(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct hh_cache *hh = dst->hh; + + if (hh) { + read_lock_bh(&hh->hh_lock); + memcpy(skb->data - 16, hh->hh_data, 16); + read_unlock_bh(&hh->hh_lock); + skb_push(skb, hh->hh_len); + return hh->hh_output(skb); + } else if (dst->neighbour) + return dst->neighbour->output(skb); + + if (net_ratelimit()) + printk(KERN_DEBUG "TARPIT ip_direct_send: no header cache and no neighbor!\n"); + kfree_skb(skb); + return -EINVAL; +} + + +/* Send reply */ +static void tarpit_tcp(struct sk_buff *oskb,struct rtable *ort,int local) +{ + struct sk_buff *nskb; + struct rtable *nrt; + struct tcphdr *otcph, *ntcph; + struct flowi fl = {}; + unsigned int otcplen; + u_int16_t tmp; + + /* A truncated TCP header isn't going to be useful */ + if (oskb->len < (oskb->nh.iph->ihl*4) + sizeof(struct tcphdr)) + return; + + otcph = (struct tcphdr *)((u_int32_t*)oskb->nh.iph + + oskb->nh.iph->ihl); + otcplen = oskb->len - oskb->nh.iph->ihl*4; + + /* No replies for RST or FIN */ + if (otcph->rst || otcph->fin) + return; + + /* No reply to !SYN,!ACK. Rate-limit replies to !SYN,ACKs */ + if (!otcph->syn && (!otcph->ack || !xrlim_allow(&ort->u.dst, 1*HZ))) + return; + + /* Check checksum. 
*/ + if (tcp_v4_check(otcph, otcplen, oskb->nh.iph->saddr, + oskb->nh.iph->daddr, + csum_partial((char *)otcph, otcplen, 0)) != 0) + return; + + /* Copy skb (even if skb is about to be dropped, we can't just + clone it because there may be other things, such as tcpdump, + interested in it) */ + nskb = skb_copy(oskb, GFP_ATOMIC); + if (!nskb) + return; + + /* This packet will not be the same as the other: clear nf fields */ + nf_conntrack_put(nskb->nfct); + nskb->nfct = NULL; + nskb->nfcache = 0; +#ifdef CONFIG_NETFILTER_DEBUG + nskb->nf_debug = 0; +#endif + + ntcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); + + /* Truncate to length (no data) */ + ntcph->doff = sizeof(struct tcphdr)/4; + skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr)); + nskb->nh.iph->tot_len = htons(nskb->len); + + /* Swap source and dest */ + nskb->nh.iph->daddr = xchg(&nskb->nh.iph->saddr, nskb->nh.iph->daddr); + tmp = ntcph->source; + ntcph->source = ntcph->dest; + ntcph->dest = tmp; + + /* Use supplied sequence number or make a new one */ + ntcph->seq = otcph->ack ? otcph->ack_seq + : htonl(secure_tcp_sequence_number(nskb->nh.iph->saddr, + nskb->nh.iph->daddr, + ntcph->source, + ntcph->dest)); + + /* Our SYN-ACKs must have a >0 window */ + ntcph->window = (otcph->syn && !otcph->ack) ? 
htons(5) : 0; + + ntcph->urg_ptr = 0; + + /* Reset flags */ + ((u_int8_t *)ntcph)[13] = 0; + + if (otcph->syn && otcph->ack) { + ntcph->rst = 1; + ntcph->ack_seq = 0; + } else { + ntcph->syn = otcph->syn; + ntcph->ack = 1; + ntcph->ack_seq = htonl(ntohl(otcph->seq) + otcph->syn); + } + + /* Adjust TCP checksum */ + ntcph->check = 0; + ntcph->check = tcp_v4_check(ntcph, sizeof(struct tcphdr), + nskb->nh.iph->saddr, + nskb->nh.iph->daddr, + csum_partial((char *)ntcph, + sizeof(struct tcphdr), 0)); + + /* Adjust IP TTL */ + nskb->nh.iph->ttl = sysctl_ip_default_ttl; + + /* Set DF, id = 0 */ + nskb->nh.iph->frag_off = htons(IP_DF); + nskb->nh.iph->id = 0; + + /* Adjust IP checksum */ + nskb->nh.iph->check = 0; + nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, + nskb->nh.iph->ihl); + + fl.nl_u.ip4_u.daddr = nskb->nh.iph->daddr; + fl.nl_u.ip4_u.saddr = local ? nskb->nh.iph->saddr : 0; + fl.nl_u.ip4_u.tos = RT_TOS(nskb->nh.iph->tos) | RTO_CONN; + fl.oif = 0; + + if (ip_route_output_key(&nrt, &fl)) + goto free_nskb; + + dst_release(nskb->dst); + nskb->dst = &nrt->u.dst; + + /* "Never happens" */ + if (nskb->len > dst_pmtu(nskb->dst)) + goto free_nskb; + + ip_direct_send (nskb); + + return; + + free_nskb: + kfree_skb(nskb); +} + + +static unsigned int tarpit(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, + void *userinfo) +{ + struct sk_buff *skb = *pskb; + struct rtable *rt = (struct rtable*)skb->dst; + + /* Do we have an input route cache entry? */ + if (!rt) + return NF_DROP; + + /* No replies to physical multicast/broadcast */ + if (skb->pkt_type != PACKET_HOST && skb->pkt_type != PACKET_OTHERHOST) + return NF_DROP; + + /* Now check at the protocol level */ + if (rt->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST)) + return NF_DROP; + + /* Our naive response construction doesn't deal with IP + options, and probably shouldn't try. 
*/ + if (skb->nh.iph->ihl*4 != sizeof(struct iphdr)) + return NF_DROP; + + /* We aren't interested in fragments */ + if (skb->nh.iph->frag_off & htons(IP_OFFSET)) + return NF_DROP; + + tarpit_tcp(skb,rt,hooknum == NF_IP_LOCAL_IN); + + return NF_DROP; +} + + +static int check(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + /* Only allow these for input/forward packet filtering. */ + if (strcmp(tablename, "filter") != 0) { + DEBUGP("TARPIT: bad table %s'.\n", tablename); + return 0; + } + if ((hook_mask & ~((1 << NF_IP_LOCAL_IN) + | (1 << NF_IP_FORWARD))) != 0) { + DEBUGP("TARPIT: bad hook mask %X\n", hook_mask); + return 0; + } + + /* Must specify that it's a TCP packet */ + if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO)) { + DEBUGP("TARPIT: not valid for non-tcp\n"); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_tarpit_reg = { + .name = "TARPIT", + .target = tarpit, + .checkentry = check, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_tarpit_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_tarpit_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_TTL.c linux-2.6.11.3/net/ipv4/netfilter/ipt_TTL.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_TTL.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_TTL.c 2005-03-13 22:03:52.572926024 +0100 @@ -0,0 +1,120 @@ +/* TTL modification target for IP tables + * (C) 2000 by Harald Welte + * + * Version: $Revision$ + * + * This software is distributed under the terms of GNU GPL + */ + +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("IP tables TTL modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int +ipt_ttl_target(struct sk_buff **pskb, const struct 
net_device *in, + const struct net_device *out, unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct iphdr *iph; + const struct ipt_TTL_info *info = targinfo; + u_int16_t diffs[2]; + int new_ttl; + + if (!skb_ip_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + iph = (*pskb)->nh.iph; + + switch (info->mode) { + case IPT_TTL_SET: + new_ttl = info->ttl; + break; + case IPT_TTL_INC: + new_ttl = iph->ttl + info->ttl; + if (new_ttl > 255) + new_ttl = 255; + break; + case IPT_TTL_DEC: + new_ttl = iph->ttl - info->ttl; + if (new_ttl < 0) + new_ttl = 0; + break; + default: + new_ttl = iph->ttl; + break; + } + + if (new_ttl != iph->ttl) { + diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; + iph->ttl = new_ttl; + diffs[1] = htons(((unsigned)iph->ttl) << 8); + iph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + iph->check^0xFFFF)); + (*pskb)->nfcache |= NFC_ALTERED; + } + + return IPT_CONTINUE; +} + +static int ipt_ttl_checkentry(const char *tablename, + const struct ipt_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ipt_TTL_info *info = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) { + printk(KERN_WARNING "TTL: targinfosize %u != %Zu\n", + targinfosize, + IPT_ALIGN(sizeof(struct ipt_TTL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "TTL: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IPT_TTL_MAXMODE) { + printk(KERN_WARNING "TTL: invalid or unknown Mode %u\n", + info->mode); + return 0; + } + + if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) { + printk(KERN_WARNING "TTL: increment/decrement doesn't make sense with value 0\n"); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_TTL = { + .name = "TTL", + .target = ipt_ttl_target, + .checkentry = ipt_ttl_checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return 
ipt_register_target(&ipt_TTL); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_TTL); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.11.3/net/ipv4/netfilter/ipt_ULOG.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ULOG.c 2005-03-13 07:44:20.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_ULOG.c 2005-03-13 22:05:11.758887928 +0100 @@ -164,7 +164,7 @@ return skb; } -static void ipt_ulog_packet(unsigned int hooknum, +void ipt_ulog_packet(unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -415,5 +415,7 @@ } +EXPORT_SYMBOL(ipt_ulog_packet); + module_init(init); module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_XOR.c linux-2.6.11.3/net/ipv4/netfilter/ipt_XOR.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_XOR.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_XOR.c 2005-03-13 22:05:12.804728936 +0100 @@ -0,0 +1,117 @@ +/* XOR target for IP tables + * (C) 2000 by Tim Vandermeersch + * Based on ipt_TTL.c + * + * Version 1.0 + * + * This software is distributed under the terms of GNU GPL + */ + +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Tim Vandermeersch "); +MODULE_DESCRIPTION("IP tables XOR module"); +MODULE_LICENSE("GPL"); + +static unsigned int +ipt_xor_target(struct sk_buff **pskb, + const struct net_device *in, const struct net_device *out, + unsigned int hooknum, const void *targinfo, void *userinfo) +{ + struct ipt_XOR_info *info = (void *) targinfo; + struct iphdr *iph; + struct tcphdr *tcph; + struct udphdr *udph; + int i, j, k; + + if (!skb_ip_make_writable(pskb, (*pskb)->len)) + return NF_DROP; + + iph = (*pskb)->nh.iph; + + if (iph->protocol == IPPROTO_TCP) { + tcph = (struct tcphdr *) ((*pskb)->data + iph->ihl*4); + for (i=0, j=0; i<(ntohs(iph->tot_len) - iph->ihl*4 - 
tcph->doff*4); ) { + for (k=0; k<=info->block_size; k++) { + (*pskb)->data[ iph->ihl*4 + tcph->doff*4 + i ] ^= + info->key[j]; + i++; + } + j++; + if (info->key[j] == 0x00) + j = 0; + } + } else if (iph->protocol == IPPROTO_UDP) { + udph = (struct udphdr *) ((*pskb)->data + iph->ihl*4); + for (i=0, j=0; i<(ntohs(udph->len)-8); ) { + for (k=0; k<=info->block_size; k++) { + (*pskb)->data[ iph->ihl*4 + sizeof(struct udphdr) + i ] ^= + info->key[j]; + i++; + } + j++; + if (info->key[j] == 0x00) + j = 0; + } + } + + return IPT_CONTINUE; +} + +static int ipt_xor_checkentry(const char *tablename, const struct ipt_entry *e, + void *targinfo, unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ipt_XOR_info *info = targinfo; + + if (targinfosize != IPT_ALIGN(sizeof(struct ipt_XOR_info))) { + printk(KERN_WARNING "XOR: targinfosize %u != %Zu\n", + targinfosize, IPT_ALIGN(sizeof(struct ipt_XOR_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "XOR: can only be called from" + "\"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (!strcmp(info->key, "")) { + printk(KERN_WARNING "XOR: You must specify a key"); + return 0; + } + + if (info->block_size == 0) { + printk(KERN_WARNING "XOR: You must specify a block-size"); + return 0; + } + + return 1; +} + +static struct ipt_target ipt_XOR = { + .name = "XOR", + .target = ipt_xor_target, + .checkentry = ipt_xor_checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_target(&ipt_XOR); +} + +static void __exit fini(void) +{ + ipt_unregister_target(&ipt_XOR); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_account.c linux-2.6.11.3/net/ipv4/netfilter/ipt_account.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_account.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_account.c 2005-03-13 22:05:13.933557328 +0100 @@ -0,0 +1,923 @@ +/* + * 
accounting match (ipt_account.c) + * (C) 2003,2004 by Piotr Gasidlo (quaker@barbara.eu.org) + * + * Version: 0.1.7 + * + * This software is distributed under the terms of GNU GPL + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +#include +#include + +#if defined(CONFIG_IP_NF_MATCH_ACCOUNT_DEBUG) + #define dprintk(format,args...) printk(format,##args) +#else + #define dprintk(format,args...) +#endif + +static char version[] = +KERN_INFO IPT_ACCOUNT_NAME " " IPT_ACCOUNT_VERSION " : Piotr Gasidlo , http://www.barbara.eu.org/~quaker/ipt_account/\n"; + +/* rights for files created in /proc/net/ipt_account/ */ +static int permissions = 0644; +/* maximal netmask for single table */ +static int netmask = 16; + +/* module information */ +MODULE_AUTHOR("Piotr Gasidlo "); +MODULE_DESCRIPTION("Traffic accounting modules"); +MODULE_LICENSE("GPL"); +MODULE_PARM(permissions,"i"); +MODULE_PARM_DESC(permissions,"permissions on /proc/net/ipt_account/* files"); +MODULE_PARM(netmask, "i"); +MODULE_PARM_DESC(netmask, "maximum *save* size of one list (netmask)"); + +/* structure with statistics counters */ +struct t_ipt_account_stat { + u_int64_t b_all, b_tcp, b_udp, b_icmp, b_other; /* byte counters for all/tcp/udp/icmp/other traffic */ + u_int64_t p_all, p_tcp, p_udp, p_icmp, p_other; /* packet counters for all/tcp/udp/icmp/other traffic */ +}; + +/* structure with statistics counters, used when table is created with --ashort switch */ +struct t_ipt_account_stat_short { + u_int64_t b_all; /* byte counters for all traffic */ + u_int64_t p_all; /* packet counters for all traffic */ +}; + +/* structure holding to/from statistics for single ip */ +struct t_ipt_account_ip_list { + struct t_ipt_account_stat src; + struct t_ipt_account_stat dest; + unsigned long time; /* time when this record was last updated */ + +}; + +/* same as above, for tables with --ashort switch */ +struct t_ipt_account_ip_list_short 
{ + struct t_ipt_account_stat_short src; + struct t_ipt_account_stat_short dest; + unsigned long time; +}; + +/* structure describing single table */ +struct t_ipt_account_table { + char name[IPT_ACCOUNT_NAME_LEN]; /* table name ( = filename in /proc/net/ipt_account/) */ + union { /* table with statistics for each ip in network/netmask */ + struct t_ipt_account_ip_list *l; + struct t_ipt_account_ip_list_short *s; + } ip_list; + u_int32_t network; /* network/netmask covered by table*/ + u_int32_t netmask; + u_int32_t count; + int shortlisting:1; /* show only total columns of counters */ + int use_count; /* rules counter - counting number of rules using this table */ + struct t_ipt_account_table *next; + spinlock_t ip_list_lock; + struct proc_dir_entry *status_file; +}; + +/* we must use spinlocks to avoid parallel modifications of table list */ +static spinlock_t account_lock = SPIN_LOCK_UNLOCKED; + +static struct proc_dir_entry *proc_net_ipt_account = NULL; + +/* root pointer holding list of the tables */ +static struct t_ipt_account_table *account_tables = NULL; + +/* convert ascii to ip */ +int atoip(char *buffer, u_int32_t *ip) { + + char *bufferptr = buffer; + int part, shift; + + /* zero ip */ + *ip = 0; + + /* first must be a digit */ + if (!isdigit(*bufferptr)) + return 0; + + /* parse first 3 octets (III.III.III.iii) */ + for (part = 0, shift = 24; *bufferptr && shift; bufferptr++) { + if (isdigit(*bufferptr)) { + part = part * 10 + (*bufferptr - '0'); + continue; + } + if (*bufferptr == '.') { + if (part > 255) + return 0; + *ip |= part << shift; + shift -= 8; + part = 0; + continue; + } + return 0; + } + + /* we expect more digits */ + if (!*bufferptr) + return 0; + /* parse last octet (iii.iii.iii.III) */ + for (; *bufferptr; bufferptr++) { + if (isdigit(*bufferptr)) { + part = part * 10 + (*bufferptr - '0'); + continue; + } else { + if (part > 255) + return 0; + *ip |= part; + break; + } + } + return (bufferptr - buffer); +} + +/* convert ascii to 64bit 
integer */ +int atoi64(char *buffer, u_int64_t *i) { + char *bufferptr = buffer; + + /* zero integer */ + *i = 0; + + while (isdigit(*bufferptr)) { + *i = *i * 10 + (*bufferptr - '0'); + bufferptr++; + } + return (bufferptr - buffer); +} + +static void *account_seq_start(struct seq_file *s, loff_t *pos) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + + unsigned int *bucket; + + spin_lock_bh(&table->ip_list_lock); + if (*pos >= table->count) + return NULL; + + bucket = kmalloc(sizeof(unsigned int), GFP_KERNEL); + if (!bucket) + return ERR_PTR(-ENOMEM); + *bucket = *pos; + return bucket; +} + +static void *account_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + + unsigned int *bucket = (unsigned int *)v; + + *pos = ++(*bucket); + if (*pos >= table->count) { + kfree(v); + return NULL; + } + return bucket; +} + +static void account_seq_stop(struct seq_file *s, void *v) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + unsigned int *bucket = (unsigned int *)v; + kfree(bucket); + spin_unlock_bh(&table->ip_list_lock); +} + +static int account_seq_write(struct file *file, const char *ubuffer, + size_t ulength, loff_t *pos) +{ + struct proc_dir_entry *pde = ((struct seq_file *)file->private_data)->private; + struct t_ipt_account_table *table = pde->data; + char buffer[1024], *bufferptr; + int length; + + u_int32_t ip; + int len, i; + struct t_ipt_account_ip_list l; + struct t_ipt_account_ip_list_short s; + u_int64_t *p, dummy; + + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() entered.\n"); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() ulength = %i.\n", ulength); + + length = ulength; + if (ulength > 1024) + length = 1024; + if (copy_from_user(buffer, ubuffer, length)) + return -EFAULT; + buffer[length - 1] = 0; + bufferptr = buffer; + + dprintk(KERN_INFO 
IPT_ACCOUNT_NAME ": account_seq_write() buffer = \'%s\' length = %i.\n", buffer, length); + + /* reset table counters */ + if (!memcmp(buffer, "reset", 5)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got \"reset\".\n"); + if (!table->shortlisting) { + spin_lock_bh(&table->ip_list_lock); + memset(table->ip_list.l, 0, sizeof(struct t_ipt_account_ip_list) * table->count); + spin_unlock_bh(&table->ip_list_lock); + } else { + spin_lock_bh(&table->ip_list_lock); + memset(table->ip_list.s, 0, sizeof(struct t_ipt_account_ip_list_short) * table->count); + spin_unlock_bh(&table->ip_list_lock); + } + return length; + } + + if (!memcmp(buffer, "ip", 2)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got \"ip\".\n"); + bufferptr += 2; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%i).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoip(bufferptr, &ip))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected ip (%i).\n", bufferptr - buffer); + return length; /* expected ip */ + } + bufferptr += len; + if ((ip & table->netmask) != table->network) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected ip [%u.%u.%u.%u] from table's network/netmask [%u.%u.%u.%u/%u.%u.%u.%u].\n", HIPQUAD(ip), HIPQUAD(table->network), HIPQUAD(table->netmask)); + return length; /* expected ip from table's network/netmask */ + } + if (!table->shortlisting) { + memset(&l, 0, sizeof(struct t_ipt_account_ip_list)); + while(*bufferptr) { + if 
(!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!memcmp(bufferptr, "bytes_src", 9)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_src (%i).\n", bufferptr - buffer); + p = &l.src.b_all; + bufferptr += 9; + } else if (!memcmp(bufferptr, "bytes_dest", 10)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_dest (%i).\n", bufferptr - buffer); + p = &l.dest.b_all; + bufferptr += 10; + } else if (!memcmp(bufferptr, "packets_src", 11)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_src (%i).\n", bufferptr - buffer); + p = &l.src.p_all; + bufferptr += 11; + } else if (!memcmp(bufferptr, "packets_dest", 12)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_dest (%i).\n", bufferptr - buffer); + p = &l.dest.p_all; + bufferptr += 12; + } else if (!memcmp(bufferptr, "time", 4)) { + /* time hack, ignore time tokens */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got time (%i).\n", bufferptr - buffer); + bufferptr += 4; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%i).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, &dummy))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%i).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": 
account_seq_write() got %llu (%i).\n", dummy, bufferptr - buffer); + bufferptr += len; + continue; /* skip time token */ + } else + return length; /* expected token */ + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%i).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + for (i = 0; i < 5; i++) { + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, p))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%i).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%i).\n", *p, bufferptr - buffer); + bufferptr += len; + p++; + } + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n"); + spin_lock_bh(&table->ip_list_lock); + /* update counters, do not overwrite time field */ + memcpy(&table->ip_list.l[ip - table->network], &l, sizeof(struct t_ipt_account_ip_list) - sizeof(unsigned long)); + spin_unlock_bh(&table->ip_list_lock); + } else { + memset(&s, 0, sizeof(struct t_ipt_account_ip_list_short)); + while(*bufferptr) { + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!memcmp(bufferptr, "bytes_src", 9)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got bytes_src (%i).\n", bufferptr - buffer); + p = &s.src.b_all; + bufferptr += 9; + } else if (!memcmp(bufferptr, "bytes_dest", 10)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME 
": account_seq_write() got bytes_dest (%i).\n", bufferptr - buffer); + p = &s.dest.b_all; + bufferptr += 10; + } else if (!memcmp(bufferptr, "packets_src", 11)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_src (%i).\n", bufferptr - buffer); + p = &s.src.p_all; + bufferptr += 11; + } else if (!memcmp(bufferptr, "packets_dest", 12)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got packets_dest (%i).\n", bufferptr - buffer); + p = &s.dest.p_all; + bufferptr += 12; + } else if (!memcmp(bufferptr, "time", 4)) { + /* time hack, ignore time tokens */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got time (%i).\n", bufferptr - buffer); + bufferptr += 4; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%i).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, &dummy))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%i).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%i).\n", dummy, bufferptr - buffer); + bufferptr += len; + continue; /* skip time token */ + } else { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected token (%i).\n", bufferptr - buffer); + return length; /* expected token */ + } + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + 
bufferptr += 1; + if (*bufferptr != '=') { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected equal (%i).\n", bufferptr - buffer); + return length; /* expected equal */ + } + bufferptr += 1; + if (!isspace(*bufferptr)) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected space (%i).\n", bufferptr - buffer); + return length; /* expected space */ + } + bufferptr += 1; + if (!(len = atoi64(bufferptr, p))) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() expected int64 (%i).\n", bufferptr - buffer); + return length; /* expected int64 */ + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() got %llu (%i).\n", *p, bufferptr - buffer); + bufferptr += len; + } + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n"); + spin_lock_bh(&table->ip_list_lock); + /* update counters, do not overwrite time field */ + memcpy(&table->ip_list.s[ip - table->network], &s, sizeof(struct t_ipt_account_ip_list_short) - sizeof(unsigned long)); + spin_unlock_bh(&table->ip_list_lock); + } + } + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() left.\n"); + return length; +} + + +static int account_seq_show(struct seq_file *s, void *v) +{ + struct proc_dir_entry *pde = s->private; + struct t_ipt_account_table *table = pde->data; + unsigned int *bucket = (unsigned int *)v; + + u_int32_t address = table->network + *bucket; + + if (!table->shortlisting) { + seq_printf(s, + "ip = %u.%u.%u.%u bytes_src = %llu %llu %llu %llu %llu packets_src = %llu %llu %llu %llu %llu bytes_dest = %llu %llu %llu %llu %llu packets_dest = %llu %llu %llu %llu %llu\n", + HIPQUAD(address), + table->ip_list.l[*bucket].src.b_all, + table->ip_list.l[*bucket].src.b_tcp, + table->ip_list.l[*bucket].src.b_udp, + table->ip_list.l[*bucket].src.b_icmp, + table->ip_list.l[*bucket].src.b_other, + table->ip_list.l[*bucket].src.p_all, + table->ip_list.l[*bucket].src.p_tcp, + table->ip_list.l[*bucket].src.p_udp, + 
table->ip_list.l[*bucket].src.p_icmp, + table->ip_list.l[*bucket].src.p_other, + table->ip_list.l[*bucket].dest.b_all, + table->ip_list.l[*bucket].dest.b_tcp, + table->ip_list.l[*bucket].dest.b_udp, + table->ip_list.l[*bucket].dest.b_icmp, + table->ip_list.l[*bucket].dest.b_other, + table->ip_list.l[*bucket].dest.p_all, + table->ip_list.l[*bucket].dest.p_tcp, + table->ip_list.l[*bucket].dest.p_udp, + table->ip_list.l[*bucket].dest.p_icmp, + table->ip_list.l[*bucket].dest.p_other + ); + } else { + seq_printf(s, + "ip = %u.%u.%u.%u bytes_src = %llu packets_src = %llu bytes_dest = %llu packets_dest = %llu\n", + HIPQUAD(address), + table->ip_list.s[*bucket].src.b_all, + table->ip_list.s[*bucket].src.p_all, + table->ip_list.s[*bucket].dest.b_all, + table->ip_list.s[*bucket].dest.p_all + ); + } + return 0; +} + +static struct seq_operations account_seq_ops = { + .start = account_seq_start, + .next = account_seq_next, + .stop = account_seq_stop, + .show = account_seq_show +}; + +static int account_seq_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &account_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + sf->private = PDE(inode); + } + return ret; +} + +static struct file_operations account_file_ops = { + .owner = THIS_MODULE, + .open = account_seq_open, + .read = seq_read, + .write = account_seq_write, + .llseek = seq_lseek, + .release = seq_release +}; + +/* do raw accounting */ +static inline void do_account(struct t_ipt_account_stat *stat, const struct sk_buff *skb) { + + /* update packet & bytes counters in *stat structure */ + stat->b_all += skb->len; + stat->p_all++; + + switch (skb->nh.iph->protocol) { + case IPPROTO_TCP: + stat->b_tcp += skb->len; + stat->p_tcp++; + break; + case IPPROTO_UDP: + stat->b_udp += skb->len; + stat->p_udp++; + break; + case IPPROTO_ICMP: + stat->b_icmp += skb->len; + stat->p_icmp++; + break; + default: + stat->b_other += skb->len; + stat->p_other++; + } +} + +static inline void 
do_account_short(struct t_ipt_account_stat_short *stat, const struct sk_buff *skb) { + + /* update packet & bytes counters in *stat structure */ + stat->b_all += skb->len; + stat->p_all++; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + + const struct t_ipt_account_info *info = (struct t_ipt_account_info*)matchinfo; + struct t_ipt_account_table *table; + int ret; + unsigned long now; + + u_int32_t address; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() entered.\n"); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() match name = %s.\n", info->name); + + spin_lock_bh(&account_lock); + /* find the right table */ + table = account_tables; + while (table && strncmp(table->name, info->name, IPT_ACCOUNT_NAME_LEN) && (table = table->next)); + spin_unlock_bh(&account_lock); + + if (table == NULL) { + /* ups, no table with that name */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() table %s not found. Leaving.\n", info->name); + return 0; + } + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() table found %s\n", table->name); + + /* lock table while updating statistics */ + spin_lock_bh(&table->ip_list_lock); + + /* default: no match */ + ret = 0; + + /* get current time */ + now = jiffies; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() got packet src = %u.%u.%u.%u, dst = %u.%u.%u.%u, proto = %u.\n", NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr), skb->nh.iph->protocol); + + /* check whether traffic from source ip address ... */ + address = ntohl(skb->nh.iph->saddr); + /* ... 
is being accounted by this table */
+	if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) {
+		/* yes, account this packet */
+		dprintk(KERN_INFO "ipt_account: match() accounting packet src = %u.%u.%u.%u, proto = %u.\n", HIPQUAD(address), skb->nh.iph->protocol);
+		/* update counters this host */
+		if (!table->shortlisting) {
+			do_account(&table->ip_list.l[address - table->network].src, skb);
+			table->ip_list.l[address - table->network].time = now;
+			/* update also counters for all hosts in this table (network address) */
+			if (table->netmask != INADDR_BROADCAST) {
+				do_account(&table->ip_list.l[0].src, skb);
+				table->ip_list.l[0].time = now;
+			}
+		} else {
+			do_account_short(&table->ip_list.s[address - table->network].src, skb);
+			table->ip_list.s[address - table->network].time = now;
+			/* update also counters for all hosts in this table (network address) */
+			if (table->netmask != INADDR_BROADCAST) {
+				do_account_short(&table->ip_list.s[0].src, skb);
+				table->ip_list.s[0].time = now;
+			}
+		}
+		/* yes, it's a match */
+		ret = 1;
+	}
+
+	/* do the same thing with destination ip address */
+	address = ntohl(skb->nh.iph->daddr);
+	if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) {
+		dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() accounting packet dst = %u.%u.%u.%u, proto = %u.\n", HIPQUAD(address), skb->nh.iph->protocol);
+		if (!table->shortlisting) {
+			do_account(&table->ip_list.l[address - table->network].dest, skb);
+			table->ip_list.l[address - table->network].time = now;
+			if (table->netmask != INADDR_BROADCAST) {
+				do_account(&table->ip_list.l[0].dest, skb);
+				/* BUGFIX: was table->ip_list.s[0].time — wrote the timestamp
+				 * into the *short* list while accounting on the long list
+				 * (copy-paste from the shortlisting branch below). */
+				table->ip_list.l[0].time = now;
+			}
+		} else {
+			do_account_short(&table->ip_list.s[address - table->network].dest, skb);
+			table->ip_list.s[address - table->network].time = now;
+			if (table->netmask != INADDR_BROADCAST) {
+				do_account_short(&table->ip_list.s[0].dest, skb);
+				table->ip_list.s[0].time = now;
+			}
+		}
+		ret = 1;
+	}
+
spin_unlock_bh(&table->ip_list_lock); + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() left.\n"); + + return ret; +} + +static int checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) +{ + const struct t_ipt_account_info *info = matchinfo; + struct t_ipt_account_table *table, *find_table, *last_table; + int ret = 0; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() entered.\n"); + + if (matchinfosize != IPT_ALIGN(sizeof(struct t_ipt_account_info))) return 0; + if (!info->name || !info->name[0]) return 0; + + /* find whether table with this name already exists */ + spin_lock_bh(&account_lock); + find_table = account_tables; + while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_ACCOUNT_NAME_LEN) && (find_table = find_table->next) ); + if (find_table != NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table %s found.\n", info->name); + /* if table exists, check whether table network/netmask equals rule network/netmask */ + if (find_table->network != info->network || find_table->netmask != info->netmask || find_table->shortlisting != info->shortlisting) { + spin_unlock_bh(&account_lock); + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() wrong parameters (not equals existing table parameters).\n"); + ret = 0; + goto failure; + } + /* increment table use count */ + find_table->use_count++; + spin_unlock_bh(&account_lock); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() incrementing use count.\n"); + ret = 1; + goto failure; + } + spin_unlock_bh(&account_lock); + + /* check netmask first, before allocating memory */ + if (info->netmask < ((1 << netmask) - 1)) { + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() too big netmask.\n"); + ret = 0; + goto failure; + } + + /* table doesn't exist - create new */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %u for new table %s.\n", sizeof(struct t_ipt_account_table), info->name); 
+ table = vmalloc(sizeof(struct t_ipt_account_table)); + if (table == NULL) { + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %u for new table %s.\n", sizeof(struct t_ipt_account_table), info->name); + ret = 0; /* was -ENOMEM */ + goto failure; + } + + /* setup table parameters */ + table->ip_list_lock = SPIN_LOCK_UNLOCKED; + table->next = NULL; + table->use_count = 1; + table->network = info->network; + table->netmask = info->netmask; + table->shortlisting = info->shortlisting; + table->count = (~table->netmask) + 1; + strncpy(table->name,info->name,IPT_ACCOUNT_NAME_LEN); + table->name[IPT_ACCOUNT_NAME_LEN - 1] = '\0'; + + /* allocate memory for table->ip_list */ + if (!table->shortlisting) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %u for ip_list.\n", sizeof(struct t_ipt_account_ip_list) * table->count); + table->ip_list.l = vmalloc(sizeof(struct t_ipt_account_ip_list) * table->count); + if (table->ip_list.l == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %u for ip_list.\n", sizeof(struct t_ipt_account_ip_list) * table->count); + ret = 0; /* was -ENOMEM */ + goto failure_table; + } + memset(table->ip_list.l, 0, sizeof(struct t_ipt_account_ip_list) * table->count); + } else { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() allocating %u for ip_list.\n", sizeof(struct t_ipt_account_ip_list_short) * table->count); + table->ip_list.s = vmalloc(sizeof(struct t_ipt_account_ip_list_short) * table->count); + if (table->ip_list.s == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to allocate %u for ip_list.\n", sizeof(struct t_ipt_account_ip_list_short) * table->count); + ret = 0; /* was -ENOMEM */ + goto failure_table; + } + memset(table->ip_list.s, 0, sizeof(struct t_ipt_account_ip_list_short) * table->count); + } + + /* put table into chain */ + spin_lock_bh(&account_lock); + find_table = account_tables; + while( (last_table = find_table) && strncmp(info->name, 
find_table->name, IPT_ACCOUNT_NAME_LEN) && (find_table = find_table->next) ); + if (find_table != NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table %s found.\n", info->name); + if (find_table->network != info->network || find_table->netmask != info->netmask) { + spin_unlock_bh(&account_lock); + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() wrong network/netmask.\n"); + ret = 0; + goto failure_ip_list; + } + find_table->use_count++; + spin_unlock_bh(&account_lock); + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() incrementing use count.\n"); + ret = 1; + goto failure_ip_list; + } + if (!last_table) + account_tables = table; + else + last_table->next = table; + spin_unlock_bh(&account_lock); + + /* create procfs status file */ + table->status_file = create_proc_entry(table->name, permissions, proc_net_ipt_account); + if (table->status_file == NULL) { + ret = 0; /* was -ENOMEM */ + goto failure_unlink; + } + table->status_file->owner = THIS_MODULE; + table->status_file->data = table; + wmb(); + table->status_file->proc_fops = &account_file_ops; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() left.\n"); + /* everything went just okey */ + return 1; + + /* do cleanup in case of failure */ +failure_unlink: + /* remove table from list */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() removing table.\n"); + spin_lock_bh(&account_lock); + last_table = NULL; + table = account_tables; + if (table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() no table found. Leaving.\n"); + spin_unlock_bh(&account_lock); + return 0; /* was -ENOMEM */ + } + while (strncmp(info->name, table->name, IPT_ACCOUNT_NAME_LEN) && (last_table = table) && (table = table->next)); + if (table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() table already destroyed. 
Leaving.\n"); + spin_unlock_bh(&account_lock); + return 0; /* was -ENOMEM */ + } + if (last_table) + last_table->next = table->next; + else + account_tables = table->next; + spin_unlock_bh(&account_lock); +failure_ip_list: + /* free memory allocated for statistics table */ + if (!table->shortlisting) + vfree(table->ip_list.l); + else + vfree(table->ip_list.s); +failure_table: + /* free table */ + vfree(table); +failure: + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() left. Table not created.\n"); + /* failure return */ + return ret; +} + +static void destroy(void *matchinfo, + unsigned int matchinfosize) +{ + const struct t_ipt_account_info *info = matchinfo; + struct t_ipt_account_table *table, *last_table; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() entered.\n"); + + if (matchinfosize != IPT_ALIGN(sizeof(struct t_ipt_account_info))) return; + + /* search for table */ + spin_lock_bh(&account_lock); + last_table = NULL; + table = account_tables; + if(table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() no tables found. Leaving.\n"); + spin_unlock_bh(&account_lock); + return; + } + while( strncmp(info->name,table->name,IPT_ACCOUNT_NAME_LEN) && (last_table = table) && (table = table->next) ); + if (table == NULL) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() no table %s not found. Leaving.\n", info->name); + spin_unlock_bh(&account_lock); + return; + } + + /* decrement table use-count */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() decrementing use count.\n"); + table->use_count--; + if (table->use_count) { + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() table still in use. Leaving.\n"); + spin_unlock_bh(&account_lock); + return; + } + + /* remove table if use-count is zero */ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() table %s not used. 
Removing.\n", table->name); + + /* unlink table */ + if(last_table) + last_table->next = table->next; + else + account_tables = table->next; + spin_unlock_bh(&account_lock); + + /* wait while table is still in use */ + spin_lock_bh(&table->ip_list_lock); + spin_unlock_bh(&table->ip_list_lock); + + /* remove proc entries */ + remove_proc_entry(table->name, proc_net_ipt_account); + + /* remove table */ + if (!table->shortlisting) + vfree(table->ip_list.l); + else + vfree(table->ip_list.s); + vfree(table); + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": destory() left.\n"); + return; +} + +static struct ipt_match account_match = { + .name = "account", + .match = &match, + .checkentry = &checkentry, + .destroy = &destroy, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __init() entered.\n"); + printk(version); + /* check params */ + if (netmask > 32 || netmask < 0) { + printk(KERN_INFO "account: Wrong netmask given by netmask parameter (%i). Valid is 32 to 0.\n", netmask); + return -EINVAL; + } + + /* create /proc/net/ipt_account directory */ + proc_net_ipt_account = proc_mkdir("ipt_account", proc_net); + if (!proc_net_ipt_account) { + printk(KERN_INFO IPT_ACCOUNT_NAME ": checkentry() failed to create procfs entry.\n"); + return -EINVAL; /* was -ENOMEM */ + } + proc_net_ipt_account->owner = THIS_MODULE; + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __init() left.\n"); + + return ipt_register_match(&account_match); +} + +static void __exit fini(void) +{ + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __exit() entered.\n"); + + ipt_unregister_match(&account_match); + /* remove /proc/net/ipt_account/ directory */ + remove_proc_entry("ipt_account", proc_net); + + dprintk(KERN_INFO IPT_ACCOUNT_NAME ": __exit() left.\n"); +} + +module_init(init); +module_exit(fini); + diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_connlimit.c linux-2.6.11.3/net/ipv4/netfilter/ipt_connlimit.c --- 
linux-2.6.11.3.org/net/ipv4/netfilter/ipt_connlimit.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_connlimit.c 2005-03-13 22:03:53.555776608 +0100 @@ -0,0 +1,232 @@ +/* + * netfilter module to limit the number of parallel tcp + * connections per IP address. + * (c) 2000 Gerd Knorr + * Nov 2002: Martin Bene : + * only ignore TIME_WAIT or gone connections + * + * based on ... + * + * Kernel module to match connection tracking information. + * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#define DEBUG 0 + +MODULE_LICENSE("GPL"); + +/* we'll save the tuples of all connections we care about */ +struct ipt_connlimit_conn +{ + struct list_head list; + struct ip_conntrack_tuple tuple; +}; + +struct ipt_connlimit_data { + spinlock_t lock; + struct list_head iphash[256]; +}; + +static int ipt_iphash(u_int32_t addr) +{ + int hash; + + hash = addr & 0xff; + hash ^= (addr >> 8) & 0xff; + hash ^= (addr >> 16) & 0xff; + hash ^= (addr >> 24) & 0xff; + return hash; +} + +static int count_them(struct ipt_connlimit_data *data, + u_int32_t addr, u_int32_t mask, + struct ip_conntrack *ct) +{ +#if DEBUG + const static char *tcp[] = { "none", "established", "syn_sent", "syn_recv", + "fin_wait", "time_wait", "close", "close_wait", + "last_ack", "listen" }; +#endif + int addit = 1, matches = 0; + struct ip_conntrack_tuple tuple; + struct ip_conntrack_tuple_hash *found; + struct ipt_connlimit_conn *conn; + struct list_head *hash,*lh; + + spin_lock(&data->lock); + tuple = ct->tuplehash[0].tuple; + hash = &data->iphash[ipt_iphash(addr & mask)]; + + /* check the saved connections */ + for (lh = hash->next; lh != hash; lh = lh->next) { + conn = list_entry(lh,struct ipt_connlimit_conn,list); + found = ip_conntrack_find_get(&conn->tuple,ct); + if (0 == memcmp(&conn->tuple,&tuple,sizeof(tuple)) && + found != NULL && + found->ctrack->proto.tcp.state != 
TCP_CONNTRACK_TIME_WAIT) { + /* Just to be sure we have it only once in the list. + We should'nt see tuples twice unless someone hooks this + into a table without "-p tcp --syn" */ + addit = 0; + } +#if DEBUG + printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d %s\n", + ipt_iphash(addr & mask), + NIPQUAD(conn->tuple.src.ip), ntohs(conn->tuple.src.u.tcp.port), + NIPQUAD(conn->tuple.dst.ip), ntohs(conn->tuple.dst.u.tcp.port), + (NULL != found) ? tcp[found->ctrack->proto.tcp.state] : "gone"); +#endif + if (NULL == found) { + /* this one is gone */ + lh = lh->prev; + list_del(lh->next); + kfree(conn); + continue; + } + if (found->ctrack->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT) { + /* we don't care about connections which are + closed already -> ditch it */ + lh = lh->prev; + list_del(lh->next); + kfree(conn); + nf_conntrack_put(&found->ctrack->infos[0]); + continue; + } + if ((addr & mask) == (conn->tuple.src.ip & mask)) { + /* same source IP address -> be counted! */ + matches++; + } + nf_conntrack_put(&found->ctrack->infos[0]); + } + if (addit) { + /* save the new connection in our list */ +#if DEBUG + printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d new\n", + ipt_iphash(addr & mask), + NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port), + NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); +#endif + conn = kmalloc(sizeof(*conn),GFP_ATOMIC); + if (NULL == conn) { + spin_unlock(&data->lock); + return -1; + } + memset(conn,0,sizeof(*conn)); + INIT_LIST_HEAD(&conn->list); + conn->tuple = tuple; + list_add(&conn->list,hash); + matches++; + } + spin_unlock(&data->lock); + return matches; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_connlimit_info *info = matchinfo; + int connections, match; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + + ct = ip_conntrack_get((struct sk_buff 
*)skb, &ctinfo); + if (NULL == ct) { + printk("ipt_connlimit: Oops: invalid ct state ?\n"); + *hotdrop = 1; + return 0; + } + connections = count_them(info->data,skb->nh.iph->saddr,info->mask,ct); + if (-1 == connections) { + printk("ipt_connlimit: Hmm, kmalloc failed :-(\n"); + *hotdrop = 1; /* let's free some memory :-) */ + return 0; + } + match = (info->inverse) ? (connections <= info->limit) : (connections > info->limit); +#if DEBUG + printk("ipt_connlimit: src=%u.%u.%u.%u mask=%u.%u.%u.%u " + "connections=%d limit=%d match=%s\n", + NIPQUAD(skb->nh.iph->saddr), NIPQUAD(info->mask), + connections, info->limit, match ? "yes" : "no"); +#endif + + return match; +} + +static int check(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_connlimit_info *info = matchinfo; + int i; + + /* verify size */ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_connlimit_info))) + return 0; + + /* refuse anything but tcp */ + if (ip->proto != IPPROTO_TCP) + return 0; + + /* init private data */ + info->data = kmalloc(sizeof(struct ipt_connlimit_data),GFP_KERNEL); + spin_lock_init(&(info->data->lock)); + for (i = 0; i < 256; i++) + INIT_LIST_HEAD(&(info->data->iphash[i])); + + return 1; +} + +static void destroy(void *matchinfo, unsigned int matchinfosize) +{ + struct ipt_connlimit_info *info = matchinfo; + struct ipt_connlimit_conn *conn; + struct list_head *hash; + int i; + + /* cleanup */ + for (i = 0; i < 256; i++) { + hash = &(info->data->iphash[i]); + while (hash != hash->next) { + conn = list_entry(hash->next,struct ipt_connlimit_conn,list); + list_del(hash->next); + kfree(conn); + } + } + kfree(info->data); +} + +static struct ipt_match connlimit_match = { + .name = "connlimit", + .match = &match, + .checkentry = &check, + .destroy = &destroy, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&connlimit_match); +} + +static void __exit fini(void) +{ + 
ipt_unregister_match(&connlimit_match); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_fuzzy.c linux-2.6.11.3/net/ipv4/netfilter/ipt_fuzzy.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_fuzzy.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_fuzzy.c 2005-03-13 22:03:55.524477320 +0100 @@ -0,0 +1,185 @@ +/* + * This module implements a simple TSK FLC + * (Takagi-Sugeno-Kang Fuzzy Logic Controller) that aims + * to limit , in an adaptive and flexible way , the packet rate crossing + * a given stream . It serves as an initial and very simple (but effective) + * example of how Fuzzy Logic techniques can be applied to defeat DoS attacks. + * As a matter of fact , Fuzzy Logic can help us to insert any "behavior" + * into our code in a precise , adaptive and efficient manner. + * The goal is very similar to that of "limit" match , but using techniques of + * Fuzzy Control , that allow us to shape the transfer functions precisely , + * avoiding over and undershoots - and stuff like that . + * + * + * 2002-08-10 Hime Aguiar e Oliveira Jr. : Initial version. + * 2002-08-17 : Changed to eliminate floating point operations . + * 2002-08-23 : Coding style changes . 
+*/ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + Packet Acceptance Rate - LOW and Packet Acceptance Rate - HIGH + Expressed in percentage +*/ + +#define PAR_LOW 1/100 +#define PAR_HIGH 1 + +static spinlock_t fuzzy_lock = SPIN_LOCK_UNLOCKED ; + +MODULE_AUTHOR("Hime Aguiar e Oliveira Junior "); +MODULE_DESCRIPTION("IP tables Fuzzy Logic Controller match module"); +MODULE_LICENSE("GPL"); + +static u_int8_t mf_high(u_int32_t tx,u_int32_t mini,u_int32_t maxi) +{ + if (tx >= maxi) + return 100; + + if (tx <= mini) + return 0; + + return ( (100*(tx-mini)) / (maxi-mini) ); +} + +static u_int8_t mf_low(u_int32_t tx,u_int32_t mini,u_int32_t maxi) +{ + if (tx <= mini) + return 100; + + if (tx >= maxi) + return 0; + + return ( (100*( maxi - tx )) / ( maxi - mini ) ); +} + +static int +ipt_fuzzy_match(const struct sk_buff *pskb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + /* From userspace */ + + struct ipt_fuzzy_info *info = (struct ipt_fuzzy_info *) matchinfo; + + u_int8_t random_number; + unsigned long amount; + u_int8_t howhigh, howlow; + + + spin_lock_bh(&fuzzy_lock); /* Rise the lock */ + + info->bytes_total += pskb->len; + info->packets_total++; + + info->present_time = jiffies; + + if (info->present_time >= info->previous_time) + amount = info->present_time - info->previous_time; + else { + /* There was a transition : I choose to re-sample + and keep the old acceptance rate... + */ + + amount = 0; + info->previous_time = info->present_time; + info->bytes_total = info->packets_total = 0; + }; + + if (amount > HZ/10) /* More than 100 ms elapsed ... 
*/ + { + + info->mean_rate = (u_int32_t) ((HZ*info->packets_total) \ + / amount ); + + info->previous_time = info->present_time; + info->bytes_total = info->packets_total = 0; + + howhigh = mf_high(info->mean_rate,info->minimum_rate,info->maximum_rate); + howlow = mf_low(info->mean_rate,info->minimum_rate,info->maximum_rate); + + info->acceptance_rate = (u_int8_t) \ + (howhigh*PAR_LOW + PAR_HIGH*howlow); + + /* In fact , the above defuzzification would require a denominator + proportional to (howhigh+howlow) but , in this particular case , + that expression is constant . + An imediate consequence is that it isn't necessary to call + both mf_high and mf_low - but to keep things understandable , + I did so . */ + + } + + spin_unlock_bh(&fuzzy_lock); /* Release the lock */ + + + if ( info->acceptance_rate < 100 ) + { + get_random_bytes((void *)(&random_number), 1); + + /* If within the acceptance , it can pass => don't match */ + if (random_number <= (255 * info->acceptance_rate) / 100) + return 0; + else + return 1; /* It can't pass ( It matches ) */ + } ; + + return 0; /* acceptance_rate == 100 % => Everything passes ... 
*/ + +} + +static int +ipt_fuzzy_checkentry(const char *tablename, + const struct ipt_ip *e, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + + const struct ipt_fuzzy_info *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_fuzzy_info))) { + printk("ipt_fuzzy: matchsize %u != %u\n", matchsize, + IPT_ALIGN(sizeof(struct ipt_fuzzy_info))); + return 0; + } + + if ((info->minimum_rate < MINFUZZYRATE ) || (info->maximum_rate > MAXFUZZYRATE) + || (info->minimum_rate >= info->maximum_rate )) { + printk("ipt_fuzzy: BAD limits , please verify !!!\n"); + return 0; + } + + return 1; +} + +static struct ipt_match ipt_fuzzy_reg = { + .name = "fuzzy", + .match = ipt_fuzzy_match, + .checkentry = ipt_fuzzy_checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&ipt_fuzzy_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&ipt_fuzzy_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_geoip.c linux-2.6.11.3/net/ipv4/netfilter/ipt_geoip.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_geoip.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_geoip.c 2005-03-13 22:05:27.879437232 +0100 @@ -0,0 +1,275 @@ +/* netfilter's kernel module for the geoip match + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * Copyright (c) 2004 Cookinglinux + */ +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Samuel Jean, Nicolas Bouliane"); +MODULE_DESCRIPTION("iptables/netfilter's geoip match"); + +struct geoip_info *head = NULL; +static spinlock_t geoip_lock = SPIN_LOCK_UNLOCKED; + +static struct geoip_info *add_node(struct geoip_info *memcpy) +{ + struct geoip_info *p = + (struct geoip_info *)kmalloc(sizeof(struct geoip_info), GFP_KERNEL); + + struct geoip_subnet *s; + + if ((p == NULL) || (copy_from_user(p, memcpy, sizeof(struct geoip_info)) != 0)) + return NULL; + + s = (struct geoip_subnet *)kmalloc(p->count * sizeof(struct geoip_subnet), GFP_KERNEL); + if ((s == NULL) || (copy_from_user(s, p->subnets, p->count * sizeof(struct geoip_subnet)) != 0)) + return NULL; + + spin_lock_bh(&geoip_lock); + + p->subnets = s; + p->ref = 1; + p->next = head; + p->prev = NULL; + if (p->next) p->next->prev = p; + head = p; + + spin_unlock_bh(&geoip_lock); + return p; +} + +static void remove_node(struct geoip_info *p) + { + spin_lock_bh(&geoip_lock); + + if (p->next) { /* Am I following a node ? */ + p->next->prev = p->prev; + if (p->prev) p->prev->next = p->next; /* Is there a node behind me ? */ + else head = p->next; /* No? Then I was the head */ + } + + else + if (p->prev) /* Is there a node behind me ? */ + p->prev->next = NULL; + else + head = NULL; /* No, we're alone */ + + /* So now am unlinked or the only one alive, right ? + * What are you waiting ? Free up some memory! 
+ */ + + kfree(p->subnets); + kfree(p); + + spin_unlock_bh(&geoip_lock); + return; +} + +static struct geoip_info *find_node(u_int16_t cc) +{ + struct geoip_info *p = head; + spin_lock_bh(&geoip_lock); + + while (p) { + if (p->cc == cc) { + spin_unlock_bh(&geoip_lock); + return p; + } + p = p->next; + } + spin_unlock_bh(&geoip_lock); + return NULL; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_geoip_info *info = matchinfo; + const struct geoip_info *node; /* This keeps the code sexy */ + const struct iphdr *iph = skb->nh.iph; + u_int32_t ip, j; + u_int8_t i; + + if (info->flags & IPT_GEOIP_SRC) + ip = ntohl(iph->saddr); + else + ip = ntohl(iph->daddr); + + spin_lock_bh(&geoip_lock); + for (i = 0; i < info->count; i++) { + if ((node = info->mem[i]) == NULL) { + printk(KERN_ERR "ipt_geoip: what the hell ?? '%c%c' isn't loaded into memory... skip it!\n", + COUNTRY(info->cc[i])); + + continue; + } + + for (j = 0; j < node->count; j++) + if ((ip > node->subnets[j].begin) && (ip < node->subnets[j].end)) { + spin_unlock_bh(&geoip_lock); + return (info->flags & IPT_GEOIP_INV) ? 0 : 1; + } + } + + spin_unlock_bh(&geoip_lock); + return (info->flags & IPT_GEOIP_INV) ? 1 : 0; +} + +static int geoip_checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_geoip_info *info = matchinfo; + struct geoip_info *node; + u_int8_t i; + + /* FIXME: Call a function to free userspace allocated memory. + * As Martin J. 
said; this match might eat lot of memory + * if commited with iptables-restore --noflush + void (*gfree)(struct geoip_info *oldmem); + gfree = info->fini; + */ + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_geoip_info))) { + printk(KERN_ERR "ipt_geoip: matchsize differ, you may have forgotten to recompile me\n"); + return 0; + } + + /* If info->refcount isn't NULL, then + * it means that checkentry() already + * initialized this entry. Increase a + * refcount to prevent destroy() of + * this entry. */ + if (info->refcount != NULL) { + atomic_inc((atomic_t *)info->refcount); + return 1; + } + + + for (i = 0; i < info->count; i++) { + + if ((node = find_node(info->cc[i])) != NULL) + atomic_inc((atomic_t *)&node->ref); //increase the reference + else + if ((node = add_node(info->mem[i])) == NULL) { + printk(KERN_ERR + "ipt_geoip: unable to load '%c%c' into memory\n", + COUNTRY(info->cc[i])); + return 0; + } + + /* Free userspace allocated memory for that country. + * FIXME: It's a bit odd to call this function everytime + * we process a country. Would be nice to call + * it once after all countries've been processed. + * - SJ + * *not implemented for now* + gfree(info->mem[i]); + */ + + /* Overwrite the now-useless pointer info->mem[i] with + * a pointer to the node's kernelspace structure. + * This avoids searching for a node in the match() and + * destroy() functions. + */ + info->mem[i] = node; + } + + /* We allocate some memory and give info->refcount a pointer + * to this memory. This prevents checkentry() from increasing a refcount + * different from the one used by destroy(). 
+ * For explanation, see http://www.mail-archive.com/netfilter-devel@lists.samba.org/msg00625.html + */ + info->refcount = kmalloc(sizeof(u_int8_t), GFP_KERNEL); + if (info->refcount == NULL) { + printk(KERN_ERR "ipt_geoip: failed to allocate `refcount' memory\n"); + return 0; + } + *(info->refcount) = 1; + + return 1; +} + +static void geoip_destroy(void *matchinfo, unsigned int matchsize) +{ + u_int8_t i; + struct geoip_info *node; /* this keeps the code sexy */ + + struct ipt_geoip_info *info = matchinfo; + /* Decrease the previously increased refcount in checkentry() + * If it's equal to 1, we know this entry is just moving + * but not removed. We simply return to avoid useless destroy() + * processing. + */ + atomic_dec((atomic_t *)info->refcount); + if (*info->refcount) + return; + + /* Don't leak my memory, you idiot. + * Bug found with nfsim.. the netfilter's best + * friend. --peejix */ + kfree(info->refcount); + + /* This entry has been removed from the table so + * decrease the refcount of all countries it is + * using. + */ + + for (i = 0; i < info->count; i++) + if ((node = info->mem[i]) != NULL) { + atomic_dec((atomic_t *)&node->ref); + + /* Free up some memory if that node isn't used + * anymore. */ + if (node->ref < 1) + remove_node(node); + } + else + /* Something strange happened. There's no memory allocated for this + * country. Please send this bug to the mailing list. */ + printk(KERN_ERR + "ipt_geoip: What happened peejix ? 
What happened acidmen ?\n" + "ipt_geoip: please report this bug to the maintainers\n"); + return; +} + +static struct ipt_match geoip_match = { + .name = "geoip", + .match = &match, + .checkentry = &geoip_checkentry, + .destroy = &geoip_destroy, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&geoip_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&geoip_match); + return; +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ipp2p.c linux-2.6.11.3/net/ipv4/netfilter/ipt_ipp2p.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ipp2p.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_ipp2p.c 2005-03-13 22:05:51.893786496 +0100 @@ -0,0 +1,640 @@ +#include +#include +#include +#include +#include +#include + +#define get_u8(X,O) (*(__u8 *)(X + O)) +#define get_u16(X,O) (*(__u16 *)(X + O)) +#define get_u32(X,O) (*(__u32 *)(X + O)) + +MODULE_AUTHOR("Eicke Friedrich "); +MODULE_DESCRIPTION("An extension to iptables to identify P2P traffic."); +MODULE_LICENSE("GPL"); + + +/*Search for UDP eDonkey/eMule/Kad commands*/ +int +udp_search_edk (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + t += 8; + + switch (t[0]) { + case 0xe3: { /*edonkey*/ + switch (t[1]) { + /* e3 9a + 16Bytes Hash | size == 26 */ + case 0x9a: if (packet_len == 26) return ((IPP2P_EDK * 100) + 1); + /* e3 96 xx yy zz kk | size == 14 | server status request */ + case 0x96: if (packet_len == 14) return ((IPP2P_EDK * 100) + 2); + /* e3 a2 | size == 10 or 14 <-- recheck*/ + } + } + + case 0xc5: { /*emule*/ + switch (t[1]) { + /* c5 91 xx yy | size == 12 (8+4) | xx != 0x00 -- xx yy queue rating */ + case 0x91: if ((packet_len == 12) && (t[2] != 0x00)) return ((IPP2P_EDK * 100) + 3); + /* c5 90 xx .. yy | size == 26 (8+2+16) | xx .. 
yy == hash -- file ping */ + case 0x90: if ((packet_len == 26) && (t[2] != 0x00)) return ((IPP2P_EDK * 100) + 4); + /* c5 92 | size == 10 (8+2) -- file not found */ + case 0x92: if (packet_len == 10) return ((IPP2P_EDK * 100) + 5); + /* c5 93 | size == 10 (8+2) -- queue full */ + case 0x93: if (packet_len == 10) return ((IPP2P_EDK * 100) + 6); + } + } + + case 0xe4: { /*kad*/ + switch (t[1]) { + /* e4 50 | size == 12 */ + case 0x50: if (packet_len == 12) return ((IPP2P_EDK * 100) + 7); + /* e4 58 | size == 14 */ + case 0x58: if ((packet_len == 14) && (t[2] != 0x00)) return ((IPP2P_EDK * 100) + 8); + /* e4 59 | size == 10 */ + case 0x59: if (packet_len == 10) return ((IPP2P_EDK * 100) + 9); + /* e4 30 .. | t[18] == 0x01 | size > 26 | --> search */ + case 0x30: if ((packet_len > 26) && (t[18] == 0x01)) return ((IPP2P_EDK * 100) + 10); + /* e4 28 .. 00 | t[68] == 0x00 | size > 76 */ + case 0x28: if ((packet_len > 76) && (t[68] == 0x00)) return ((IPP2P_EDK * 100) + 11); + /* e4 20 .. | size == 43 */ + case 0x20: if ((packet_len == 43) && (t[2] != 0x00) && (t[34] != 0x00)) return ((IPP2P_EDK * 100) + 12); + /* e4 00 .. 00 | size == 35 ? */ + case 0x00: if ((packet_len == 35) && (t[26] == 0x00)) return ((IPP2P_EDK * 100) + 13); + /* e4 10 .. 00 | size == 35 ? */ + case 0x10: if ((packet_len == 35) && (t[26] == 0x00)) return ((IPP2P_EDK * 100) + 14); + /* e4 18 .. 00 | size == 35 ? */ + case 0x18: if ((packet_len == 35) && (t[26] == 0x00)) return ((IPP2P_EDK * 100) + 15); + /* e4 40 .. 
| t[18] == 0x01 | t[19] == 0x00 | size > 40 */ + case 0x40: if ((packet_len > 40) && (t[18] == 0x01) && (t[19] == 0x00)) return ((IPP2P_EDK * 100) + 16); + } + } + + default: return 0; + } /* end of switch (t[0]) */ +}/*udp_search_edk*/ + + +/*Search for UDP Gnutella commands*/ +int +udp_search_gnu (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + t += 8; + + if (memcmp(t, "GND", 3) == 0) return ((IPP2P_GNU * 100) + 1); + if (memcmp(t, "GNUTELLA ", 9) == 0) return ((IPP2P_GNU * 100) + 2); + return 0; +}/*udp_search_gnu*/ + + +/*Search for UDP KaZaA commands*/ +int +udp_search_kazaa (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + + if (t[packet_len-1] == 0x00){ + t += (packet_len - 6); + if (memcmp(t, "KaZaA", 5) == 0) return (IPP2P_KAZAA * 100); + } + return 0; +}/*udp_search_kazaa*/ + + +/*Search for UDP BitTorrent commands*/ +int +udp_search_bit (unsigned char *haystack, int packet_len) +{ + unsigned char *t = haystack; + + /* packet_len has to be 24 */ + if (packet_len != 24) return 0; + + t += 8; + + /* ^ 00 00 04 17 27 10 19 80 */ + if ((ntohl(get_u32(t, 0)) == 0x00000417) && (ntohl(get_u32(t, 4)) == 0x27101980)) return (IPP2P_BIT * 100); + + return 0; +}/*udp_search_bit*/ + + + +/*Search for Ares commands*/ +int +search_ares (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + t += head_len; + + if ((packet_len - head_len) == 6){ /* possible connect command*/ + if ((t[0] == 0x03) && (t[1] == 0x00) && (t[2] == 0x5a) && (t[3] == 0x04) && (t[4] == 0x03) && (t[5] == 0x05)) + return ((IPP2P_ARES * 100) + 1); /* found connect packet: 03 00 5a 04 03 05 */ + } + if ((packet_len - head_len) == 60){ /* possible download command*/ + if ((t[59] == 0x0a) && (t[58] == 0x0a)){ + if (memcmp(t, "PUSH SHA1:", 10) == 0) /* found download command */ + return ((IPP2P_ARES * 100) + 2); + } + } + return 0; +} /*search_ares*/ + + +/*Search for SoulSeek commands*/ +int +search_soul 
(unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + t += head_len; + + if (get_u16(t, 0) == (packet_len - head_len - 4)){ + /* xx xx 00 00 yy zz 00 00 .. | xx = sizeof(payload) - 4 */ + if ((get_u16(t,2) == 0x0000) &&(t[4] != 0x00) && (get_u16(t,6) == 0x0000)) + return ((IPP2P_SOUL * 100) + 1); + } else { + /* 00 00 00 00 00 00 00 00 + sizeof(payload) == 8*/ + if (((packet_len - head_len) == 8) && (get_u32(t, 0) == 0x00000000) && (get_u32(t, 4) == 0x00000000)) + return ((IPP2P_SOUL * 100) + 2); + } + + /* 01 xx 00 00 00 yy .. zz 00 00 00 .. | xx == sizeof(nick) | yy .. zz == nick */ + if ((t[0] == 0x01) && (t[2] == 0x00) && (get_u16(t,3) == 0x0000) && ((packet_len - head_len) > ((get_u8(t,1))+6)) && + (t[(get_u8(t,1))+4] != 0x00) && (t[(get_u8(t,1))+5] == 0x01) && (t[(get_u8(t,1))+6] == 0x00)) + return ((IPP2P_SOUL * 100) + 3); + return 0; +} + + +/*Search for WinMX commands*/ +int +search_winmx (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + int c; + t += head_len; + + if (((packet_len - head_len) == 4) && (memcmp(t, "SEND", 4) == 0)) return ((IPP2P_WINMX * 100) + 1); + if (((packet_len - head_len) == 3) && (memcmp(t, "GET", 3) == 0)) return ((IPP2P_WINMX * 100) + 2); + if (packet_len < (head_len + 10)) return 0; + + if ((memcmp(t, "SEND", 4) == 0) || (memcmp(t, "GET", 3) == 0)){ + c = head_len + 4; + t += 4; + while (c < packet_len - 5) { + if ((t[0] == 0x20) && (t[1] == 0x22)){ + c += 2; + t += 2; + while (c < packet_len - 2) { + if ((t[0] == 0x22) && (t[1] == 0x20)) return ((IPP2P_WINMX * 100) + 3); + t++; + c++; + } + } + t++; + c++; + } + } + return 0; +} /*search_winmx*/ + + +/*Search for appleJuice commands*/ +int +search_apple (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + t += head_len; + + if ((memcmp(t, "ajprot", 6) == 0) && (t[6] == 0x0d) && (t[7] == 0x0a)) return (IPP2P_APPLE * 100); + + return 0; +} + + +/*Search for 
BitTorrent commands*/ +int +search_bittorrent (unsigned char *haystack, int packet_len, int head_len) +{ + + unsigned char *t = haystack; + if (*(haystack+head_len) != 0x13) return 0; /*Bail out of first byte != 0x13*/ + + t += head_len + 1; + + if (memcmp(t, "BitTorrent protocol", 19) == 0) return (IPP2P_BIT * 100); + return 0; +} + + + +/*check for Kazaa get command*/ +int +search_kazaa (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + + if (!((*(haystack + packet_len - 2) == 0x0d) && (*(haystack + packet_len - 1) == 0x0a))) return 0; + + t += head_len; + if (memcmp(t, "GET /.hash=", 11) == 0) + return (IPP2P_DATA_KAZAA * 100); + else + return 0; +} + + +/*check for gnutella get command*/ +int +search_gnu (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + + if (!((*(haystack + packet_len - 2) == 0x0d) && (*(haystack + packet_len - 1) == 0x0a))) return 0; + + t += head_len; + if (memcmp(t, "GET /get/", 9) == 0) return ((IPP2P_DATA_GNU * 100) + 1); + if (memcmp(t, "GET /uri-res/", 13) == 0) return ((IPP2P_DATA_GNU * 100) + 2); + + return 0; +} + + +/*check for gnutella get commands and other typical data*/ +int +search_all_gnu (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + int c; + + if (!((*(haystack + packet_len - 2) == 0x0d) && (*(haystack + packet_len - 1) == 0x0a))) return 0; + + t += head_len; + + if (memcmp(t, "GNUTELLA CONNECT/", 17) == 0) return ((IPP2P_GNU * 100) + 1); + if (memcmp(t, "GNUTELLA/", 9) == 0) return ((IPP2P_GNU * 100) + 2); + + if ((memcmp(t, "GET /get/", 9) == 0) || (memcmp(t, "GET /uri-res/", 13) == 0)) + { + c = head_len + 8; + t += 8; + while (c < packet_len - 22) { + if ((t[0] == 0x0d) && (t[1] == 0x0a)) { + if ((memcmp(t, "X-Gnutella-", 11) == 0) || (memcmp(t, "X-Queue:", 8) == 0)) return ((IPP2P_GNU * 100) + 3); + t += 2; + c += 2; + } else { + t++; + c++; + } + } + } + return 0; +} + + +/*check for KaZaA 
download commands and other typical data*/ +int +search_all_kazaa (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + int c; + + if (!((*(haystack + packet_len - 2) == 0x0d) && (*(haystack + packet_len - 1) == 0x0a))) return 0; + + t += head_len; + if (memcmp(t, "GIVE ", 5) == 0) return ((IPP2P_KAZAA * 100) + 1); + + if (memcmp(t, "GET /", 5) == 0) { + c = head_len + 8; + t += 8; + while (c < packet_len - 22) { + if ((t[0] == 0x0d) && (t[1] == 0x0a)) { + if ( memcmp(t, "X-Kazaa-Username: ", 18) == 0 ) return ((IPP2P_KAZAA * 100) + 2); + t += 2; + c += 2; + } else { + t++; + c++; + } + } + } + + return 0; +} + +/*fast check for edonkey file segment transfer command*/ +int +search_edk (unsigned char *haystack, int packet_len, int head_len) +{ + if (*(haystack+head_len) != 0xe3) + return 0; + else { + if (*(haystack+head_len+5) == 0x47) + return (IPP2P_DATA_EDK * 100); + else + return 0; + } +} + + + +/*intensive but slower search for some edonkey packets including size-check*/ +int +search_all_edk (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + int cmd; + + if (*(haystack+head_len) == 0xd4) { + t += head_len; + cmd = get_u16(t, 1); + if (cmd == (packet_len - head_len - 5)) { + switch (t[5]) { + case 0x82: return ((IPP2P_EDK * 100) + 42); + case 0x15: return ((IPP2P_EDK * 100) + 43); + default: return 0; + } + } + return 0; + } + + + if (*(haystack+head_len) == 0xc5) { /*search for additional eMule packets*/ + t += head_len; + cmd = get_u16(t, 1); + + if (cmd == (packet_len - head_len - 5)) { + switch (t[5]) { + case 0x01: return ((IPP2P_EDK * 100) + 30); + case 0x02: return ((IPP2P_EDK * 100) + 31); + case 0x60: return ((IPP2P_EDK * 100) + 32); + case 0x81: return ((IPP2P_EDK * 100) + 33); + case 0x82: return ((IPP2P_EDK * 100) + 34); + case 0x85: return ((IPP2P_EDK * 100) + 35); + case 0x86: return ((IPP2P_EDK * 100) + 36); + case 0x87: return ((IPP2P_EDK * 100) + 37); + case 0x40: 
return ((IPP2P_EDK * 100) + 38); + case 0x92: return ((IPP2P_EDK * 100) + 39); + case 0x93: return ((IPP2P_EDK * 100) + 40); + case 0x12: return ((IPP2P_EDK * 100) + 41); + default: return 0; + } + } + + return 0; + } + + + if (*(haystack+head_len) != 0xe3) + return 0; + else { + t += head_len; + cmd = get_u16(t, 1); + if (cmd == (packet_len - head_len - 5)) { + switch (t[5]) { + case 0x01: return ((IPP2P_EDK * 100) + 1); /*Client: hello or Server:hello*/ + case 0x50: return ((IPP2P_EDK * 100) + 2); /*Client: file status*/ + case 0x16: return ((IPP2P_EDK * 100) + 3); /*Client: search*/ + case 0x58: return ((IPP2P_EDK * 100) + 4); /*Client: file request*/ + case 0x48: return ((IPP2P_EDK * 100) + 5); /*???*/ + case 0x54: return ((IPP2P_EDK * 100) + 6); /*???*/ + case 0x47: return ((IPP2P_EDK * 100) + 7); /*Client: file segment request*/ + case 0x46: return ((IPP2P_EDK * 100) + 8); /*Client: download segment*/ + case 0x4c: return ((IPP2P_EDK * 100) + 9); /*Client: Hello-Answer*/ + case 0x4f: return ((IPP2P_EDK * 100) + 10); /*Client: file status request*/ + case 0x59: return ((IPP2P_EDK * 100) + 11); /*Client: file request answer*/ + case 0x65: return ((IPP2P_EDK * 100) + 12); /*Client: ???*/ + case 0x66: return ((IPP2P_EDK * 100) + 13); /*Client: ???*/ + case 0x51: return ((IPP2P_EDK * 100) + 14); /*Client: ???*/ + case 0x52: return ((IPP2P_EDK * 100) + 15); /*Client: ???*/ + case 0x4d: return ((IPP2P_EDK * 100) + 16); /*Client: ???*/ + case 0x5c: return ((IPP2P_EDK * 100) + 17); /*Client: ???*/ + case 0x38: return ((IPP2P_EDK * 100) + 18); /*Client: ???*/ + case 0x69: return ((IPP2P_EDK * 100) + 19); /*Client: ???*/ + case 0x19: return ((IPP2P_EDK * 100) + 20); /*Client: ???*/ + case 0x42: return ((IPP2P_EDK * 100) + 21); /*Client: ???*/ + case 0x34: return ((IPP2P_EDK * 100) + 22); /*Client: ???*/ + case 0x94: return ((IPP2P_EDK * 100) + 23); /*Client: ???*/ + case 0x1c: return ((IPP2P_EDK * 100) + 24); /*Client: ???*/ + case 0x6a: return ((IPP2P_EDK * 100) + 25); 
/*Client: ???*/ + default: return 0; + } + } else { + if (cmd > packet_len - head_len - 5) { + if ((t[3] == 0x00) && (t[4] == 0x00)) { + if (t[5] == 0x01) return ((IPP2P_EDK * 100) + 26); + if (t[5] == 0x4c) return ((IPP2P_EDK * 100) + 27); + } + return 0; + + } /*non edk packet*/ + if (t[cmd+5] == 0xe3) return ((IPP2P_EDK * 100) + 28);/*found another edk-command*/ + if (t[cmd+5] == 0xc5) return ((IPP2P_EDK * 100) + 29);/*found an emule-command*/ + return 0; + } + } +} + + +/*fast check for Direct Connect send command*/ +int +search_dc (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + + if (*(haystack+head_len) != 0x24 ) + return 0; + else { + t += head_len + 1; + if (memcmp(t, "Send|", 5) == 0) + return (IPP2P_DATA_DC * 100); + else + return 0; + } + +} + + +/*intensive but slower check for all direct connect packets*/ +int +search_all_dc (unsigned char *haystack, int packet_len, int head_len) +{ + unsigned char *t = haystack; + + if ((*(haystack + head_len) == 0x24) && (*(haystack + packet_len - 1) == 0x7c)) { + t += head_len + 1; + if (memcmp(t, "Lock ", 5) == 0) return ((IPP2P_DC * 100) + 1); /*hub: hello*/ + if (memcmp(t, "Key ", 4) == 0) return ((IPP2P_DC * 100) + 2); /*client: hello*/ + if (memcmp(t, "Hello ", 6) == 0) return ((IPP2P_DC * 100) + 3); /*hub:connected*/ + if (memcmp(t, "MyNick ", 7) == 0) return ((IPP2P_DC * 100) + 4); /*client-client: hello*/ + if (memcmp(t, "Search ", 7) == 0) return ((IPP2P_DC * 100) + 5); /*client: search*/ + if (memcmp(t, "Send", 4) == 0) return ((IPP2P_DC * 100) + 6); /*client: start download*/ + return 0; + } else + return 0; +} + + +static struct { + int command; + __u8 short_hand; /*for fucntions included in short hands*/ + int packet_len; + int (*function_name) (unsigned char *, int, int); +} matchlist[] = { + {IPP2P_EDK,SHORT_HAND_IPP2P,40, &search_all_edk}, + {IPP2P_DATA_KAZAA,SHORT_HAND_DATA,200, &search_kazaa}, + {IPP2P_DATA_EDK,SHORT_HAND_DATA,60, &search_edk}, + 
{IPP2P_DATA_DC,SHORT_HAND_DATA,26, &search_dc}, + {IPP2P_DC,SHORT_HAND_IPP2P,25, search_all_dc}, + {IPP2P_DATA_GNU,SHORT_HAND_DATA,40, &search_gnu}, + {IPP2P_GNU,SHORT_HAND_IPP2P,35, &search_all_gnu}, + {IPP2P_KAZAA,SHORT_HAND_IPP2P,35, &search_all_kazaa}, + {IPP2P_BIT,SHORT_HAND_NONE,40, &search_bittorrent}, + {IPP2P_APPLE,SHORT_HAND_NONE,20, &search_apple}, + {IPP2P_SOUL,SHORT_HAND_NONE,25, &search_soul}, + {IPP2P_WINMX,SHORT_HAND_NONE,20, &search_winmx}, + {IPP2P_ARES,SHORT_HAND_NONE,25, &search_ares}, + {0,0,0,NULL} +}; + + +static struct { + int command; + __u8 short_hand; /*for fucntions included in short hands*/ + int packet_len; + int (*function_name) (unsigned char *, int); +} udp_list[] = { + {IPP2P_KAZAA,SHORT_HAND_IPP2P,14, &udp_search_kazaa}, + {IPP2P_BIT,SHORT_HAND_NONE,23, &udp_search_bit}, + {IPP2P_GNU,SHORT_HAND_IPP2P,11, &udp_search_gnu}, + {IPP2P_EDK,SHORT_HAND_IPP2P,9, &udp_search_edk}, + {0,0,0,NULL} +}; + + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_p2p_info *info = matchinfo; + unsigned char *haystack; + struct iphdr *ip = skb->nh.iph; + int p2p_result = 0, i = 0; + int head_len; + int hlen = ntohs(ip->tot_len)-(ip->ihl*4); /*hlen = packet-data length*/ + + /*must not be a fragment*/ + if (offset) { + if (info->debug) printk("IPP2P.match: offset found %i \n",offset); + return 0; + } + + /*make sure that skb is linear*/ + if(skb_is_nonlinear(skb)){ + if (info->debug) printk("IPP2P.match: nonlinear skb found\n"); + return 0; + } + + + haystack=(char *)ip+(ip->ihl*4); /*haystack = packet data*/ + + switch (ip->protocol){ + case IPPROTO_TCP: /*what to do with a TCP packet*/ + { + struct tcphdr *tcph = (void *) ip + ip->ihl * 4; + + if (tcph->fin) return 0; /*if FIN bit is set bail out*/ + if (tcph->syn) return 0; /*if SYN bit is set bail out*/ + if (tcph->rst) return 0; /*if RST bit is set bail out*/ + 
head_len = tcph->doff * 4; /*get TCP-Header-Size*/ + while (matchlist[i].command) { + if ((((info->cmd & matchlist[i].command) == matchlist[i].command) || + ((info->cmd & matchlist[i].short_hand) == matchlist[i].short_hand)) && + (hlen > matchlist[i].packet_len)) { + p2p_result = matchlist[i].function_name(haystack, hlen, head_len); + if (p2p_result) + { + if (info->debug) printk("IPP2P.debug:TCP-match: %i from: %u.%u.%u.%u:%i to: %u.%u.%u.%u:%i Length: %i\n", + p2p_result, NIPQUAD(ip->saddr),ntohs(tcph->source), NIPQUAD(ip->daddr),ntohs(tcph->dest),hlen); + return p2p_result; + } + } + i++; + } + return p2p_result; + } + + case IPPROTO_UDP: /*what to do with an UDP packet*/ + { + struct udphdr *udph = (void *) ip + ip->ihl * 4; + + while (udp_list[i].command){ + if ((((info->cmd & udp_list[i].command) == udp_list[i].command) || + ((info->cmd & udp_list[i].short_hand) == udp_list[i].short_hand)) && + (hlen > udp_list[i].packet_len)) { + p2p_result = udp_list[i].function_name(haystack, hlen); + if (p2p_result){ + if (info->debug) printk("IPP2P.debug:UDP-match: %i from: %u.%u.%u.%u:%i to: %u.%u.%u.%u:%i Length: %i\n", + p2p_result, NIPQUAD(ip->saddr),ntohs(udph->source), NIPQUAD(ip->daddr),ntohs(udph->dest),hlen); + return p2p_result; + } + } + i++; + } + return p2p_result; + } + + default: return 0; + } +} + + + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + /* Must specify -p tcp */ +/* if (ip->proto != IPPROTO_TCP || (ip->invflags & IPT_INV_PROTO)) { + * printk("ipp2p: Only works on TCP packets, use -p tcp\n"); + * return 0; + * }*/ + return 1; +} + + + + +static struct ipt_match ipp2p_match = { + .name = "ipp2p", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; + + +static int __init init(void) +{ + printk(KERN_INFO "IPP2P v%s loading\n", IPP2P_VERSION); + return ipt_register_match(&ipp2p_match); +} + +static void __exit fini(void) +{ + 
ipt_unregister_match(&ipp2p_match); + printk(KERN_INFO "IPP2P v%s unloaded\n", IPP2P_VERSION); +} + +module_init(init); +module_exit(fini); + + diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ipv4options.c linux-2.6.11.3/net/ipv4/netfilter/ipt_ipv4options.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_ipv4options.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_ipv4options.c 2005-03-13 22:03:56.288361192 +0100 @@ -0,0 +1,172 @@ +/* + This is a module which is used to match ipv4 options. + This file is distributed under the terms of the GNU General Public + License (GPL). Copies of the GPL can be obtained from: + ftp://prep.ai.mit.edu/pub/gnu/GPL + + 11-mars-2001 Fabrice MARIE : initial development. + 12-july-2001 Fabrice MARIE : added router-alert otions matching. Fixed a bug with no-srr + 12-august-2001 Imran Patel : optimization of the match. + 18-november-2001 Fabrice MARIE : added [!] 'any' option match. + 19-february-2004 Harald Welte : merge with 2.6.x +*/ + +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Fabrice Marie "); + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_ipv4options_info *info = matchinfo; /* match info for rule */ + const struct iphdr *iph = skb->nh.iph; + const struct ip_options *opt; + + if (iph->ihl * 4 == sizeof(struct iphdr)) { + /* No options, so we match only the "DONTs" and the "IGNOREs" */ + + if (((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT) || + ((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) || + ((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR) || + ((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) || + ((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) 
|| + ((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT)) + return 0; + return 1; + } + else { + if ((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT) + /* there are options, and we don't need to care which one */ + return 1; + else { + if ((info->options & IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) == IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) + /* there are options but we don't want any ! */ + return 0; + } + } + + opt = &(IPCB(skb)->opt); + + /* source routing */ + if ((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) { + if (!((opt->srr) & (opt->is_strictroute))) + return 0; + } + else if ((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR) { + if (!((opt->srr) & (!opt->is_strictroute))) + return 0; + } + else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_SRR) == IPT_IPV4OPTION_DONT_MATCH_SRR) { + if (opt->srr) + return 0; + } + /* record route */ + if ((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) { + if (!opt->rr) + return 0; + } + else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_RR) == IPT_IPV4OPTION_DONT_MATCH_RR) { + if (opt->rr) + return 0; + } + /* timestamp */ + if ((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) { + if (!opt->ts) + return 0; + } + else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) == IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) { + if (opt->ts) + return 0; + } + /* router-alert option */ + if ((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT) { + if (!opt->router_alert) + return 0; + } + else if ((info->options & IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) { + if (opt->router_alert) + return 0; + } + + /* we match ! 
*/ + return 1; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + const struct ipt_ipv4options_info *info = matchinfo; /* match info for rule */ + /* Check the size */ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_ipv4options_info))) + return 0; + /* Now check the coherence of the data ... */ + if (((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT) && + (((info->options & IPT_IPV4OPTION_DONT_MATCH_SRR) == IPT_IPV4OPTION_DONT_MATCH_SRR) || + ((info->options & IPT_IPV4OPTION_DONT_MATCH_RR) == IPT_IPV4OPTION_DONT_MATCH_RR) || + ((info->options & IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) == IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) || + ((info->options & IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) || + ((info->options & IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) == IPT_IPV4OPTION_DONT_MATCH_ANY_OPT))) + return 0; /* opposites */ + if (((info->options & IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) == IPT_IPV4OPTION_DONT_MATCH_ANY_OPT) && + (((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR) || + ((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) || + ((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) || + ((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) || + ((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT) || + ((info->options & IPT_IPV4OPTION_MATCH_ANY_OPT) == IPT_IPV4OPTION_MATCH_ANY_OPT))) + return 0; /* opposites */ + if (((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) && + ((info->options & IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR)) + return 0; /* cannot match in the same time loose and strict source routing */ + if ((((info->options & IPT_IPV4OPTION_MATCH_SSRR) == IPT_IPV4OPTION_MATCH_SSRR) || + ((info->options & 
IPT_IPV4OPTION_MATCH_LSRR) == IPT_IPV4OPTION_MATCH_LSRR)) && + ((info->options & IPT_IPV4OPTION_DONT_MATCH_SRR) == IPT_IPV4OPTION_DONT_MATCH_SRR)) + return 0; /* opposites */ + if (((info->options & IPT_IPV4OPTION_MATCH_RR) == IPT_IPV4OPTION_MATCH_RR) && + ((info->options & IPT_IPV4OPTION_DONT_MATCH_RR) == IPT_IPV4OPTION_DONT_MATCH_RR)) + return 0; /* opposites */ + if (((info->options & IPT_IPV4OPTION_MATCH_TIMESTAMP) == IPT_IPV4OPTION_MATCH_TIMESTAMP) && + ((info->options & IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP) == IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP)) + return 0; /* opposites */ + if (((info->options & IPT_IPV4OPTION_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_MATCH_ROUTER_ALERT) && + ((info->options & IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT) == IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT)) + return 0; /* opposites */ + + /* everything looks ok. */ + return 1; +} + +static struct ipt_match ipv4options_match = { + .name = "ipv4options", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ipt_register_match(&ipv4options_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&ipv4options_match); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_nth.c linux-2.6.11.3/net/ipv4/netfilter/ipt_nth.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_nth.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_nth.c 2005-03-13 22:03:57.030248408 +0100 @@ -0,0 +1,166 @@ +/* + This is a module which is used for match support for every Nth packet + This file is distributed under the terms of the GNU General Public + License (GPL). Copies of the GPL can be obtained from: + ftp://prep.ai.mit.edu/pub/gnu/GPL + + 2001-07-18 Fabrice MARIE : initial implementation. 
+ 2001-09-20 Richard Wagner (rwagner@cloudnet.com) + * added support for multiple counters + * added support for matching on individual packets + in the counter cycle + 2004-02-19 Harald Welte + * port to 2.6.x + +*/ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Fabrice Marie "); + +/* + * State information. + */ +struct state { + spinlock_t lock; + u_int16_t number; +}; + +static struct state states[IPT_NTH_NUM_COUNTERS]; + +static int +ipt_nth_match(const struct sk_buff *pskb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + /* Parameters from userspace */ + const struct ipt_nth_info *info = matchinfo; + unsigned counter = info->counter; + if((counter < 0) || (counter >= IPT_NTH_NUM_COUNTERS)) + { + printk(KERN_WARNING "nth: invalid counter %u. counter between 0 and %u\n", counter, IPT_NTH_NUM_COUNTERS-1); + return 0; + }; + + spin_lock(&states[counter].lock); + + /* Are we matching every nth packet?*/ + if (info->packet == 0xFF) + { + /* We're matching every nth packet and only every nth packet*/ + /* Do we match or invert match? 
*/ + if (info->not == 0) + { + if (states[counter].number == 0) + { + ++states[counter].number; + goto match; + } + if (states[counter].number >= info->every) + states[counter].number = 0; /* reset the counter */ + else + ++states[counter].number; + goto dontmatch; + } + else + { + if (states[counter].number == 0) + { + ++states[counter].number; + goto dontmatch; + } + if (states[counter].number >= info->every) + states[counter].number = 0; + else + ++states[counter].number; + goto match; + } + } + else + { + /* We're using the --packet, so there must be a rule for every value */ + if (states[counter].number == info->packet) + { + /* only increment the counter when a match happens */ + if (states[counter].number >= info->every) + states[counter].number = 0; /* reset the counter */ + else + ++states[counter].number; + goto match; + } + else + goto dontmatch; + } + + dontmatch: + /* don't match */ + spin_unlock(&states[counter].lock); + return 0; + + match: + spin_unlock(&states[counter].lock); + return 1; +} + +static int +ipt_nth_checkentry(const char *tablename, + const struct ipt_ip *e, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + /* Parameters from userspace */ + const struct ipt_nth_info *info = matchinfo; + unsigned counter = info->counter; + if((counter < 0) || (counter >= IPT_NTH_NUM_COUNTERS)) + { + printk(KERN_WARNING "nth: invalid counter %u. 
counter between 0 and %u\n", counter, IPT_NTH_NUM_COUNTERS-1); + return 0; + }; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_nth_info))) { + printk("nth: matchsize %u != %u\n", matchsize, + IPT_ALIGN(sizeof(struct ipt_nth_info))); + return 0; + } + + states[counter].number = info->startat; + + return 1; +} + +static struct ipt_match ipt_nth_reg = { + .name = "nth", + .match = ipt_nth_match, + .checkentry = ipt_nth_checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + unsigned counter; + + memset(&states, 0, sizeof(states)); + for (counter = 0; counter < IPT_NTH_NUM_COUNTERS; counter++) + spin_lock_init(&(states[counter].lock)); + + return ipt_register_match(&ipt_nth_reg); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&ipt_nth_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_osf.c linux-2.6.11.3/net/ipv4/netfilter/ipt_osf.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_osf.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_osf.c 2005-03-13 22:03:57.763136992 +0100 @@ -0,0 +1,870 @@ +/* + * ipt_osf.c + * + * Copyright (c) 2003 Evgeniy Polyakov + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* + * OS fingerprint matching module. 
+ * It simply compares various parameters from SYN packet with + * some hardcoded ones. + * + * Original table was created by Michal Zalewski + * for his p0f. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#define OSF_DEBUG + +#ifdef OSF_DEBUG +#define log(x...) printk(KERN_INFO "ipt_osf: " x) +#define loga(x...) printk(x) +#else +#define log(x...) do {} while(0) +#define loga(x...) do {} while(0) +#endif + +#define FMATCH_WRONG 0 +#define FMATCH_OK 1 +#define FMATCH_OPT_WRONG 2 + +#define OPTDEL ',' +#define OSFPDEL ':' +#define MAXOPTSTRLEN 128 +#define OSFFLUSH "FLUSH" + +static rwlock_t osf_lock = RW_LOCK_UNLOCKED; +static spinlock_t ipt_osf_netlink_lock = SPIN_LOCK_UNLOCKED; +static struct list_head finger_list; +static int match(const struct sk_buff *, const struct net_device *, const struct net_device *, + const void *, int, + int *); +static int checkentry(const char *, const struct ipt_ip *, void *, + unsigned int, unsigned int); + +static unsigned long seq, ipt_osf_groups = 1; +static struct sock *nts; + +static struct ipt_match osf_match = { + .name = "osf", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE +}; + +static void ipt_osf_nlsend(struct osf_finger *f, const struct sk_buff *sk) +{ + unsigned int size; + struct sk_buff *skb; + struct ipt_osf_nlmsg *data; + struct nlmsghdr *nlh; + + size = NLMSG_SPACE(sizeof(struct ipt_osf_nlmsg)); + + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + { + log("skb_alloc() failed.\n"); + return; + } + + nlh = NLMSG_PUT(skb, 0, seq++, NLMSG_DONE, size - sizeof(*nlh)); + + data = (struct ipt_osf_nlmsg *)NLMSG_DATA(nlh); + + memcpy(&data->f, f, sizeof(struct osf_finger)); + memcpy(&data->ip, sk->nh.iph, sizeof(struct iphdr)); + memcpy(&data->tcp, (struct tcphdr *)((u_int32_t *)sk->nh.iph + sk->nh.iph->ihl), sizeof(struct 
tcphdr)); + + NETLINK_CB(skb).dst_groups = ipt_osf_groups; + netlink_broadcast(nts, skb, 0, ipt_osf_groups, GFP_ATOMIC); + +nlmsg_failure: + return; +} + +static inline int smart_dec(const struct sk_buff *skb, unsigned long flags, unsigned char f_ttl) +{ + struct iphdr *ip = skb->nh.iph; + + if (flags & IPT_OSF_SMART) + { + struct in_device *in_dev = in_dev_get(skb->dev); + + for_ifa(in_dev) + { + if (inet_ifa_match(ip->saddr, ifa)) + { + in_dev_put(in_dev); + return (ip->ttl == f_ttl); + } + } + endfor_ifa(in_dev); + + in_dev_put(in_dev); + return (ip->ttl <= f_ttl); + } + else + return (ip->ttl == f_ttl); +} + +static int +match(const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, + const void *matchinfo, int offset, + int *hotdrop) +{ + struct ipt_osf_info *info = (struct ipt_osf_info *)matchinfo; + struct iphdr *ip = skb->nh.iph; + struct tcphdr *tcp; + int fmatch = FMATCH_WRONG, fcount = 0; + unsigned long totlen, optsize = 0, window; + unsigned char df, *optp = NULL, *_optp = NULL; + char check_WSS = 0; + struct list_head *ent; + struct osf_finger *f; + + if (!ip || !info) + return 0; + + tcp = (struct tcphdr *)((u_int32_t *)ip + ip->ihl); + + if (!tcp->syn) + return 0; + + totlen = ntohs(ip->tot_len); + df = ((ntohs(ip->frag_off) & IP_DF)?1:0); + window = ntohs(tcp->window); + + if (tcp->doff*4 > sizeof(struct tcphdr)) + { + _optp = optp = (char *)(tcp+1); + optsize = tcp->doff*4 - sizeof(struct tcphdr); + } + + /* Actually we can create hash/table of all genres and search + * only in appropriate part, but here is initial variant, + * so will use slow path. 
+ */ + read_lock(&osf_lock); + list_for_each(ent, &finger_list) + { + f = list_entry(ent, struct osf_finger, flist); + + if (!(info->flags & IPT_OSF_LOG) && strcmp(info->genre, f->genre)) + continue; + + optp = _optp; + fmatch = FMATCH_WRONG; + + if (totlen == f->ss && df == f->df && + smart_dec(skb, info->flags, f->ttl)) + { + unsigned long foptsize; + int optnum; + unsigned short mss = 0; + + check_WSS = 0; + + switch (f->wss.wc) + { + case 0: check_WSS = 0; break; + case 'S': check_WSS = 1; break; + case 'T': check_WSS = 2; break; + case '%': check_WSS = 3; break; + default: log("Wrong fingerprint wss.wc=%d, %s - %s\n", + f->wss.wc, f->genre, f->details); + check_WSS = 4; + break; + } + if (check_WSS == 4) + continue; + + /* Check options */ + + foptsize = 0; + for (optnum=0; optnumopt_num; ++optnum) + foptsize += f->opt[optnum].length; + + + if (foptsize > MAX_IPOPTLEN || optsize > MAX_IPOPTLEN || optsize != foptsize) + continue; + + if (!optp) + { + fmatch = FMATCH_OK; + loga("\tYEP : matching without options.\n"); + if ((info->flags & IPT_OSF_LOG) && + info->loglevel == IPT_OSF_LOGLEVEL_FIRST) + break; + else + continue; + } + + + for (optnum=0; optnumopt_num; ++optnum) + { + if (f->opt[optnum].kind == (*optp)) + { + unsigned char len = f->opt[optnum].length; + unsigned char *optend = optp + len; + int loop_cont = 0; + + fmatch = FMATCH_OK; + + + switch (*optp) + { + case OSFOPT_MSS: + mss = ntohs(*(unsigned short *)(optp+2)); + break; + case OSFOPT_TS: + loop_cont = 1; + break; + } + + if (loop_cont) + { + optp = optend; + continue; + } + + if (len != 1) + { + /* Skip kind and length fields*/ + optp += 2; + + if (f->opt[optnum].wc.val != 0) + { + unsigned long tmp = 0; + + /* Hmmm... It looks a bit ugly. :) */ + memcpy(&tmp, optp, + (len > sizeof(unsigned long)? 
+ sizeof(unsigned long):len)); + /* 2 + 2: optlen(2 bytes) + + * kind(1 byte) + length(1 byte) */ + if (len == 4) + tmp = ntohs(tmp); + else + tmp = ntohl(tmp); + + if (f->opt[optnum].wc.wc == '%') + { + if ((tmp % f->opt[optnum].wc.val) != 0) + fmatch = FMATCH_OPT_WRONG; + } + else if (tmp != f->opt[optnum].wc.val) + fmatch = FMATCH_OPT_WRONG; + } + } + + optp = optend; + } + else + fmatch = FMATCH_OPT_WRONG; + + if (fmatch != FMATCH_OK) + break; + } + + if (fmatch != FMATCH_OPT_WRONG) + { + fmatch = FMATCH_WRONG; + + switch (check_WSS) + { + case 0: + if (f->wss.val == 0 || window == f->wss.val) + fmatch = FMATCH_OK; + break; + case 1: /* MSS */ +/* Lurked in OpenBSD */ +#define SMART_MSS 1460 + if (window == f->wss.val*mss || + window == f->wss.val*SMART_MSS) + fmatch = FMATCH_OK; + break; + case 2: /* MTU */ + if (window == f->wss.val*(mss+40) || + window == f->wss.val*(SMART_MSS+40)) + fmatch = FMATCH_OK; + break; + case 3: /* MOD */ + if ((window % f->wss.val) == 0) + fmatch = FMATCH_OK; + break; + } + } + + + if (fmatch == FMATCH_OK) + { + fcount++; + log("%s [%s:%s:%s] : %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u hops=%d\n", + f->genre, f->version, + f->subtype, f->details, + NIPQUAD(ip->saddr), ntohs(tcp->source), + NIPQUAD(ip->daddr), ntohs(tcp->dest), + f->ttl - ip->ttl); + if (info->flags & IPT_OSF_NETLINK) + { + spin_lock_bh(&ipt_osf_netlink_lock); + ipt_osf_nlsend(f, skb); + spin_unlock_bh(&ipt_osf_netlink_lock); + } + if ((info->flags & IPT_OSF_LOG) && + info->loglevel == IPT_OSF_LOGLEVEL_FIRST) + break; + } + } + } + if (!fcount && (info->flags & (IPT_OSF_LOG | IPT_OSF_NETLINK))) + { + unsigned char opt[4 * 15 - sizeof(struct tcphdr)]; + unsigned int i, optsize; + struct osf_finger fg; + + memset(&fg, 0, sizeof(fg)); + + if ((info->flags & IPT_OSF_LOG)) + log("Unknown: %lu:%d:%d:%lu:", window, ip->ttl, df, totlen); + if (optp) + { + optsize = tcp->doff * 4 - sizeof(struct tcphdr); + if (skb_copy_bits(skb, ip->ihl*4 + sizeof(struct tcphdr), + opt, optsize) < 
0) + { + if (info->flags & IPT_OSF_LOG) + loga("TRUNCATED"); + if (info->flags & IPT_OSF_NETLINK) + strcpy(fg.details, "TRUNCATED"); + } + else + { + for (i = 0; i < optsize; i++) + { + if (info->flags & IPT_OSF_LOG) + loga("%02X", opt[i]); + } + if (info->flags & IPT_OSF_NETLINK) + memcpy(fg.details, opt, MAXDETLEN); + } + } + if ((info->flags & IPT_OSF_LOG)) + loga(" %u.%u.%u.%u:%u -> %u.%u.%u.%u:%u\n", + NIPQUAD(ip->saddr), ntohs(tcp->source), + NIPQUAD(ip->daddr), ntohs(tcp->dest)); + + if (info->flags & IPT_OSF_NETLINK) + { + fg.wss.val = window; + fg.ttl = ip->ttl; + fg.df = df; + fg.ss = totlen; + strncpy(fg.genre, "Unknown", MAXGENRELEN); + + spin_lock_bh(&ipt_osf_netlink_lock); + ipt_osf_nlsend(&fg, skb); + spin_unlock_bh(&ipt_osf_netlink_lock); + } + } + + read_unlock(&osf_lock); + + if (fcount) + fmatch = FMATCH_OK; + + return (fmatch == FMATCH_OK)?1:0; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_osf_info))) + return 0; + if (ip->proto != IPPROTO_TCP) + return 0; + + return 1; +} + +static char * osf_strchr(char *ptr, char c) +{ + char *tmp; + + tmp = strchr(ptr, c); + + while (tmp && tmp+1 && isspace(*(tmp+1))) + tmp++; + + return tmp; +} + +static struct osf_finger * finger_alloc(void) +{ + struct osf_finger *f; + + f = kmalloc(sizeof(struct osf_finger), GFP_KERNEL); + if (f) + memset(f, 0, sizeof(struct osf_finger)); + + return f; +} + +static void finger_free(struct osf_finger *f) +{ + memset(f, 0, sizeof(struct osf_finger)); + kfree(f); +} + + +static void osf_parse_opt(struct osf_opt *opt, int *optnum, char *obuf, int olen) +{ + int i, op; + char *ptr, wc; + unsigned long val; + + ptr = &obuf[0]; + i = 0; + while (ptr != NULL && i < olen) + { + val = 0; + op = 0; + wc = 0; + switch (obuf[i]) + { + case 'N': + op = OSFOPT_NOP; + ptr = osf_strchr(&obuf[i], OPTDEL); + if (ptr) + { + *ptr = '\0'; + 
ptr++; + i += (int)(ptr-&obuf[i]); + + } + else + i++; + break; + case 'S': + op = OSFOPT_SACKP; + ptr = osf_strchr(&obuf[i], OPTDEL); + if (ptr) + { + *ptr = '\0'; + ptr++; + i += (int)(ptr-&obuf[i]); + + } + else + i++; + break; + case 'T': + op = OSFOPT_TS; + ptr = osf_strchr(&obuf[i], OPTDEL); + if (ptr) + { + *ptr = '\0'; + ptr++; + i += (int)(ptr-&obuf[i]); + + } + else + i++; + break; + case 'W': + op = OSFOPT_WSO; + ptr = osf_strchr(&obuf[i], OPTDEL); + if (ptr) + { + switch (obuf[i+1]) + { + case '%': wc = '%'; break; + case 'S': wc = 'S'; break; + case 'T': wc = 'T'; break; + default: wc = 0; break; + } + + *ptr = '\0'; + ptr++; + if (wc) + val = simple_strtoul(&obuf[i+2], NULL, 10); + else + val = simple_strtoul(&obuf[i+1], NULL, 10); + i += (int)(ptr-&obuf[i]); + + } + else + i++; + break; + case 'M': + op = OSFOPT_MSS; + ptr = osf_strchr(&obuf[i], OPTDEL); + if (ptr) + { + if (obuf[i+1] == '%') + wc = '%'; + *ptr = '\0'; + ptr++; + if (wc) + val = simple_strtoul(&obuf[i+2], NULL, 10); + else + val = simple_strtoul(&obuf[i+1], NULL, 10); + i += (int)(ptr-&obuf[i]); + + } + else + i++; + break; + case 'E': + op = OSFOPT_EOL; + ptr = osf_strchr(&obuf[i], OPTDEL); + if (ptr) + { + *ptr = '\0'; + ptr++; + i += (int)(ptr-&obuf[i]); + + } + else + i++; + break; + default: + ptr = osf_strchr(&obuf[i], OPTDEL); + if (ptr) + { + ptr++; + i += (int)(ptr-&obuf[i]); + + } + else + i++; + break; + } + + opt[*optnum].kind = IANA_opts[op].kind; + opt[*optnum].length = IANA_opts[op].length; + opt[*optnum].wc.wc = wc; + opt[*optnum].wc.val = val; + + (*optnum)++; + } +} + +static int osf_proc_read(char *buf, char **start, off_t off, int count, int *eof, void *data) +{ + struct list_head *ent; + struct osf_finger *f = NULL; + int i, __count, err; + + *eof = 1; + __count = count; + count = 0; + + read_lock_bh(&osf_lock); + list_for_each(ent, &finger_list) + { + f = list_entry(ent, struct osf_finger, flist); + + log("%s [%s]", f->genre, f->details); + + err = 
snprintf(buf+count, __count-count, "%s - %s[%s] : %s", + f->genre, f->version, + f->subtype, f->details); + if (err == 0 || __count <= count + err) + break; + else + count += err; + if (f->opt_num) + { + loga(" OPT: "); + //count += sprintf(buf+count, " OPT: "); + for (i=0; iopt_num; ++i) + { + //count += sprintf(buf+count, "%d.%c%lu; ", + // f->opt[i].kind, (f->opt[i].wc.wc)?f->opt[i].wc.wc:' ', f->opt[i].wc.val); + loga("%d.%c%lu; ", + f->opt[i].kind, (f->opt[i].wc.wc)?f->opt[i].wc.wc:' ', f->opt[i].wc.val); + } + } + loga("\n"); + err = snprintf(buf+count, __count-count, "\n"); + if (err == 0 || __count <= count + err) + break; + else + count += err; + } + read_unlock_bh(&osf_lock); + + return count; +} + +static int osf_proc_write(struct file *file, const char *buffer, unsigned long count, void *data) +{ + int cnt; + unsigned long i; + char obuf[MAXOPTSTRLEN]; + struct osf_finger *finger; + struct list_head *ent, *n; + + char *pbeg, *pend; + + if (count == strlen(OSFFLUSH) && !strncmp(buffer, OSFFLUSH, strlen(OSFFLUSH))) + { + int i = 0; + write_lock_bh(&osf_lock); + list_for_each_safe(ent, n, &finger_list) + { + i++; + finger = list_entry(ent, struct osf_finger, flist); + list_del(&finger->flist); + finger_free(finger); + } + write_unlock_bh(&osf_lock); + + log("Flushed %d entries.\n", i); + + return count; + } + + + cnt = 0; + for (i=0; iwss.wc = 'S'; + if (pbeg[1] == '%') + finger->wss.val = simple_strtoul(pbeg+2, NULL, 10); + else if (pbeg[1] == '*') + finger->wss.val = 0; + else + finger->wss.val = simple_strtoul(pbeg+1, NULL, 10); + } + else if (pbeg[0] == 'T') + { + finger->wss.wc = 'T'; + if (pbeg[1] == '%') + finger->wss.val = simple_strtoul(pbeg+2, NULL, 10); + else if (pbeg[1] == '*') + finger->wss.val = 0; + else + finger->wss.val = simple_strtoul(pbeg+1, NULL, 10); + } + else if (pbeg[0] == '%') + { + finger->wss.wc = '%'; + finger->wss.val = simple_strtoul(pbeg+1, NULL, 10); + } + else if (isdigit(pbeg[0])) + { + finger->wss.wc = 0; + 
finger->wss.val = simple_strtoul(pbeg, NULL, 10); + } + + pbeg = pend+1; + } + pend = osf_strchr(pbeg, OSFPDEL); + if (pend) + { + *pend = '\0'; + finger->ttl = simple_strtoul(pbeg, NULL, 10); + pbeg = pend+1; + } + pend = osf_strchr(pbeg, OSFPDEL); + if (pend) + { + *pend = '\0'; + finger->df = simple_strtoul(pbeg, NULL, 10); + pbeg = pend+1; + } + pend = osf_strchr(pbeg, OSFPDEL); + if (pend) + { + *pend = '\0'; + finger->ss = simple_strtoul(pbeg, NULL, 10); + pbeg = pend+1; + } + + pend = osf_strchr(pbeg, OSFPDEL); + if (pend) + { + *pend = '\0'; + cnt = snprintf(obuf, sizeof(obuf), "%s", pbeg); + pbeg = pend+1; + } + + pend = osf_strchr(pbeg, OSFPDEL); + if (pend) + { + *pend = '\0'; + if (pbeg[0] == '@' || pbeg[0] == '*') + cnt = snprintf(finger->genre, sizeof(finger->genre), "%s", pbeg+1); + else + cnt = snprintf(finger->genre, sizeof(finger->genre), "%s", pbeg); + pbeg = pend+1; + } + + pend = osf_strchr(pbeg, OSFPDEL); + if (pend) + { + *pend = '\0'; + cnt = snprintf(finger->version, sizeof(finger->version), "%s", pbeg); + pbeg = pend+1; + } + + pend = osf_strchr(pbeg, OSFPDEL); + if (pend) + { + *pend = '\0'; + cnt = snprintf(finger->subtype, sizeof(finger->subtype), "%s", pbeg); + pbeg = pend+1; + } + + cnt = snprintf(finger->details, + ((count - (pbeg - buffer)+1) > MAXDETLEN)?MAXDETLEN:(count - (pbeg - buffer)+1), + "%s", pbeg); + + log("%s - %s[%s] : %s\n", + finger->genre, finger->version, + finger->subtype, finger->details); + + osf_parse_opt(finger->opt, &finger->opt_num, obuf, sizeof(obuf)); + + + write_lock_bh(&osf_lock); + list_add_tail(&finger->flist, &finger_list); + write_unlock_bh(&osf_lock); + + return count; +} + +static int __init osf_init(void) +{ + int err; + struct proc_dir_entry *p; + + log("Startng OS fingerprint matching module.\n"); + + INIT_LIST_HEAD(&finger_list); + + err = ipt_register_match(&osf_match); + if (err) + { + log("Failed to register OS fingerprint matching module.\n"); + return -ENXIO; + } + + p = 
create_proc_entry("sys/net/ipv4/osf", S_IFREG | 0644, NULL); + if (!p) + { + ipt_unregister_match(&osf_match); + return -ENXIO; + } + + p->write_proc = osf_proc_write; + p->read_proc = osf_proc_read; + + nts = netlink_kernel_create(NETLINK_NFLOG, NULL); + if (!nts) + { + log("netlink_kernel_create() failed\n"); + remove_proc_entry("sys/net/ipv4/osf", NULL); + ipt_unregister_match(&osf_match); + return -ENOMEM; + } + + return 0; +} + +static void __exit osf_fini(void) +{ + struct list_head *ent, *n; + struct osf_finger *f; + + remove_proc_entry("sys/net/ipv4/osf", NULL); + ipt_unregister_match(&osf_match); + if (nts && nts->sk_socket) + sock_release(nts->sk_socket); + + list_for_each_safe(ent, n, &finger_list) + { + f = list_entry(ent, struct osf_finger, flist); + list_del(&f->flist); + finger_free(f); + } + + log("OS fingerprint matching module finished.\n"); +} + +module_init(osf_init); +module_exit(osf_fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Evgeniy Polyakov "); +MODULE_DESCRIPTION("Passive OS fingerprint matching."); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_owner.c linux-2.6.11.3/net/ipv4/netfilter/ipt_owner.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_owner.c 2005-03-13 07:44:41.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_owner.c 2005-03-13 22:06:04.901808976 +0100 @@ -6,12 +6,19 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
+ * + * 03/26/2003 Patrick McHardy : LOCAL_IN support */ #include #include #include +#include +#include +#include #include +#include +#include #include #include @@ -21,7 +28,7 @@ MODULE_DESCRIPTION("iptables owner match"); static int -match_comm(const struct sk_buff *skb, const char *comm) +match_comm(const struct sock *sk, const char *comm) { struct task_struct *g, *p; struct files_struct *files; @@ -38,7 +45,7 @@ spin_lock(&files->file_lock); for (i=0; i < files->max_fds; i++) { if (fcheck_files(files, i) == - skb->sk->sk_socket->file) { + sk->sk_socket->file) { spin_unlock(&files->file_lock); task_unlock(p); read_unlock(&tasklist_lock); @@ -54,7 +61,7 @@ } static int -match_pid(const struct sk_buff *skb, pid_t pid) +match_pid(const struct sock *sk, pid_t pid) { struct task_struct *p; struct files_struct *files; @@ -70,7 +77,7 @@ spin_lock(&files->file_lock); for (i=0; i < files->max_fds; i++) { if (fcheck_files(files, i) == - skb->sk->sk_socket->file) { + sk->sk_socket->file) { spin_unlock(&files->file_lock); task_unlock(p); read_unlock(&tasklist_lock); @@ -86,10 +93,10 @@ } static int -match_sid(const struct sk_buff *skb, pid_t sid) +match_sid(const struct sock *sk, pid_t sid) { struct task_struct *g, *p; - struct file *file = skb->sk->sk_socket->file; + struct file *file = sk->sk_socket->file; int i, found=0; read_lock(&tasklist_lock); @@ -129,41 +136,71 @@ int *hotdrop) { const struct ipt_owner_info *info = matchinfo; + struct iphdr *iph = skb->nh.iph; + struct sock *sk = NULL; + int ret = 0; + + if (out) { + sk = skb->sk; + } else { + if (iph->protocol == IPPROTO_TCP) { + struct tcphdr *tcph = + (struct tcphdr *)((u_int32_t *)iph + iph->ihl); + sk = tcp_v4_lookup(iph->saddr, tcph->source, + iph->daddr, tcph->dest, + skb->dev->ifindex); + if (sk && sk->sk_state == TCP_TIME_WAIT) { + tcp_tw_put((struct tcp_tw_bucket *)sk); + return ret; + } + } else if (iph->protocol == IPPROTO_UDP) { + struct udphdr *udph = + (struct udphdr *)((u_int32_t *)iph + iph->ihl); + 
sk = udp_v4_lookup(iph->saddr, udph->source, iph->daddr, + udph->dest, skb->dev->ifindex); + } + } - if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return 0; + if (!sk || !sk->sk_socket || !sk->sk_socket->file) + goto out; if(info->match & IPT_OWNER_UID) { - if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ + if ((sk->sk_socket->file->f_uid != info->uid) ^ !!(info->invert & IPT_OWNER_UID)) - return 0; + goto out; } if(info->match & IPT_OWNER_GID) { - if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ + if ((sk->sk_socket->file->f_gid != info->gid) ^ !!(info->invert & IPT_OWNER_GID)) - return 0; + goto out; } if(info->match & IPT_OWNER_PID) { - if (!match_pid(skb, info->pid) ^ + if (!match_pid(sk, info->pid) ^ !!(info->invert & IPT_OWNER_PID)) - return 0; + goto out; } if(info->match & IPT_OWNER_SID) { - if (!match_sid(skb, info->sid) ^ + if (!match_sid(sk, info->sid) ^ !!(info->invert & IPT_OWNER_SID)) - return 0; + goto out; } if(info->match & IPT_OWNER_COMM) { - if (!match_comm(skb, info->comm) ^ + if (!match_comm(sk, info->comm) ^ !!(info->invert & IPT_OWNER_COMM)) - return 0; + goto out; } - return 1; + ret = 1; + +out: + if (in && sk) + sock_put(sk); + + return ret; } static int @@ -173,11 +210,19 @@ unsigned int matchsize, unsigned int hook_mask) { - if (hook_mask - & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { - printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); - return 0; - } + if (hook_mask + & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING) | + (1 << NF_IP_LOCAL_IN))) { + printk("ipt_owner: only valid for LOCAL_IN, LOCAL_OUT " + "or POST_ROUTING.\n"); + return 0; + } + + if ((hook_mask & (1 << NF_IP_LOCAL_IN)) + && ip->proto != IPPROTO_TCP && ip->proto != IPPROTO_UDP) { + printk("ipt_owner: only TCP or UDP can be used in LOCAL_IN\n"); + return 0; + } if (matchsize != IPT_ALIGN(sizeof(struct ipt_owner_info))) { printk("Matchsize %u != %Zu\n", matchsize, diff -Nur --exclude '*.orig' 
linux-2.6.11.3.org/net/ipv4/netfilter/ipt_policy.c linux-2.6.11.3/net/ipv4/netfilter/ipt_policy.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_policy.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_policy.c 2005-03-13 22:06:07.322440984 +0100 @@ -0,0 +1,176 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004 Patrick McHardy, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e) +{ +#define MISMATCH(x,y) (e->match.x && ((e->x != (y)) ^ e->invert.x)) + + if (MISMATCH(saddr, x->props.saddr.a4 & e->smask) || + MISMATCH(daddr, x->id.daddr.a4 & e->dmask) || + MISMATCH(proto, x->id.proto) || + MISMATCH(mode, x->props.mode) || + MISMATCH(spi, x->id.spi) || + MISMATCH(reqid, x->props.reqid)) + return 0; + return 1; +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 
1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info) +{ + const struct ipt_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, int offset, int *hotdrop) +{ + const struct ipt_policy_info *info = matchinfo; + int ret; + + if (info->flags & POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) { + if (info->flags & POLICY_MATCH_NONE) + ret = 1; + else + ret = 0; + } else if (info->flags & POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_policy_info *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ipt_policy: matchsize %u != %u\n", + matchsize, IPT_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (POLICY_MATCH_IN|POLICY_MATCH_OUT))) { + printk(KERN_ERR "ipt_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) + && info->flags & POLICY_MATCH_OUT) { + printk(KERN_ERR "ipt_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) + && info->flags & POLICY_MATCH_IN) { + printk(KERN_ERR "ipt_policy: input policy not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + 
} + if (info->len > POLICY_MAX_ELEM) { + printk(KERN_ERR "ipt_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ipt_match policy_match = +{ + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_psd.c linux-2.6.11.3/net/ipv4/netfilter/ipt_psd.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_psd.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_psd.c 2005-03-13 22:03:58.556016456 +0100 @@ -0,0 +1,358 @@ +/* + This is a module which is used for PSD (portscan detection) + Derived from scanlogd v2.1 written by Solar Designer + and LOG target module. + + Copyright (C) 2000,2001 astaro AG + + This file is distributed under the terms of the GNU General Public + License (GPL). Copies of the GPL can be obtained from: + ftp://prep.ai.mit.edu/pub/gnu/GPL + + 2000-05-04 Markus Hennig : initial + 2000-08-18 Dennis Koslowski : first release + 2000-12-01 Dennis Koslowski : UDP scans detection added + 2001-01-02 Dennis Koslowski : output modified + 2001-02-04 Jan Rekorajski : converted from target to match + 2004-05-05 Martijn Lievaart : ported to 2.6 +*/ + +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Dennis Koslowski "); + +#define HF_DADDR_CHANGING 0x01 +#define HF_SPORT_CHANGING 0x02 +#define HF_TOS_CHANGING 0x04 +#define HF_TTL_CHANGING 0x08 + +/* + * Information we keep per each target port + */ +struct port { + u_int16_t number; /* port number */ + u_int8_t proto; /* protocol number */ + u_int8_t and_flags; /* tcp ANDed flags */ + u_int8_t or_flags; /* tcp ORed flags */ +}; + +/* + * Information we keep per each source address. + */ +struct host { + struct host *next; /* Next entry with the same hash */ + clock_t timestamp; /* Last update time */ + struct in_addr src_addr; /* Source address */ + struct in_addr dest_addr; /* Destination address */ + unsigned short src_port; /* Source port */ + int count; /* Number of ports in the list */ + int weight; /* Total weight of ports in the list */ + struct port ports[SCAN_MAX_COUNT - 1]; /* List of ports */ + unsigned char tos; /* TOS */ + unsigned char ttl; /* TTL */ + unsigned char flags; /* HF_ flags bitmask */ +}; + +/* + * State information. + */ +static struct { + spinlock_t lock; + struct host list[LIST_SIZE]; /* List of source addresses */ + struct host *hash[HASH_SIZE]; /* Hash: pointers into the list */ + int index; /* Oldest entry to be replaced */ +} state; + +/* + * Convert an IP address into a hash table index. 
+ */ +static inline int hashfunc(struct in_addr addr) +{ + unsigned int value; + int hash; + + value = addr.s_addr; + hash = 0; + do { + hash ^= value; + } while ((value >>= HASH_LOG)); + + return hash & (HASH_SIZE - 1); +} + +static int +ipt_psd_match(const struct sk_buff *pskb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + struct iphdr *ip_hdr; + struct tcphdr *tcp_hdr; + struct in_addr addr; + u_int16_t src_port,dest_port; + u_int8_t tcp_flags, proto; + clock_t now; + struct host *curr, *last, **head; + int hash, index, count; + + /* Parameters from userspace */ + const struct ipt_psd_info *psdinfo = matchinfo; + + /* IP header */ + ip_hdr = pskb->nh.iph; + + /* Sanity check */ + if (ntohs(ip_hdr->frag_off) & IP_OFFSET) { + DEBUGP("PSD: sanity check failed\n"); + return 0; + } + + /* TCP or UDP ? */ + proto = ip_hdr->protocol; + + if (proto != IPPROTO_TCP && proto != IPPROTO_UDP) { + DEBUGP("PSD: protocol not supported\n"); + return 0; + } + + /* Get the source address, source & destination ports, and TCP flags */ + + addr.s_addr = ip_hdr->saddr; + + tcp_hdr = (struct tcphdr*)((u_int32_t *)ip_hdr + ip_hdr->ihl); + + /* Yep, it´s dirty */ + src_port = tcp_hdr->source; + dest_port = tcp_hdr->dest; + + if (proto == IPPROTO_TCP) { + tcp_flags = *((u_int8_t*)tcp_hdr + 13); + } + else { + tcp_flags = 0x00; + } + + /* We're using IP address 0.0.0.0 for a special purpose here, so don't let + * them spoof us. [DHCP needs this feature - HW] */ + if (!addr.s_addr) { + DEBUGP("PSD: spoofed source address (0.0.0.0)\n"); + return 0; + } + + /* Use jiffies here not to depend on someone setting the time while we're + * running; we need to be careful with possible return value overflows. */ + now = jiffies; + + spin_lock(&state.lock); + + /* Do we know this source address already? 
*/ + count = 0; + last = NULL; + if ((curr = *(head = &state.hash[hash = hashfunc(addr)]))) + do { + if (curr->src_addr.s_addr == addr.s_addr) break; + count++; + if (curr->next) last = curr; + } while ((curr = curr->next)); + + if (curr) { + + /* We know this address, and the entry isn't too old. Update it. */ + if (now - curr->timestamp <= (psdinfo->delay_threshold*HZ)/100 && + time_after_eq(now, curr->timestamp)) { + + /* Just update the appropriate list entry if we've seen this port already */ + for (index = 0; index < curr->count; index++) { + if (curr->ports[index].number == dest_port) { + curr->ports[index].proto = proto; + curr->ports[index].and_flags &= tcp_flags; + curr->ports[index].or_flags |= tcp_flags; + goto out_no_match; + } + } + + /* TCP/ACK and/or TCP/RST to a new port? This could be an outgoing connection. */ + if (proto == IPPROTO_TCP && (tcp_hdr->ack || tcp_hdr->rst)) + goto out_no_match; + + /* Packet to a new port, and not TCP/ACK: update the timestamp */ + curr->timestamp = now; + + /* Logged this scan already? Then drop the packet. */ + if (curr->weight >= psdinfo->weight_threshold) + goto out_match; + + /* Specify if destination address, source port, TOS or TTL are not fixed */ + if (curr->dest_addr.s_addr != ip_hdr->daddr) + curr->flags |= HF_DADDR_CHANGING; + if (curr->src_port != src_port) + curr->flags |= HF_SPORT_CHANGING; + if (curr->tos != ip_hdr->tos) + curr->flags |= HF_TOS_CHANGING; + if (curr->ttl != ip_hdr->ttl) + curr->flags |= HF_TTL_CHANGING; + + /* Update the total weight */ + curr->weight += (ntohs(dest_port) < 1024) ? + psdinfo->lo_ports_weight : psdinfo->hi_ports_weight; + + /* Got enough destination ports to decide that this is a scan? */ + /* Then log it and drop the packet. 
*/ + if (curr->weight >= psdinfo->weight_threshold) + goto out_match; + + /* Remember the new port */ + if (curr->count < SCAN_MAX_COUNT) { + curr->ports[curr->count].number = dest_port; + curr->ports[curr->count].proto = proto; + curr->ports[curr->count].and_flags = tcp_flags; + curr->ports[curr->count].or_flags = tcp_flags; + curr->count++; + } + + goto out_no_match; + } + + /* We know this address, but the entry is outdated. Mark it unused, and + * remove from the hash table. We'll allocate a new entry instead since + * this one might get re-used too soon. */ + curr->src_addr.s_addr = 0; + if (last) + last->next = last->next->next; + else if (*head) + *head = (*head)->next; + last = NULL; + } + + /* We don't need an ACK from a new source address */ + if (proto == IPPROTO_TCP && tcp_hdr->ack) + goto out_no_match; + + /* Got too many source addresses with the same hash value? Then remove the + * oldest one from the hash table, so that they can't take too much of our + * CPU time even with carefully chosen spoofed IP addresses. */ + if (count >= HASH_MAX && last) last->next = NULL; + + /* We're going to re-use the oldest list entry, so remove it from the hash + * table first (if it is really already in use, and isn't removed from the + * hash table already because of the HASH_MAX check above). 
*/ + + /* First, find it */ + if (state.list[state.index].src_addr.s_addr) + head = &state.hash[hashfunc(state.list[state.index].src_addr)]; + else + head = &last; + last = NULL; + if ((curr = *head)) + do { + if (curr == &state.list[state.index]) break; + last = curr; + } while ((curr = curr->next)); + + /* Then, remove it */ + if (curr) { + if (last) + last->next = last->next->next; + else if (*head) + *head = (*head)->next; + } + + /* Get our list entry */ + curr = &state.list[state.index++]; + if (state.index >= LIST_SIZE) state.index = 0; + + /* Link it into the hash table */ + head = &state.hash[hash]; + curr->next = *head; + *head = curr; + + /* And fill in the fields */ + curr->timestamp = now; + curr->src_addr = addr; + curr->dest_addr.s_addr = ip_hdr->daddr; + curr->src_port = src_port; + curr->count = 1; + curr->weight = (ntohs(dest_port) < 1024) ? + psdinfo->lo_ports_weight : psdinfo->hi_ports_weight; + curr->ports[0].number = dest_port; + curr->ports[0].proto = proto; + curr->ports[0].and_flags = tcp_flags; + curr->ports[0].or_flags = tcp_flags; + curr->tos = ip_hdr->tos; + curr->ttl = ip_hdr->ttl; + +out_no_match: + spin_unlock(&state.lock); + return 0; + +out_match: + spin_unlock(&state.lock); + return 1; +} + +static int ipt_psd_checkentry(const char *tablename, + const struct ipt_ip *e, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ +/* const struct ipt_psd_info *psdinfo = targinfo;*/ + + /* we accept TCP only */ +/* if (e->ip.proto != IPPROTO_TCP) { */ +/* DEBUGP("PSD: specified protocol may be TCP only\n"); */ +/* return 0; */ +/* } */ + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_psd_info))) { + DEBUGP("PSD: matchsize %u != %u\n", + matchsize, + IPT_ALIGN(sizeof(struct ipt_psd_info))); + return 0; + } + + return 1; +} + +static struct ipt_match ipt_psd_reg = { + .name = "psd", + .match = ipt_psd_match, + .checkentry = ipt_psd_checkentry, + .me = THIS_MODULE }; + +static int __init init(void) +{ + if 
(ipt_register_match(&ipt_psd_reg)) + return -EINVAL; + + memset(&state, 0, sizeof(state)); + + spin_lock_init(&(state.lock)); + + printk("netfilter PSD loaded - (c) astaro AG\n"); + return 0; +} + +static void __exit fini(void) +{ + ipt_unregister_match(&ipt_psd_reg); + printk("netfilter PSD unloaded - (c) astaro AG\n"); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_quota.c linux-2.6.11.3/net/ipv4/netfilter/ipt_quota.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_quota.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_quota.c 2005-03-13 22:03:59.678845760 +0100 @@ -0,0 +1,91 @@ +/* + * netfilter module to enforce network quotas + * + * Sam Johnston + */ +#include +#include +#include +#include + +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Sam Johnston "); + +static spinlock_t quota_lock = SPIN_LOCK_UNLOCKED; + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, int *hotdrop) +{ + struct ipt_quota_info *q = (struct ipt_quota_info *) matchinfo; + unsigned int datalen; + + if (skb->len < sizeof(struct iphdr)) + return NF_ACCEPT; + + datalen = skb->len - skb->nh.iph->ihl*4; + + spin_lock_bh("a_lock); + + if (q->quota >= datalen) { + /* we can afford this one */ + q->quota -= datalen; + spin_unlock_bh("a_lock); + +#ifdef DEBUG_IPT_QUOTA + printk("IPT Quota OK: %llu datlen %d \n", q->quota, datalen); +#endif + return 1; + } + + /* so we do not allow even small packets from now on */ + q->quota = 0; + +#ifdef DEBUG_IPT_QUOTA + printk("IPT Quota Failed: %llu datlen %d \n", q->quota, datalen); +#endif + + spin_unlock_bh("a_lock); + return 0; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, unsigned int matchsize, unsigned int hook_mask) +{ + /* TODO: spinlocks? sanity checks? 
*/ + if (matchsize != IPT_ALIGN(sizeof (struct ipt_quota_info))) + return 0; + + return 1; +} + +static struct ipt_match quota_match = { + .name = "quota", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE +}; + +static int __init +init(void) +{ + return ipt_register_match("a_match); +} + +static void __exit +fini(void) +{ + ipt_unregister_match("a_match); +} + +module_init(init); +module_exit(fini); + diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_set.c linux-2.6.11.3/net/ipv4/netfilter/ipt_set.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_set.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_set.c 2005-03-13 22:04:00.492722032 +0100 @@ -0,0 +1,112 @@ +/* Copyright (C) 2000-2002 Joakim Axelsson + * Patrick Schaaf + * Martin Josefsson + * Copyright (C) 2003-2004 Jozsef Kadlecsik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* Kernel module to match an IP set. 
*/ + +#include +#include +#include + +#include +#include +#include + +static inline int +match_set(const struct ipt_set_info *info, + const struct sk_buff *skb, + int inv) +{ + if (ip_set_testip_kernel(info->index, skb, info->flags)) + inv = !inv; + return inv; +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_set_info_match *info = matchinfo; + + return match_set(&info->match_set, + skb, + info->match_set.flags[0] & IPSET_MATCH_INV); +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_set_info_match *info = + (struct ipt_set_info_match *) matchinfo; + ip_set_id_t index; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_set_info_match))) { + ip_set_printk("invalid matchsize %d", matchsize); + return 0; + } + + index = ip_set_get_byindex(info->match_set.index); + + if (index == IP_SET_INVALID_ID) { + ip_set_printk("Cannot find set indentified by id %u to match", + info->match_set.index); + return 0; /* error */ + } + if (info->match_set.flags[IP_SET_MAX_BINDINGS] != 0) { + ip_set_printk("That's nasty!"); + return 0; /* error */ + } + + return 1; +} + +static void destroy(void *matchinfo, unsigned int matchsize) +{ + struct ipt_set_info_match *info = matchinfo; + + if (matchsize != IPT_ALIGN(sizeof(struct ipt_set_info_match))) { + ip_set_printk("invalid matchsize %d", matchsize); + return; + } + + ip_set_put(info->match_set.index); +} + +static struct ipt_match set_match = { + .name = "set", + .match = &match, + .checkentry = &checkentry, + .destroy = &destroy, + .me = THIS_MODULE +}; + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jozsef Kadlecsik "); +MODULE_DESCRIPTION("iptables IP set match module"); + +static int __init init(void) +{ + return ipt_register_match(&set_match); +} + +static void __exit fini(void) +{ + 
ipt_unregister_match(&set_match); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_time.c linux-2.6.11.3/net/ipv4/netfilter/ipt_time.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_time.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_time.c 2005-03-13 22:04:01.209613048 +0100 @@ -0,0 +1,191 @@ +/* + This is a module which is used for time matching + It is using some modified code from dietlibc (localtime() function) + that you can find at http://www.fefe.de/dietlibc/ + This file is distributed under the terms of the GNU General Public + License (GPL). Copies of the GPL can be obtained from: ftp://prep.ai.mit.edu/pub/gnu/GPL + 2001-05-04 Fabrice MARIE : initial development. + 2001-21-05 Fabrice MARIE : bug fix in the match code, + thanks to "Zeng Yu" for bug report. + 2001-26-09 Fabrice MARIE : force the match to be in LOCAL_IN or PRE_ROUTING only. + 2001-30-11 Fabrice : added the possibility to use the match in FORWARD/OUTPUT with a little hack, + added Nguyen Dang Phuoc Dong patch to support timezones. + 2004-05-02 Fabrice : added support for date matching, from an idea of Fabien COELHO. +*/ + +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Fabrice MARIE "); +MODULE_DESCRIPTION("Match arrival timestamp/date"); +MODULE_LICENSE("GPL"); + +struct tm +{ + int tm_sec; /* Seconds. [0-60] (1 leap second) */ + int tm_min; /* Minutes. [0-59] */ + int tm_hour; /* Hours. [0-23] */ + int tm_mday; /* Day. [1-31] */ + int tm_mon; /* Month. [0-11] */ + int tm_year; /* Year - 1900. */ + int tm_wday; /* Day of week. [0-6] */ + int tm_yday; /* Days in year.[0-365] */ + int tm_isdst; /* DST. 
[-1/0/1]*/ + + long int tm_gmtoff; /* we don't care, we count from GMT */ + const char *tm_zone; /* we don't care, we count from GMT */ +}; + +void +localtime(const time_t *timepr, struct tm *r); + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + const struct ipt_time_info *info = matchinfo; /* match info for rule */ + struct tm currenttime; /* time human readable */ + u_int8_t days_of_week[7] = {64, 32, 16, 8, 4, 2, 1}; + u_int16_t packet_time; + struct timeval kerneltimeval; + time_t packet_local_time; + + /* if kerneltime=1, we don't read the skb->timestamp but kernel time instead */ + if (info->kerneltime) + { + do_gettimeofday(&kerneltimeval); + packet_local_time = kerneltimeval.tv_sec; + } + else + packet_local_time = skb->stamp.tv_sec; + + /* First we make sure we are in the date start-stop boundaries */ + if ((packet_local_time < info->date_start) || (packet_local_time > info->date_stop)) + return 0; /* We are outside the date boundaries */ + + /* Transform the timestamp of the packet, in a human readable form */ + localtime(&packet_local_time, ¤ttime); + + /* check if we match this timestamp, we start by the days... */ + if ((days_of_week[currenttime.tm_wday] & info->days_match) != days_of_week[currenttime.tm_wday]) + return 0; /* the day doesn't match */ + + /* ... check the time now */ + packet_time = (currenttime.tm_hour * 60) + currenttime.tm_min; + if ((packet_time < info->time_start) || (packet_time > info->time_stop)) + return 0; + + /* here we match ! 
*/ + return 1; +} + +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + struct ipt_time_info *info = matchinfo; /* match info for rule */ + + /* First, check that we are in the correct hooks */ + if (hook_mask + & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))) + { + printk("ipt_time: error, only valid for PRE_ROUTING, LOCAL_IN, FORWARD and OUTPUT)\n"); + return 0; + } + /* we use the kerneltime if we are in forward or output */ + info->kerneltime = 1; + if (hook_mask & ~((1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))) + /* we use the skb time */ + info->kerneltime = 0; + + /* Check the size */ + if (matchsize != IPT_ALIGN(sizeof(struct ipt_time_info))) + return 0; + /* Now check the coherence of the data ... */ + if ((info->time_start > 1439) || /* 23*60+59 = 1439*/ + (info->time_stop > 1439)) + { + printk(KERN_WARNING "ipt_time: invalid argument\n"); + return 0; + } + + return 1; +} + +static struct ipt_match time_match = { + .name = "time", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + printk("ipt_time loading\n"); + return ipt_register_match(&time_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&time_match); + printk("ipt_time unloaded\n"); +} + +module_init(init); +module_exit(fini); + + +/* The part below is borowed and modified from dietlibc */ + +/* seconds per day */ +#define SPD 24*60*60 + +void +localtime(const time_t *timepr, struct tm *r) { + time_t i; + time_t timep; + extern struct timezone sys_tz; + const unsigned int __spm[12] = + { 0, + (31), + (31+28), + (31+28+31), + (31+28+31+30), + (31+28+31+30+31), + (31+28+31+30+31+30), + (31+28+31+30+31+30+31), + (31+28+31+30+31+30+31+31), + (31+28+31+30+31+30+31+31+30), + (31+28+31+30+31+30+31+31+30+31), + (31+28+31+30+31+30+31+31+30+31+30), + }; + register time_t work; + + 
timep = (*timepr) - (sys_tz.tz_minuteswest * 60); + work=timep%(SPD); + r->tm_sec=work%60; work/=60; + r->tm_min=work%60; r->tm_hour=work/60; + work=timep/(SPD); + r->tm_wday=(4+work)%7; + for (i=1970; ; ++i) { + register time_t k= (!(i%4) && ((i%100) || !(i%400)))?366:365; + if (work>k) + work-=k; + else + break; + } + r->tm_year=i-1900; + for (i=11; i && __spm[i]>work; --i) ; + r->tm_mon=i; + r->tm_mday=work-__spm[i]+1; +} diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/ipt_unclean.c linux-2.6.11.3/net/ipv4/netfilter/ipt_unclean.c --- linux-2.6.11.3.org/net/ipv4/netfilter/ipt_unclean.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/ipt_unclean.c 2005-03-13 22:06:37.021925976 +0100 @@ -0,0 +1,611 @@ +/* Kernel module to match suspect packets. */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#define limpk(format, args...) \ +do { \ + if (net_ratelimit()) \ + printk("ipt_unclean: %s" format, \ + embedded ? "(embedded packet) " : "" , ## args); \ +} while(0) + +enum icmp_error_status +{ + ICMP_MAY_BE_ERROR, + ICMP_IS_ERROR, + ICMP_NOT_ERROR +}; + +struct icmp_info +{ + size_t min_len, max_len; + enum icmp_error_status err; + u_int8_t min_code, max_code; +}; + +static int +check_ip(const struct sk_buff *skb, unsigned int offset); + +/* ICMP-specific checks. */ +static int +check_icmp(const struct sk_buff *skb, + unsigned int offset, + unsigned int fragoff, + int more_frags, + int embedded) +{ + struct icmphdr icmph; + static struct icmp_info info[] + = { [ICMP_ECHOREPLY] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_DEST_UNREACH] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 15 }, + [ICMP_SOURCE_QUENCH] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 0 }, + [ICMP_REDIRECT] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 3 }, + [ICMP_ECHO] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + /* Router advertisement. */ + [9] + = { 8, 8 + 255 * 8, ICMP_NOT_ERROR, 0, 0 }, + /* Router solicitation. 
*/ + [10] + = { 8, 8, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_TIME_EXCEEDED] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 1 }, + [ICMP_PARAMETERPROB] + = { 8 + 28, 65536, ICMP_IS_ERROR, 0, 1 }, + [ICMP_TIMESTAMP] + = { 20, 20, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_TIMESTAMPREPLY] + = { 20, 20, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_INFO_REQUEST] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_INFO_REPLY] + = { 8, 65536, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_ADDRESS] + = { 12, 12, ICMP_NOT_ERROR, 0, 0 }, + [ICMP_ADDRESSREPLY] + = { 12, 12, ICMP_NOT_ERROR, 0, 0 } }; + + /* Can't do anything if it's a fragment. */ + if (fragoff) + return 1; + + /* CHECK: Must have whole header.. */ + if (skb_copy_bits(skb, offset, &icmph, sizeof(icmph)) < 0) { + limpk("ICMP len=%u too short\n", skb->len - offset); + return 0; + } + + /* If not embedded in an ICMP error already. */ + if (!embedded) { + /* CHECK: Truncated ICMP (even if first fragment). */ + if (icmph.type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph.type].min_len != 0 + && skb->len - offset < info[icmph.type].min_len) { + limpk("ICMP type %u len %u too short\n", + icmph.type, skb->len - offset); + return 0; + } + + /* CHECK: Check within known error ICMPs. */ + if (icmph.type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph.type].err == ICMP_IS_ERROR) { + /* Max IP header size = 60 */ + char inner[60 + 8]; + struct iphdr *inner_ip = (struct iphdr *)inner; + + /* CHECK: Embedded packet must be at least + length of iph + 8 bytes. */ + if (skb_copy_bits(skb, offset + sizeof(icmph), + inner, sizeof(struct iphdr)+8) < 0) { + limpk("ICMP error internal way too short\n"); + return 0; + } + + /* iphhdr may actually be longer: still need 8 + actual protocol bytes. */ + if (offset + sizeof(icmph) + inner_ip->ihl*4 + 8 + > skb->len) { + limpk("ICMP error internal too short\n"); + return 0; + } + if (!check_ip(skb, offset + sizeof(icmph))) + return 0; + } + } else { + /* CHECK: Can't embed ICMP unless known non-error. 
*/ + if (icmph.type >= sizeof(info)/sizeof(struct icmp_info) + || info[icmph.type].err != ICMP_NOT_ERROR) { + limpk("ICMP type %u not embeddable\n", + icmph.type); + return 0; + } + } + + /* CHECK: Invalid ICMP codes. */ + if (icmph.type < sizeof(info)/sizeof(struct icmp_info) + && (icmph.code < info[icmph.type].min_code + || icmph.code > info[icmph.type].max_code)) { + limpk("ICMP type=%u code=%u\n", + icmph.type, icmph.code); + return 0; + } + + /* CHECK: Above maximum length. */ + if (icmph.type < sizeof(info)/sizeof(struct icmp_info) + && info[icmph.type].max_len != 0 + && skb->len - offset > info[icmph.type].max_len) { + limpk("ICMP type=%u too long: %u bytes\n", + icmph.type, skb->len - offset); + return 0; + } + + switch (icmph.type) { + case ICMP_PARAMETERPROB: { + /* CHECK: Problem param must be within error packet's + * IP header. */ + u_int32_t arg = ntohl(icmph.un.gateway); + + if (icmph.code == 0) { + /* We've already made sure it's long enough. */ + struct iphdr iph; + skb_copy_bits(skb, offset + sizeof(icmph), &iph, + sizeof(iph)); + /* Code 0 means that upper 8 bits is pointer + to problem. */ + if ((arg >> 24) >= iph.ihl*4) { + limpk("ICMP PARAMETERPROB ptr = %u\n", + ntohl(icmph.un.gateway) >> 24); + return 0; + } + arg &= 0x00FFFFFF; + } + + /* CHECK: Rest must be zero. */ + if (arg) { + limpk("ICMP PARAMETERPROB nonzero arg = %u\n", + arg); + return 0; + } + break; + } + + case ICMP_TIME_EXCEEDED: + case ICMP_SOURCE_QUENCH: + /* CHECK: Unused must be zero. */ + if (icmph.un.gateway != 0) { + limpk("ICMP type=%u unused = %u\n", + icmph.type, ntohl(icmph.un.gateway)); + return 0; + } + break; + } + + return 1; +} + +/* UDP-specific checks. */ +static int +check_udp(const struct sk_buff *skb, + unsigned int offset, + unsigned int fragoff, + int more_frags, + int embedded) +{ + struct udphdr udph; + + /* Can't do anything if it's a fragment. */ + if (fragoff) + return 1; + + /* CHECK: Must cover UDP header. 
*/ + if (skb_copy_bits(skb, offset, &udph, sizeof(udph)) < 0) { + limpk("UDP len=%u too short\n", skb->len - offset); + return 0; + } + + /* CHECK: Destination port can't be zero. */ + if (!udph.dest) { + limpk("UDP zero destination port\n"); + return 0; + } + + if (!more_frags) { + if (!embedded) { + /* CHECK: UDP length must match. */ + if (ntohs(udph.len) != skb->len - offset) { + limpk("UDP len too short %u vs %u\n", + ntohs(udph.len), skb->len - offset); + return 0; + } + } else { + /* CHECK: UDP length be >= this truncated pkt. */ + if (ntohs(udph.len) < skb->len - offset) { + limpk("UDP len too long %u vs %u\n", + ntohs(udph.len), skb->len - offset); + return 0; + } + } + } else { + /* CHECK: UDP length must be > this frag's length. */ + if (ntohs(udph.len) <= skb->len - offset) { + limpk("UDP fragment len too short %u vs %u\n", + ntohs(udph.len), skb->len - offset); + return 0; + } + } + + return 1; +} + +/* TCP-specific checks. */ +static int +check_tcp(const struct sk_buff *skb, + unsigned int offset, + unsigned int fragoff, + int more_frags, + int embedded) +{ + struct tcphdr tcph; + unsigned char opt[15 * 4 - sizeof(struct tcphdr)]; + u32 tcpflags; + int end_of_options = 0; + unsigned int i, optlen; + + /* CHECK: Can't have offset=1: used to override TCP syn-checks. */ + /* In fact, this is caught below (offset < 516). */ + + /* Can't do anything if it's a fragment. */ + if (fragoff) + return 1; + + /* CHECK: Smaller than minimal TCP hdr. 
*/ + if (skb_copy_bits(skb, offset, &tcph, sizeof(tcph)) < 0) { + u16 ports[2]; + + if (!embedded) { + limpk("Packet length %u < TCP header.\n", + skb->len - offset); + return 0; + } + + /* Must have ports available (datalen >= 8), from + check_icmp which set embedded = 1 */ + /* CHECK: TCP ports inside ICMP error */ + skb_copy_bits(skb, offset, ports, sizeof(ports)); + if (!ports[0] || !ports[1]) { + limpk("Zero TCP ports %u/%u.\n", + htons(ports[0]), htons(ports[1])); + return 0; + } + return 1; + } + + /* CHECK: TCP header claims tiny size. */ + if (tcph.doff * 4 < sizeof(tcph)) { + limpk("TCP header claims tiny size %u\n", tcph.doff * 4); + return 0; + } + + /* CHECK: Packet smaller than actual TCP hdr. */ + optlen = tcph.doff*4 - sizeof(tcph); + if (skb_copy_bits(skb, offset + sizeof(tcph), opt, optlen) < 0) { + if (!embedded) { + limpk("Packet length %u < actual TCP header.\n", + skb->len - offset); + return 0; + } else + return 1; + } + + /* CHECK: TCP ports non-zero */ + if (!tcph.source || !tcph.dest) { + limpk("Zero TCP ports %u/%u.\n", + htons(tcph.source), htons(tcph.dest)); + return 0; + } + + tcpflags = tcp_flag_word(&tcph); + + /* CHECK: TCP reserved bits zero. */ + if (tcpflags & TCP_RESERVED_BITS) { + limpk("TCP reserved bits not zero\n"); + return 0; + } + + tcpflags &= ~(TCP_DATA_OFFSET | TCP_FLAG_CWR | TCP_FLAG_ECE + | __constant_htonl(0x0000FFFF)); + + /* CHECK: TCP flags. 
*/ + if (tcpflags != TCP_FLAG_SYN + && tcpflags != (TCP_FLAG_SYN|TCP_FLAG_ACK) + && tcpflags != TCP_FLAG_RST + && tcpflags != (TCP_FLAG_RST|TCP_FLAG_ACK) + && tcpflags != (TCP_FLAG_RST|TCP_FLAG_ACK|TCP_FLAG_PSH) + && tcpflags != (TCP_FLAG_FIN|TCP_FLAG_ACK) + && tcpflags != TCP_FLAG_ACK + && tcpflags != (TCP_FLAG_ACK|TCP_FLAG_PSH) + && tcpflags != (TCP_FLAG_ACK|TCP_FLAG_URG) + && tcpflags != (TCP_FLAG_ACK|TCP_FLAG_URG|TCP_FLAG_PSH) + && tcpflags != (TCP_FLAG_FIN|TCP_FLAG_ACK|TCP_FLAG_PSH) + && tcpflags != (TCP_FLAG_FIN|TCP_FLAG_ACK|TCP_FLAG_URG) + && tcpflags != (TCP_FLAG_FIN|TCP_FLAG_ACK|TCP_FLAG_URG + |TCP_FLAG_PSH)) { + limpk("TCP flags bad: 0x%04X\n", ntohl(tcpflags) >> 16); + return 0; + } + + for (i = 0; i < optlen; ) { + switch (opt[i]) { + case 0: + end_of_options = 1; + i++; + break; + case 1: + i++; + break; + default: + /* CHECK: options after EOO. */ + if (end_of_options) { + limpk("TCP option %u after end\n", + opt[i]); + return 0; + } + /* CHECK: options at tail. */ + else if (i+1 >= optlen) { + limpk("TCP option %u at tail\n", + opt[i]); + return 0; + } + /* CHECK: zero-length options. */ + else if (opt[i+1] == 0) { + limpk("TCP option %u 0 len\n", + opt[i]); + return 0; + } + /* CHECK: oversize options. */ + else if (i + opt[i+1] > optlen) { + limpk("TCP option %u at %u too long\n", + (unsigned int) opt[i], i); + return 0; + } + /* Move to next option */ + i += opt[i+1]; + } + } + + return 1; +} + +/* Returns 1 if ok */ +/* Standard IP checks. */ +static int +check_ip(const struct sk_buff *skb, unsigned int offset) +{ + int end_of_options = 0; + unsigned int datalen, optlen; + unsigned int i; + unsigned int fragoff; + struct iphdr iph; + unsigned char opt[15 * 4 - sizeof(struct iphdr)]; + int embedded = offset; + + /* Should only happen for local outgoing raw-socket packets. */ + /* CHECK: length >= ip header. 
*/ + if (skb_copy_bits(skb, offset, &iph, sizeof(iph)) < 0) { + limpk("Packet length %u < IP header.\n", skb->len - offset); + return 0; + } + if (iph.ihl * 4 < sizeof(iph)) { + limpk("IP len %u < minimum IP header.\n", iph.ihl*4); + return 0; + } + + optlen = iph.ihl * 4 - sizeof(iph); + if (skb_copy_bits(skb, offset+sizeof(struct iphdr), opt, optlen)<0) { + limpk("Packet length %u < IP header %u.\n", + skb->len - offset, iph.ihl * 4); + return 0; + } + + fragoff = (ntohs(iph.frag_off) & IP_OFFSET); + datalen = skb->len - (offset + sizeof(struct iphdr) + optlen); + + /* CHECK: Embedded fragment. */ + if (offset && fragoff) { + limpk("Embedded fragment.\n"); + return 0; + } + + for (i = 0; i < optlen; ) { + switch (opt[i]) { + case 0: + end_of_options = 1; + i++; + break; + case 1: + i++; + break; + default: + /* CHECK: options after EOO. */ + if (end_of_options) { + limpk("IP option %u after end\n", + opt[i]); + return 0; + } + /* CHECK: options at tail. */ + else if (i+1 >= optlen) { + limpk("IP option %u at tail\n", + opt[i]); + return 0; + } + /* CHECK: zero-length or one-length options. */ + else if (opt[i+1] < 2) { + limpk("IP option %u %u len\n", + opt[i], opt[i+1]); + return 0; + } + /* CHECK: oversize options. */ + else if (i + opt[i+1] > optlen) { + limpk("IP option %u at %u too long\n", + opt[i], i); + return 0; + } + /* Move to next option */ + i += opt[i+1]; + } + } + + /* Fragment checks. */ + + /* CHECK: More fragments, but doesn't fill 8-byte boundary. */ + if ((ntohs(iph.frag_off) & IP_MF) + && (ntohs(iph.tot_len) % 8) != 0) { + limpk("Truncated fragment %u long.\n", ntohs(iph.tot_len)); + return 0; + } + + /* CHECK: Oversize fragment a-la Ping of Death. */ + if (fragoff * 8 + datalen > 65535) { + limpk("Oversize fragment to %u.\n", fragoff * 8); + return 0; + } + + /* CHECK: DF set and fragoff or MF set. 
*/ + if ((ntohs(iph.frag_off) & IP_DF) + && (fragoff || (ntohs(iph.frag_off) & IP_MF))) { + limpk("DF set and offset=%u, MF=%u.\n", + fragoff, ntohs(iph.frag_off) & IP_MF); + return 0; + } + + /* CHECK: Zero-sized fragments. */ + if ((fragoff || (ntohs(iph.frag_off) & IP_MF)) + && datalen == 0) { + limpk("Zero size fragment offset=%u\n", fragoff); + return 0; + } + + /* Note: we can have even middle fragments smaller than this: + consider a large packet passing through a 600MTU then + 576MTU link: this gives a fragment of 24 data bytes. But + everyone packs fragments largest first, hence a fragment + can't START before 576 - MAX_IP_HEADER_LEN. */ + + /* Used to be min-size 576: I recall Alan Cox saying ax25 goes + down to 128 (576 taken from RFC 791: All hosts must be + prepared to accept datagrams of up to 576 octets). Use 128 + here. */ +#define MIN_LIKELY_MTU 128 + /* CHECK: Min size of first frag = 128. */ + if ((ntohs(iph.frag_off) & IP_MF) + && fragoff == 0 + && ntohs(iph.tot_len) < MIN_LIKELY_MTU) { + limpk("First fragment size %u < %u\n", ntohs(iph.tot_len), + MIN_LIKELY_MTU); + return 0; + } + + /* CHECK: Min offset of frag = 128 - IP hdr len. */ + if (fragoff && fragoff * 8 < MIN_LIKELY_MTU - iph.ihl * 4) { + limpk("Fragment starts at %u < %u\n", fragoff * 8, + MIN_LIKELY_MTU - iph.ihl * 4); + return 0; + } + + /* CHECK: Protocol specification non-zero. */ + if (iph.protocol == 0) { + limpk("Zero protocol\n"); + return 0; + } + + /* FIXME: This is already checked for in "Oversize fragment" + above --RR */ + /* CHECK: Do not use what is unused. + * First bit of fragmentation flags should be unused. + * May be used by OS fingerprinting tools. + * 04 Jun 2002, Maciej Soltysiak, solt@dns.toxicfilms.tv + */ + if (ntohs(iph.frag_off)>>15) { + limpk("IP unused bit set\n"); + return 0; + } + + /* Per-protocol checks. 
*/ + switch (iph.protocol) { + case IPPROTO_ICMP: + return check_icmp(skb, offset + iph.ihl*4, fragoff, + (ntohs(iph.frag_off) & IP_MF), + embedded); + + case IPPROTO_UDP: + return check_udp(skb, offset + iph.ihl*4, fragoff, + (ntohs(iph.frag_off) & IP_MF), + embedded); + + case IPPROTO_TCP: + return check_tcp(skb, offset + iph.ihl*4, fragoff, + (ntohs(iph.frag_off) & IP_MF), + embedded); + default: + /* Ignorance is bliss. */ + return 1; + } +} + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + int *hotdrop) +{ + return !check_ip(skb, 0); +} + +/* Called when user tries to insert an entry of this type. */ +static int +checkentry(const char *tablename, + const struct ipt_ip *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + if (matchsize != IPT_ALIGN(0)) + return 0; + + return 1; +} + +static struct ipt_match unclean_match = { + .name = "unclean", + .match = &match, + .checkentry = &checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ipt_register_match(&unclean_match); +} + +static void __exit fini(void) +{ + ipt_unregister_match(&unclean_match); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c linux-2.6.11.3/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c --- linux-2.6.11.3.org/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 2005-03-13 22:06:02.173223784 +0100 @@ -0,0 +1,549 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - move L3 protocol dependent part to this file. + * 23 Mar 2004: Yasuyuki Kozakai @USAGI + * - add get_features() to support various size of conntrack + * structures. + * + * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); + +static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple) +{ + u_int32_t _addrs[2], *ap; + ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), + sizeof(u_int32_t) * 2, _addrs); + if (ap == NULL) + return 0; + + tuple->src.u3.ip = ap[0]; + tuple->dst.u3.ip = ap[1]; + + return 1; +} + +static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u3.ip = orig->dst.u3.ip; + tuple->dst.u3.ip = orig->src.u3.ip; + + return 1; +} + +static int ipv4_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", + NIPQUAD(tuple->src.u3.ip), + NIPQUAD(tuple->dst.u3.ip)); +} + +static int ipv4_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns new sk_buff, or NULL */ +static struct sk_buff * +nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + struct sock *sk = skb->sk; +#ifdef CONFIG_NETFILTER_DEBUG + unsigned int olddebug = skb->nf_debug; +#endif + + if (sk) { + sock_hold(sk); + skb_orphan(skb); + } + + local_bh_disable(); + skb = ip_defrag(skb, user); + local_bh_enable(); + + if (!skb) { + if (sk) + sock_put(sk); + return skb; + } + + if (sk) { + skb_set_owner_w(skb, sk); + sock_put(sk); + } + + ip_send_check(skb->nh.iph); 
+ skb->nfcache |= NFC_ALTERED; +#ifdef CONFIG_NETFILTER_DEBUG + /* Packet path as if nothing had happened. */ + skb->nf_debug = olddebug; +#endif + return skb; +} + +static int +ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, + u_int8_t *protonum, int *ret) +{ + /* Never happen */ + if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { + if (net_ratelimit()) { + printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", + (*pskb)->nh.iph->protocol, hooknum); + } + *ret = NF_DROP; + return 0; + } + + /* FIXME: Do this right please. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN; + + *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4; + *protonum = (*pskb)->nh.iph->protocol; + + return 1; +} + +int nat_module_is_loaded = 0; +static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) +{ + if (nat_module_is_loaded) + return NF_CT_F_NAT; + + return NF_CT_F_BASIC; +} + +static unsigned int ipv4_confirm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum nf_conntrack_info ctinfo; + + /* This is where we call the helper: as the packet goes out. */ + ct = nf_ct_get(*pskb, &ctinfo); + if (ct && ct->helper) { + unsigned int ret; + ret = ct->helper->help(pskb, + (*pskb)->nh.raw - (*pskb)->data + + (*pskb)->nh.iph->ihl*4, + ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + } + + /* We've seen it coming out the other side: confirm it */ + + return nf_conntrack_confirm(pskb); +} + +static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + /* Gather fragments. 
*/ + if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { + *pskb = nf_ct_ipv4_gather_frags(*pskb, + hooknum == NF_IP_PRE_ROUTING ? + IP_DEFRAG_CONNTRACK_IN : + IP_DEFRAG_CONNTRACK_OUT); + if (!*pskb) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static unsigned int ipv4_refrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct rtable *rt = (struct rtable *)(*pskb)->dst; + + /* We've seen it coming out the other side: confirm */ + if (ipv4_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT) + return NF_DROP; + + /* Local packets are never produced too large for their + interface. We degfragment them at LOCAL_OUT, however, + so we have to refragment them here. */ + if ((*pskb)->len > dst_pmtu(&rt->u.dst) && + !skb_shinfo(*pskb)->tso_size) { + /* No hook can be after us, so this should be OK. */ + ip_fragment(*pskb, okfn); + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static unsigned int ipv4_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return nf_conntrack_in(PF_INET, hooknum, pskb); +} + +static unsigned int ipv4_conntrack_local(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct iphdr) + || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { + if (net_ratelimit()) + printk("ipt_hook: happy cracking.\n"); + return NF_ACCEPT; + } + return nf_conntrack_in(PF_INET, hooknum, pskb); +} + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. 
*/ +static struct nf_hook_ops ipv4_conntrack_defrag_ops = { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, +}; + +static struct nf_hook_ops ipv4_conntrack_in_ops = { + .hook = ipv4_conntrack_in, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, +}; + +static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, +}; + +static struct nf_hook_ops ipv4_conntrack_local_out_ops = { + .hook = ipv4_conntrack_local, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK, +}; + +/* Refragmenter; last chance. */ +static struct nf_hook_ops ipv4_conntrack_out_ops = { + .hook = ipv4_refrag, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_POST_ROUTING, + .priority = NF_IP_PRI_LAST, +}; + +static struct nf_hook_ops ipv4_conntrack_local_in_ops = { + .hook = ipv4_confirm, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_IP_LOCAL_IN, + .priority = NF_IP_PRI_LAST-1, +}; + +#ifdef CONFIG_SYSCTL +/* From nf_conntrack_proto_icmp.c */ +extern unsigned long nf_ct_icmp_timeout; +static struct ctl_table_header *nf_ct_ipv4_sysctl_header; + +static ctl_table nf_ct_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT, + .procname = "nf_conntrack_icmp_timeout", + .data = &nf_ct_icmp_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = nf_ct_sysctl_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = 
nf_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; +#endif + +/* Fast function for those who don't want to parse /proc (and I don't + blame them). */ +/* Reversing the socket's dst/src point of view gives us the reply + mapping. */ +static int +getorigdst(struct sock *sk, int optval, void __user *user, int *len) +{ + struct inet_sock *inet = inet_sk(sk); + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + + NF_CT_TUPLE_U_BLANK(&tuple); + tuple.src.u3.ip = inet->rcv_saddr; + tuple.src.u.tcp.port = inet->sport; + tuple.dst.u3.ip = inet->daddr; + tuple.dst.u.tcp.port = inet->dport; + tuple.src.l3num = PF_INET; + tuple.dst.protonum = IPPROTO_TCP; + + /* We only do TCP at the moment: is there a better way? */ + if (strcmp(sk->sk_prot->name, "TCP")) { + DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + return -ENOPROTOOPT; + } + + if ((unsigned int) *len < sizeof(struct sockaddr_in)) { + DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", + *len, sizeof(struct sockaddr_in)); + return -EINVAL; + } + + h = nf_conntrack_find_get(&tuple, NULL); + if (h) { + struct sockaddr_in sin; + struct nf_conn *ct = tuplehash_to_ctrack(h); + + sin.sin_family = AF_INET; + sin.sin_port = ct->tuplehash[NF_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + sin.sin_addr.s_addr = ct->tuplehash[NF_CT_DIR_ORIGINAL] + .tuple.dst.u3.ip; + + DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", + NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + nf_ct_put(ct); + if (copy_to_user(user, &sin, sizeof(sin)) != 0) + return -EFAULT; + else + return 0; + } + DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", + NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), + NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} + +static struct nf_sockopt_ops so_getorigdst = { + .pf = PF_INET, + .get_optmin = SO_ORIGINAL_DST, + .get_optmax = SO_ORIGINAL_DST+1, + .get = &getorigdst, +}; + +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { + .l3proto = PF_INET, + 
.name = "ipv4", + .pkt_to_tuple = ipv4_pkt_to_tuple, + .invert_tuple = ipv4_invert_tuple, + .print_tuple = ipv4_print_tuple, + .print_conntrack = ipv4_print_conntrack, + .prepare = ipv4_prepare, + .get_features = ipv4_get_features, + .me = THIS_MODULE, +}; + +extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4; +extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4; +extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp; +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = nf_register_sockopt(&so_getorigdst); + if (ret < 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); + goto cleanup_nothing; + } + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register tcp.\n"); + goto cleanup_sockopt; + } + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register udp.\n"); + goto cleanup_tcp; + } + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register icmp.\n"); + goto cleanup_udp; + } + + ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register ipv4\n"); + goto cleanup_icmp; + } + + ret = nf_register_hook(&ipv4_conntrack_defrag_ops); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n"); + goto cleanup_ipv4; + } + ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n"); + goto cleanup_defragops; + } + + ret = nf_register_hook(&ipv4_conntrack_in_ops); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register pre-routing hook.\n"); + goto cleanup_defraglocalops; + } + + ret = nf_register_hook(&ipv4_conntrack_local_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv4: 
can't register local out hook.\n"); + goto cleanup_inops; + } + + ret = nf_register_hook(&ipv4_conntrack_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } + + ret = nf_register_hook(&ipv4_conntrack_local_in_ops); + if (ret < 0) { + printk("nf_conntrack_ipv4: can't register local in hook.\n"); + goto cleanup_inoutandlocalops; + } + +#ifdef CONFIG_SYSCTL + nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); + if (nf_ct_ipv4_sysctl_header == NULL) { + printk("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; + goto cleanup_localinops; + } +#endif + + /* For use by REJECT target */ + ip_ct_attach = __nf_conntrack_attach; + + return ret; + + cleanup: + ip_ct_attach = NULL; +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nf_ct_ipv4_sysctl_header); + cleanup_localinops: +#endif + nf_unregister_hook(&ipv4_conntrack_local_in_ops); + cleanup_inoutandlocalops: + nf_unregister_hook(&ipv4_conntrack_out_ops); + cleanup_inandlocalops: + nf_unregister_hook(&ipv4_conntrack_local_out_ops); + cleanup_inops: + nf_unregister_hook(&ipv4_conntrack_in_ops); + cleanup_defraglocalops: + nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops); + cleanup_defragops: + nf_unregister_hook(&ipv4_conntrack_defrag_ops); + cleanup_ipv4: + nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + cleanup_icmp: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp); + cleanup_udp: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4); + cleanup_tcp: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4); + cleanup_sockopt: + nf_unregister_sockopt(&so_getorigdst); + cleanup_nothing: + return ret; +} + +MODULE_LICENSE("GPL"); + +static int __init init(void) +{ + need_nf_conntrack(); + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +PROVIDES_CONNTRACK(ipv4); 
+EXPORT_SYMBOL(nf_ct_ipv4_gather_frags); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/netfilter/nf_conntrack_proto_icmp.c linux-2.6.11.3/net/ipv4/netfilter/nf_conntrack_proto_icmp.c --- linux-2.6.11.3.org/net/ipv4/netfilter/nf_conntrack_proto_icmp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/netfilter/nf_conntrack_proto_icmp.c 2005-03-13 22:06:02.188221504 +0100 @@ -0,0 +1,299 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - enable working with Layer 3 protocol independent connection tracking. + * + * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long nf_ct_icmp_timeout = 30*HZ; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int icmp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct icmphdr _hdr, *hp; + + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return 0; + + tuple->dst.u.icmp.type = hp->type; + tuple->src.u.icmp.id = hp->un.echo.id; + tuple->dst.u.icmp.code = hp->code; + + return 1; +} + +static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + /* Add 1; spaces filled with 0. 
*/ + static u_int8_t invmap[] + = { [ICMP_ECHO] = ICMP_ECHOREPLY + 1, + [ICMP_ECHOREPLY] = ICMP_ECHO + 1, + [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, + [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, + [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, + [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, + [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, + [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1}; + + if (orig->dst.u.icmp.type >= sizeof(invmap) + || !invmap[orig->dst.u.icmp.type]) + return 0; + + tuple->src.u.icmp.id = orig->src.u.icmp.id; + tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; + tuple->dst.u.icmp.code = orig->dst.u.icmp.code; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int icmp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); +} + +/* Print out the private part of the conntrack. */ +static int icmp_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int icmp_packet(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* Try to delete connection immediately after all replies: + won't actually vanish as we still have skb, and del_timer + means this will only run once even if count hits zero twice + (theoretically possible with SMP) */ + if (NFCTINFO2DIR(ctinfo) == NF_CT_DIR_REPLY) { + if (atomic_dec_and_test(&ct->proto.icmp.count) + && del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + } else { + atomic_inc(&ct->proto.icmp.count); + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. 
*/ +static int icmp_new(struct nf_conn *conntrack, + const struct sk_buff *skb, unsigned int dataoff) +{ + static u_int8_t valid_new[] + = { [ICMP_ECHO] = 1, + [ICMP_TIMESTAMP] = 1, + [ICMP_INFO_REQUEST] = 1, + [ICMP_ADDRESS] = 1 }; + + if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) + || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { + /* Can't create a new ICMP `conn' with this. */ + DEBUGP("icmp: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmp.type); + NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + return 0; + } + atomic_set(&conntrack->proto.icmp.count, 0); + return 1; +} + +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; +/* Returns conntrack if it dealt with ICMP, and filled in skb fields */ +static int +icmp_error_message(struct sk_buff *skb, + enum nf_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct nf_conntrack_tuple innertuple, origtuple; + struct { + struct icmphdr icmp; + struct iphdr ip; + } _in, *inside; + struct nf_conntrack_protocol *innerproto; + struct nf_conntrack_tuple_hash *h; + int dataoff; + + NF_CT_ASSERT(skb->nfct == NULL); + + /* Not enough header? */ + inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); + if (inside == NULL) + return NF_ACCEPT; + + /* Ignore ICMP's containing fragments (shouldn't happen) */ + if (inside->ip.frag_off & htons(IP_OFFSET)) { + DEBUGP("icmp_error_message: fragment of proto %u\n", + inside->ip.protocol); + return NF_ACCEPT; + } + + innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol); + dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, + inside->ip.protocol, &origtuple, + &nf_conntrack_l3proto_ipv4, innerproto)) { + DEBUGP("icmp_error_message: ! 
get_tuple p=%u", + inside->ip.protocol); + return NF_ACCEPT; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. */ + if (!nf_ct_invert_tuple(&innertuple, &origtuple, + &nf_conntrack_l3proto_ipv4, innerproto)) { + DEBUGP("icmp_error_message: no match\n"); + return NF_ACCEPT; + } + + *ctinfo = NF_CT_RELATED; + + h = nf_conntrack_find_get(&innertuple, NULL); + if (!h) { + /* Locally generated ICMPs will match inverted if they + haven't been SNAT'ed yet */ + /* FIXME: NAT code has to handle half-done double NAT --RR */ + if (hooknum == NF_IP_LOCAL_OUT) + h = nf_conntrack_find_get(&origtuple, NULL); + + if (!h) { + DEBUGP("icmp_error_message: no match\n"); + return NF_ACCEPT; + } + + /* Reverse direction from that found */ + if (NF_CT_DIRECTION(h) == NF_CT_DIR_REPLY) + *ctinfo += NF_CT_IS_REPLY; + } else { + if (NF_CT_DIRECTION(h) == NF_CT_DIR_REPLY) + *ctinfo += NF_CT_IS_REPLY; + } + + /* Update skb to refer to this connection */ + skb->nfct = &tuplehash_to_ctrack(h)->ct_general; + skb->nfctinfo = *ctinfo; + return -NF_ACCEPT; +} + +/* Small and modified version of icmp_rcv */ +static int +icmp_error(struct sk_buff *skb, unsigned int dataoff, + enum nf_conntrack_info *ctinfo, int pf, unsigned int hooknum) +{ + struct icmphdr _ih, *icmph; + + /* Not enough header? 
*/ + icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); + if (icmph == NULL) { + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "nf_ct_icmp: short packet "); + return -NF_ACCEPT; + } + + /* See ip_conntrack_proto_tcp.c */ + if (hooknum != NF_IP_PRE_ROUTING) + goto checksum_skipped; + + switch (skb->ip_summed) { + case CHECKSUM_HW: + if (!(u16)csum_fold(skb->csum)) + break; + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "nf_ct_icmp: bad HW ICMP checksum "); + return -NF_ACCEPT; + case CHECKSUM_NONE: + if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "nf_ct_icmp: bad ICMP checksum "); + return -NF_ACCEPT; + } + default: + break; + } + +checksum_skipped: + /* + * 18 is the highest 'known' ICMP type. Anything else is a mystery + * + * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently + * discarded. + */ + if (icmph->type > NR_ICMP_TYPES) { + if (LOG_INVALID(IPPROTO_ICMP)) + nf_log_packet(PF_INET, 0, skb, NULL, NULL, + "nf_ct_icmp: invalid ICMP type "); + return -NF_ACCEPT; + } + + /* Need to track icmp error message? 
*/ + if (icmph->type != ICMP_DEST_UNREACH + && icmph->type != ICMP_SOURCE_QUENCH + && icmph->type != ICMP_TIME_EXCEEDED + && icmph->type != ICMP_PARAMETERPROB + && icmph->type != ICMP_REDIRECT) + return NF_ACCEPT; + + return icmp_error_message(skb, ctinfo, hooknum); +} + +struct nf_conntrack_protocol nf_conntrack_protocol_icmp = +{ + .list = { NULL, NULL }, + .l3proto = PF_INET, + .proto = IPPROTO_ICMP, + .name = "icmp", + .pkt_to_tuple = icmp_pkt_to_tuple, + .invert_tuple = icmp_invert_tuple, + .print_tuple = icmp_print_tuple, + .print_conntrack = icmp_print_conntrack, + .packet = icmp_packet, + .new = icmp_new, + .error = icmp_error, + .destroy = NULL, + .me = NULL +}; + +EXPORT_SYMBOL(nf_conntrack_protocol_icmp); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv4/udp.c linux-2.6.11.3/net/ipv4/udp.c --- linux-2.6.11.3.org/net/ipv4/udp.c 2005-03-13 07:44:10.000000000 +0100 +++ linux-2.6.11.3/net/ipv4/udp.c 2005-03-13 22:06:04.902808824 +0100 @@ -1567,6 +1567,7 @@ EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_poll); +EXPORT_SYMBOL(udp_v4_lookup); #ifdef CONFIG_PROC_FS EXPORT_SYMBOL(udp_proc_register); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/ip6_output.c linux-2.6.11.3/net/ipv6/ip6_output.c --- linux-2.6.11.3.org/net/ipv6/ip6_output.c 2005-03-13 07:44:02.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/ip6_output.c 2005-03-13 22:06:02.252211776 +0100 @@ -473,9 +473,15 @@ #ifdef CONFIG_NETFILTER to->nfmark = from->nfmark; /* Connection association is same as pre-frag packet */ + nf_conntrack_put(to->nfct); to->nfct = from->nfct; nf_conntrack_get(to->nfct); to->nfctinfo = from->nfctinfo; +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + nf_conntrack_put_reasm(to->nfct_reasm); + to->nfct_reasm = from->nfct_reasm; + nf_conntrack_get_reasm(to->nfct_reasm); +#endif #ifdef CONFIG_BRIDGE_NETFILTER nf_bridge_put(to->nf_bridge); to->nf_bridge = from->nf_bridge; diff -Nur --exclude '*.orig' 
linux-2.6.11.3.org/net/ipv6/netfilter/Kconfig linux-2.6.11.3/net/ipv6/netfilter/Kconfig --- linux-2.6.11.3.org/net/ipv6/netfilter/Kconfig 2005-03-13 07:44:40.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/Kconfig 2005-03-13 22:06:07.329439920 +0100 @@ -238,5 +238,102 @@ If you want to compile it as a module, say M here and read . If unsure, say `N'. +config IP6_NF_TARGET_HL + tristate 'HL target support' + depends on IP6_NF_MANGLE + help + This option adds a `HL' target, which allows you to modify the value of + IPv6 Hop Limit field. + + If you want to compile it as a module, say M here and read + . If unsure, say `N'. + +config IP6_NF_MATCH_FUZZY + tristate 'Fuzzy match support' + depends on IP6_NF_FILTER + help + This option adds a `fuzzy' match, which allows you to match + packets according to a fuzzy logic based law. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP6_NF_MATCH_NTH + tristate 'Nth match support' + depends on IP6_NF_IPTABLES + help + This option adds a `Nth' match, which allow you to make + rules that match every Nth packet. By default there are + 16 different counters. + + [options] + --every Nth Match every Nth packet + [--counter] num Use counter 0-15 (default:0) + [--start] num Initialize the counter at the number 'num' + instead of 0. Must be between 0 and Nth-1 + [--packet] num Match on 'num' packet. Must be between 0 + and Nth-1. + + If --packet is used for a counter than + there must be Nth number of --packet + rules, covering all values between 0 and + Nth-1 inclusively. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config IP6_NF_TARGET_ROUTE + tristate ' ROUTE target support' + depends on IP6_NF_MANGLE + help + This option adds a `ROUTE' target, which enables you to setup unusual + routes. The ROUTE target is also able to change the incoming interface + of a packet. 
+ + The target can be or not a final target. It has to be used inside the + mangle table. + + Not working as a module. + +config IP6_NF_TARGET_ULOG + tristate "ULOG target support" + depends on IP6_NF_IPTABLES && IP_NF_TARGET_ULOG + ---help--- + This option adds a `ULOG' target, which allows you to create rules in + any ip6tables table. The packet is passed to a userspace logging + daemon using netlink multicast sockets; unlike the LOG target + which can only be viewed through syslog. + + This target requires the ipv4 version to be compiled as well. + + The apropriate userspace logging daemon (ulogd) may be obtained from + + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CONNTRACK_IPV6 + tristate "IPv6 support on new connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + This is IPv6 support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is experimental scheme + which generalize ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_MATCH_POLICY + tristate "IPsec policy match support" + depends on IP6_NF_IPTABLES && XFRM + help + Policy matching allows you to match packets based on the + IPsec policy that was used during decapsulation/will + be used during encapsulation. + + To compile it as a module, choose M here. If unsure, say N. 
+ endmenu diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/Makefile linux-2.6.11.3/net/ipv6/netfilter/Makefile --- linux-2.6.11.3.org/net/ipv6/netfilter/Makefile 2005-03-13 07:44:28.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/Makefile 2005-03-13 22:06:07.330439768 +0100 @@ -8,11 +8,13 @@ obj-$(CONFIG_IP6_NF_MATCH_MARK) += ip6t_mark.o obj-$(CONFIG_IP6_NF_MATCH_LENGTH) += ip6t_length.o obj-$(CONFIG_IP6_NF_MATCH_MAC) += ip6t_mac.o +obj-$(CONFIG_IP6_NF_MATCH_FUZZY) += ip6t_fuzzy.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AHESP) += ip6t_esp.o ip6t_ah.o +obj-$(CONFIG_IP6_NF_MATCH_POLICY) += ip6t_policy.o obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o obj-$(CONFIG_IP6_NF_MATCH_MULTIPORT) += ip6t_multiport.o obj-$(CONFIG_IP6_NF_MATCH_OWNER) += ip6t_owner.o @@ -20,7 +22,18 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_TARGET_MARK) += ip6t_MARK.o +obj-$(CONFIG_IP6_NF_TARGET_ROUTE) += ip6t_ROUTE.o obj-$(CONFIG_IP6_NF_QUEUE) += ip6_queue.o obj-$(CONFIG_IP6_NF_TARGET_LOG) += ip6t_LOG.o +obj-$(CONFIG_IP6_NF_TARGET_ULOG) += ip6t_ULOG.o + +obj-$(CONFIG_IP6_NF_MATCH_NTH) += ip6t_nth.o +obj-$(CONFIG_IP6_NF_TARGET_HL) += ip6t_HL.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_MATCH_HL) += ip6t_hl.o + +# objects for l3 independent conntrack +nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o + +# l3 independent conntrack +obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_HL.c linux-2.6.11.3/net/ipv6/netfilter/ip6t_HL.c --- linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_HL.c 1970-01-01 01:00:00.000000000 +0100 +++ 
linux-2.6.11.3/net/ipv6/netfilter/ip6t_HL.c 2005-03-13 22:03:48.783502104 +0100 @@ -0,0 +1,109 @@ +/* + * Hop Limit modification target for ip6tables + * Maciej Soltysiak + * Based on HW's TTL module + * + * This software is distributed under the terms of GNU GPL + */ + +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Maciej Soltysiak "); +MODULE_DESCRIPTION("IP tables Hop Limit modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int ip6t_hl_target(struct sk_buff **pskb, unsigned int hooknum, + const struct net_device *in, const struct net_device *out, + const void *targinfo, void *userinfo) +{ + struct ipv6hdr *ip6h = (*pskb)->nh.ipv6h; + const struct ip6t_HL_info *info = targinfo; + u_int16_t diffs[2]; + int new_hl; + + switch (info->mode) { + case IP6T_HL_SET: + new_hl = info->hop_limit; + break; + case IP6T_HL_INC: + new_hl = ip6h->hop_limit + info->hop_limit; + if (new_hl > 255) + new_hl = 255; + break; + case IP6T_HL_DEC: + new_hl = ip6h->hop_limit - info->hop_limit; + if (new_hl < 0) + new_hl = 0; + break; + default: + new_hl = ip6h->hop_limit; + break; + } + + if (new_hl != ip6h->hop_limit) { + diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; + ip6h->hop_limit = new_hl; + diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); + } + + return IP6T_CONTINUE; +} + +static int ip6t_hl_checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + struct ip6t_HL_info *info = targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_HL_info))) { + printk(KERN_WARNING "HL: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ip6t_HL_info))); + return 0; + } + + if (strcmp(tablename, "mangle")) { + printk(KERN_WARNING "HL: can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } + + if (info->mode > IP6T_HL_MAXMODE) { + printk(KERN_WARNING "HL: invalid or unknown Mode %u\n", + info->mode); + 
return 0; + } + + if ((info->mode != IP6T_HL_SET) && (info->hop_limit == 0)) { + printk(KERN_WARNING "HL: increment/decrement doesn't make sense with value 0\n"); + return 0; + } + + return 1; +} + +static struct ip6t_target ip6t_HL = { + .name = "HL", + .target = ip6t_hl_target, + .checkentry = ip6t_hl_checkentry, + .me = THIS_MODULE +}; + +static int __init init(void) +{ + return ip6t_register_target(&ip6t_HL); +} + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_HL); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_ROUTE.c linux-2.6.11.3/net/ipv6/netfilter/ip6t_ROUTE.c --- linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_ROUTE.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/ip6t_ROUTE.c 2005-03-13 22:04:33.121761664 +0100 @@ -0,0 +1,308 @@ +/* + * This implements the ROUTE v6 target, which enables you to setup unusual + * routes not supported by the standard kernel routing table. + * + * Copyright (C) 2003 Cedric de Launois + * + * v 1.1 2004/11/23 + * + * This software is distributed under GNU GPL v2, 1991 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 1 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +#define NIP6(addr) \ + ntohs((addr).s6_addr16[0]), \ + ntohs((addr).s6_addr16[1]), \ + ntohs((addr).s6_addr16[2]), \ + ntohs((addr).s6_addr16[3]), \ + ntohs((addr).s6_addr16[4]), \ + ntohs((addr).s6_addr16[5]), \ + ntohs((addr).s6_addr16[6]), \ + ntohs((addr).s6_addr16[7]) + +/* Route the packet according to the routing keys specified in + * route_info. 
Keys are : + * - ifindex : + * 0 if no oif preferred, + * otherwise set to the index of the desired oif + * - route_info->gw : + * 0 if no gateway specified, + * otherwise set to the next host to which the pkt must be routed + * If success, skb->dev is the output device to which the packet must + * be sent and skb->dst is not NULL + * + * RETURN: 1 if the packet was succesfully routed to the + * destination desired + * 0 if the kernel routing table could not route the packet + * according to the keys specified + */ +static int +route6(struct sk_buff *skb, + unsigned int ifindex, + const struct ip6t_route_target_info *route_info) +{ + struct rt6_info *rt = NULL; + struct ipv6hdr *ipv6h = skb->nh.ipv6h; + struct in6_addr *gw = (struct in6_addr*)&route_info->gw; + + DEBUGP("ip6t_ROUTE: called with: "); + DEBUGP("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(ipv6h->daddr)); + DEBUGP("GATEWAY=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(*gw)); + DEBUGP("OUT=%s\n", route_info->oif); + + if (ipv6_addr_any(gw)) + rt = rt6_lookup(&ipv6h->daddr, &ipv6h->saddr, ifindex, 1); + else + rt = rt6_lookup(gw, &ipv6h->saddr, ifindex, 1); + + if (!rt) + goto no_route; + + DEBUGP("ip6t_ROUTE: routing gives: "); + DEBUGP("DST=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(rt->rt6i_dst.addr)); + DEBUGP("GATEWAY=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ", NIP6(rt->rt6i_gateway)); + DEBUGP("OUT=%s\n", rt->rt6i_dev->name); + + if (ifindex && rt->rt6i_dev->ifindex!=ifindex) + goto wrong_route; + + if (!rt->rt6i_nexthop) { + DEBUGP("ip6t_ROUTE: discovering neighbour\n"); + rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_dst.addr); + } + + /* Drop old route. 
*/ + dst_release(skb->dst); + skb->dst = &rt->u.dst; + skb->dev = rt->rt6i_dev; + return 1; + + wrong_route: + dst_release(&rt->u.dst); + no_route: + if (!net_ratelimit()) + return 0; + + printk("ip6t_ROUTE: no explicit route found "); + if (ifindex) + printk("via interface %s ", route_info->oif); + if (!ipv6_addr_any(gw)) + printk("via gateway %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x", NIP6(*gw)); + printk("\n"); + return 0; +} + + +/* Stolen from ip6_output_finish + * PRE : skb->dev is set to the device we are leaving by + * skb->dst is not NULL + * POST: the packet is sent with the link layer header pushed + * the packet is destroyed + */ +static void ip_direct_send(struct sk_buff *skb) +{ + struct dst_entry *dst = skb->dst; + struct hh_cache *hh = dst->hh; + + if (hh) { + read_lock_bh(&hh->hh_lock); + memcpy(skb->data - 16, hh->hh_data, 16); + read_unlock_bh(&hh->hh_lock); + skb_push(skb, hh->hh_len); + hh->hh_output(skb); + } else if (dst->neighbour) + dst->neighbour->output(skb); + else { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ip6t_ROUTE: no hdr & no neighbour cache!\n"); + kfree_skb(skb); + } +} + + +static unsigned int +route6_oif(const struct ip6t_route_target_info *route_info, + struct sk_buff *skb) +{ + unsigned int ifindex = 0; + struct net_device *dev_out = NULL; + + /* The user set the interface name to use. + * Getting the current interface index. 
+ */ + if ((dev_out = dev_get_by_name(route_info->oif))) { + ifindex = dev_out->ifindex; + } else { + /* Unknown interface name : packet dropped */ + if (net_ratelimit()) + DEBUGP("ip6t_ROUTE: oif interface %s not found\n", route_info->oif); + + if (route_info->flags & IP6T_ROUTE_CONTINUE) + return IP6T_CONTINUE; + else + return NF_DROP; + } + + /* Trying the standard way of routing packets */ + if (route6(skb, ifindex, route_info)) { + dev_put(dev_out); + if (route_info->flags & IP6T_ROUTE_CONTINUE) + return IP6T_CONTINUE; + + ip_direct_send(skb); + return NF_STOLEN; + } else + return NF_DROP; +} + + +static unsigned int +route6_gw(const struct ip6t_route_target_info *route_info, + struct sk_buff *skb) +{ + if (route6(skb, 0, route_info)) { + if (route_info->flags & IP6T_ROUTE_CONTINUE) + return IP6T_CONTINUE; + + ip_direct_send(skb); + return NF_STOLEN; + } else + return NF_DROP; +} + + +static unsigned int +ip6t_route_target(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const void *targinfo, + void *userinfo) +{ + const struct ip6t_route_target_info *route_info = targinfo; + struct sk_buff *skb = *pskb; + struct in6_addr *gw = (struct in6_addr*)&route_info->gw; + unsigned int res; + + if (route_info->flags & IP6T_ROUTE_CONTINUE) + goto do_it; + + /* If we are at PREROUTING or INPUT hook + * the TTL isn't decreased by the IP stack + */ + if (hooknum == NF_IP6_PRE_ROUTING || + hooknum == NF_IP6_LOCAL_IN) { + + struct ipv6hdr *ipv6h = skb->nh.ipv6h; + + if (ipv6h->hop_limit <= 1) { + /* Force OUTPUT device used as source address */ + skb->dev = skb->dst->dev; + + icmpv6_send(skb, ICMPV6_TIME_EXCEED, + ICMPV6_EXC_HOPLIMIT, 0, skb->dev); + + return NF_DROP; + } + + ipv6h->hop_limit--; + } + + if ((route_info->flags & IP6T_ROUTE_TEE)) { + /* + * Copy the *pskb, and route the copy. Will later return + * IP6T_CONTINUE for the original skb, which should continue + * on its way as if nothing happened. 
The copy should be + * independantly delivered to the ROUTE --gw. + */ + skb = skb_copy(*pskb, GFP_ATOMIC); + if (!skb) { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ip6t_ROUTE: copy failed!\n"); + return IP6T_CONTINUE; + } + } + +do_it: + if (route_info->oif[0]) { + res = route6_oif(route_info, skb); + } else if (!ipv6_addr_any(gw)) { + res = route6_gw(route_info, skb); + } else { + if (net_ratelimit()) + DEBUGP(KERN_DEBUG "ip6t_ROUTE: no parameter !\n"); + res = IP6T_CONTINUE; + } + + if ((route_info->flags & IP6T_ROUTE_TEE)) + res = IP6T_CONTINUE; + + return res; +} + + +static int +ip6t_route_checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + if (strcmp(tablename, "mangle") != 0) { + printk("ip6t_ROUTE: can only be called from \"mangle\" table.\n"); + return 0; + } + + if (targinfosize != IP6T_ALIGN(sizeof(struct ip6t_route_target_info))) { + printk(KERN_WARNING "ip6t_ROUTE: targinfosize %u != %Zu\n", + targinfosize, + IP6T_ALIGN(sizeof(struct ip6t_route_target_info))); + return 0; + } + + return 1; +} + + +static struct ip6t_target ip6t_route_reg = { + .name = "ROUTE", + .target = ip6t_route_target, + .checkentry = ip6t_route_checkentry, + .me = THIS_MODULE +}; + + +static int __init init(void) +{ + printk(KERN_DEBUG "registering ipv6 ROUTE target\n"); + if (ip6t_register_target(&ip6t_route_reg)) + return -EINVAL; + + return 0; +} + + +static void __exit fini(void) +{ + ip6t_unregister_target(&ip6t_route_reg); +} + +module_init(init); +module_exit(fini); +MODULE_LICENSE("GPL"); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_ULOG.c linux-2.6.11.3/net/ipv6/netfilter/ip6t_ULOG.c --- linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_ULOG.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/ip6t_ULOG.c 2005-03-13 22:05:11.750889144 +0100 @@ -0,0 +1,156 @@ +/* + * netfilter module for userspace packet logging daemons + * + * (C) 
2000-2004 by Harald Welte + * + * 2000/09/22 ulog-cprange feature added + * 2001/01/04 in-kernel queue as proposed by Sebastian Zander + * + * 2001/01/30 per-rule nlgroup conflicts with global queue. + * nlgroup now global (sysctl) + * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at + * module loadtime -HW + * 2002/07/07 remove broken nflog_rcv() function -HW + * 2002/08/29 fix shifted/unshifted nlgroup bug -HW + * 2002/10/30 fix uninitialized mac_len field - + * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT + * resulting in bogus 'error during NLMSG_PUT' messages. + * 2005/02/10 ported to ipv6 + * + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Jonas Berlin "); +MODULE_DESCRIPTION("ip6tables userspace logging module"); + +#if 0 +#define DEBUGP(format, args...) printk("%s:%s:" format, \ + __FILE__, __FUNCTION__ , ## args) +#else +#define DEBUGP(format, args...) 
+#endif + +static unsigned int nflog = 1; +module_param(nflog, int, 0400); +MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); + +// from ipt_ULOG.c +void ipt_ulog_packet(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct ipt_ulog_info *loginfo, + const char *prefix); + +static unsigned int ip6t_ulog_target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const void *targinfo, void *userinfo) +{ + struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; + + ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); + + return IP6T_CONTINUE; +} + +static void ip6t_logfn(unsigned int hooknum, + const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const char *prefix) +{ + struct ipt_ulog_info loginfo = { + .nl_group = ULOG_DEFAULT_NLGROUP, + .copy_range = 0, + .qthreshold = ULOG_DEFAULT_QTHRESHOLD, + .prefix = "" + }; + + ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); +} + +static int ip6t_ulog_checkentry(const char *tablename, + const struct ip6t_entry *e, + void *targinfo, + unsigned int targinfosize, + unsigned int hookmask) +{ + struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; + + if (targinfosize != IP6T_ALIGN(sizeof(struct ipt_ulog_info))) { + DEBUGP("ip6t_ULOG: targinfosize %u != 0\n", targinfosize); + return 0; + } + + if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { + DEBUGP("ip6t_ULOG: prefix term %i\n", + loginfo->prefix[sizeof(loginfo->prefix) - 1]); + return 0; + } + + if (loginfo->qthreshold > ULOG_MAX_QLEN) { + DEBUGP("ip6t_ULOG: queue threshold %i > MAX_QLEN\n", + loginfo->qthreshold); + return 0; + } + + return 1; +} + +static struct ip6t_target ip6t_ulog_reg = { + .name = "ULOG", + .target = ip6t_ulog_target, + .checkentry = ip6t_ulog_checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) 
+{ + DEBUGP("ip6t_ULOG: init module\n"); + + if (ip6t_register_target(&ip6t_ulog_reg) != 0) { + return -EINVAL; + } + if (nflog) + nf_log_register(PF_INET, &ip6t_logfn); + + return 0; +} + +static void __exit fini(void) +{ + DEBUGP("ip6t_ULOG: cleanup_module\n"); + + if (nflog) + nf_log_unregister(PF_INET, &ip6t_logfn); + ip6t_unregister_target(&ip6t_ulog_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_fuzzy.c linux-2.6.11.3/net/ipv6/netfilter/ip6t_fuzzy.c --- linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_fuzzy.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/ip6t_fuzzy.c 2005-03-13 22:03:55.524477320 +0100 @@ -0,0 +1,189 @@ +/* + * This module implements a simple TSK FLC + * (Takagi-Sugeno-Kang Fuzzy Logic Controller) that aims + * to limit , in an adaptive and flexible way , the packet rate crossing + * a given stream . It serves as an initial and very simple (but effective) + * example of how Fuzzy Logic techniques can be applied to defeat DoS attacks. + * As a matter of fact , Fuzzy Logic can help us to insert any "behavior" + * into our code in a precise , adaptive and efficient manner. + * The goal is very similar to that of "limit" match , but using techniques of + * Fuzzy Control , that allow us to shape the transfer functions precisely , + * avoiding over and undershoots - and stuff like that . + * + * + * 2002-08-10 Hime Aguiar e Oliveira Jr. : Initial version. + * 2002-08-17 : Changed to eliminate floating point operations . + * 2002-08-23 : Coding style changes . 
+ * 2003-04-08 Maciej Soltysiak : IPv6 Port + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + Packet Acceptance Rate - LOW and Packet Acceptance Rate - HIGH + Expressed in percentage +*/ + +#define PAR_LOW 1/100 +#define PAR_HIGH 1 + +static spinlock_t fuzzy_lock = SPIN_LOCK_UNLOCKED; + +MODULE_AUTHOR("Hime Aguiar e Oliveira Junior "); +MODULE_DESCRIPTION("IP tables Fuzzy Logic Controller match module"); +MODULE_LICENSE("GPL"); + +static u_int8_t mf_high(u_int32_t tx,u_int32_t mini,u_int32_t maxi) +{ + if (tx >= maxi) return 100; + + if (tx <= mini) return 0; + + return ((100 * (tx-mini)) / (maxi-mini)); +} + +static u_int8_t mf_low(u_int32_t tx,u_int32_t mini,u_int32_t maxi) +{ + if (tx <= mini) return 100; + + if (tx >= maxi) return 0; + + return ((100 * (maxi - tx)) / (maxi - mini)); + +} + +static int +ip6t_fuzzy_match(const struct sk_buff *pskb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + /* From userspace */ + + struct ip6t_fuzzy_info *info = (struct ip6t_fuzzy_info *) matchinfo; + + u_int8_t random_number; + unsigned long amount; + u_int8_t howhigh, howlow; + + + spin_lock_bh(&fuzzy_lock); /* Rise the lock */ + + info->bytes_total += pskb->len; + info->packets_total++; + + info->present_time = jiffies; + + if (info->present_time >= info->previous_time) + amount = info->present_time - info->previous_time; + else { + /* There was a transition : I choose to re-sample + and keep the old acceptance rate... + */ + + amount = 0; + info->previous_time = info->present_time; + info->bytes_total = info->packets_total = 0; + }; + + if ( amount > HZ/10) {/* More than 100 ms elapsed ... 
*/ + + info->mean_rate = (u_int32_t) ((HZ * info->packets_total) \ + / amount); + + info->previous_time = info->present_time; + info->bytes_total = info->packets_total = 0; + + howhigh = mf_high(info->mean_rate,info->minimum_rate,info->maximum_rate); + howlow = mf_low(info->mean_rate,info->minimum_rate,info->maximum_rate); + + info->acceptance_rate = (u_int8_t) \ + (howhigh * PAR_LOW + PAR_HIGH * howlow); + + /* In fact, the above defuzzification would require a denominator + * proportional to (howhigh+howlow) but, in this particular case, + * that expression is constant. + * An imediate consequence is that it is not necessary to call + * both mf_high and mf_low - but to keep things understandable, + * I did so. + */ + + } + + spin_unlock_bh(&fuzzy_lock); /* Release the lock */ + + + if (info->acceptance_rate < 100) + { + get_random_bytes((void *)(&random_number), 1); + + /* If within the acceptance , it can pass => don't match */ + if (random_number <= (255 * info->acceptance_rate) / 100) + return 0; + else + return 1; /* It can't pass (It matches) */ + }; + + return 0; /* acceptance_rate == 100 % => Everything passes ... 
*/ + +} + +static int +ip6t_fuzzy_checkentry(const char *tablename, + const struct ip6t_ip6 *ip, + void *matchinfo, + unsigned int matchsize, + unsigned int hook_mask) +{ + + const struct ip6t_fuzzy_info *info = matchinfo; + + if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_fuzzy_info))) { + printk("ip6t_fuzzy: matchsize %u != %u\n", matchsize, + IP6T_ALIGN(sizeof(struct ip6t_fuzzy_info))); + return 0; + } + + if ((info->minimum_rate < MINFUZZYRATE) || (info->maximum_rate > MAXFUZZYRATE) + || (info->minimum_rate >= info->maximum_rate)) { + printk("ip6t_fuzzy: BAD limits , please verify !!!\n"); + return 0; + } + + return 1; +} + +static struct ip6t_match ip6t_fuzzy_reg = { + {NULL, NULL}, + "fuzzy", + ip6t_fuzzy_match, + ip6t_fuzzy_checkentry, + NULL, + THIS_MODULE }; + +static int __init init(void) +{ + if (ip6t_register_match(&ip6t_fuzzy_reg)) + return -EINVAL; + + return 0; +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&ip6t_fuzzy_reg); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_nth.c linux-2.6.11.3/net/ipv6/netfilter/ip6t_nth.c --- linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_nth.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/ip6t_nth.c 2005-03-13 22:03:57.031248256 +0100 @@ -0,0 +1,173 @@ +/* + This is a module which is used for match support for every Nth packet + This file is distributed under the terms of the GNU General Public + License (GPL). Copies of the GPL can be obtained from: + ftp://prep.ai.mit.edu/pub/gnu/GPL + + 2001-07-18 Fabrice MARIE : initial implementation. + 2001-09-20 Richard Wagner (rwagner@cloudnet.com) + * added support for multiple counters + * added support for matching on individual packets + in the counter cycle + 2003-04-30 Maciej Soltysiak : IPv6 Port + +*/ + +#include +#include +#include +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); + +/* + * State information. 
+ */ +struct state { + spinlock_t lock; + u_int16_t number; +}; + +static struct state states[IP6T_NTH_NUM_COUNTERS]; + +static int +ip6t_nth_match(const struct sk_buff *pskb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + /* Parameters from userspace */ + const struct ip6t_nth_info *info = matchinfo; + unsigned counter = info->counter; + if((counter < 0) || (counter >= IP6T_NTH_NUM_COUNTERS)) + { + printk(KERN_WARNING "nth: invalid counter %u. counter between 0 and %u\n", counter, IP6T_NTH_NUM_COUNTERS-1); + return 0; + }; + + spin_lock(&states[counter].lock); + + /* Are we matching every nth packet?*/ + if (info->packet == 0xFF) + { + /* We're matching every nth packet and only every nth packet*/ + /* Do we match or invert match? */ + if (info->not == 0) + { + if (states[counter].number == 0) + { + ++states[counter].number; + goto match; + } + if (states[counter].number >= info->every) + states[counter].number = 0; /* reset the counter */ + else + ++states[counter].number; + goto dontmatch; + } + else + { + if (states[counter].number == 0) + { + ++states[counter].number; + goto dontmatch; + } + if (states[counter].number >= info->every) + states[counter].number = 0; + else + ++states[counter].number; + goto match; + } + } + else + { + /* We're using the --packet, so there must be a rule for every value */ + if (states[counter].number == info->packet) + { + /* only increment the counter when a match happens */ + if (states[counter].number >= info->every) + states[counter].number = 0; /* reset the counter */ + else + ++states[counter].number; + goto match; + } + else + goto dontmatch; + } + + dontmatch: + /* don't match */ + spin_unlock(&states[counter].lock); + return 0; + + match: + spin_unlock(&states[counter].lock); + return 1; +} + +static int +ip6t_nth_checkentry(const char *tablename, + const struct ip6t_ip6 *e, + void *matchinfo, + unsigned 
int matchsize, + unsigned int hook_mask) +{ + /* Parameters from userspace */ + const struct ip6t_nth_info *info = matchinfo; + unsigned counter = info->counter; + if((counter < 0) || (counter >= IP6T_NTH_NUM_COUNTERS)) + { + printk(KERN_WARNING "nth: invalid counter %u. counter between 0 and %u\n", counter, IP6T_NTH_NUM_COUNTERS-1); + return 0; + }; + + if (matchsize != IP6T_ALIGN(sizeof(struct ip6t_nth_info))) { + printk("nth: matchsize %u != %u\n", matchsize, + IP6T_ALIGN(sizeof(struct ip6t_nth_info))); + return 0; + } + + states[counter].number = info->startat; + + return 1; +} + +static struct ip6t_match ip6t_nth_reg = { + {NULL, NULL}, + "nth", + ip6t_nth_match, + ip6t_nth_checkentry, + NULL, + THIS_MODULE }; + +static int __init init(void) +{ + unsigned counter; + memset(&states, 0, sizeof(states)); + if (ip6t_register_match(&ip6t_nth_reg)) + return -EINVAL; + + for(counter = 0; counter < IP6T_NTH_NUM_COUNTERS; counter++) + { + spin_lock_init(&(states[counter].lock)); + }; + + printk("ip6t_nth match loaded\n"); + return 0; +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&ip6t_nth_reg); + printk("ip6t_nth match unloaded\n"); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_owner.c linux-2.6.11.3/net/ipv6/netfilter/ip6t_owner.c --- linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_owner.c 2005-03-13 07:44:53.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/ip6t_owner.c 2005-03-13 22:03:36.643347688 +0100 @@ -21,6 +21,39 @@ MODULE_LICENSE("GPL"); static int +match_comm(const struct sk_buff *skb, const char *comm) +{ + struct task_struct *p, *g; + struct files_struct *files; + int i; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + if(strncmp(p->comm, comm, sizeof(p->comm))) + continue; + + task_lock(p); + files = p->files; + if(files) { + spin_lock(&files->file_lock); + for (i=0; i < files->max_fds; i++) { + if (fcheck_files(files, i) == + skb->sk->sk_socket->file) { + 
spin_unlock(&files->file_lock); + task_unlock(p); + read_unlock(&tasklist_lock); + return 1; + } + } + spin_unlock(&files->file_lock); + } + task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + return 0; +} + +static int match_pid(const struct sk_buff *skb, pid_t pid) { struct task_struct *p; @@ -124,6 +157,12 @@ return 0; } + if(info->match & IP6T_OWNER_COMM) { + if (!match_comm(skb, info->comm) ^ + !!(info->invert & IP6T_OWNER_COMM)) + return 0; + } + return 1; } @@ -145,8 +184,9 @@ #ifdef CONFIG_SMP /* files->file_lock can not be used in a BH */ if (((struct ip6t_owner_info *)matchinfo)->match - & (IP6T_OWNER_PID|IP6T_OWNER_SID)) { - printk("ip6t_owner: pid and sid matching is broken on SMP.\n"); + & (IP6T_OWNER_PID|IP6T_OWNER_SID|IP6T_OWNER_COMM)) { + printk("ip6t_owner: pid, sid and command matching is broken " + "on SMP.\n"); return 0; } #endif diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_policy.c linux-2.6.11.3/net/ipv6/netfilter/ip6t_policy.c --- linux-2.6.11.3.org/net/ipv6/netfilter/ip6t_policy.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/ip6t_policy.c 2005-03-13 22:06:07.323440832 +0100 @@ -0,0 +1,201 @@ +/* IP tables module for matching IPsec policy + * + * Copyright (c) 2004 Patrick McHardy, + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("IPtables IPsec policy matching module"); +MODULE_LICENSE("GPL"); + + +static inline int ip6_masked_addrcmp(struct in6_addr addr1, + struct in6_addr mask, + struct in6_addr addr2) +{ + int i; + + for (i = 0; i < 16; i++) { + if ((addr1.s6_addr[i] & mask.s6_addr[i]) != + (addr2.s6_addr[i] & mask.s6_addr[i])) + return 1; + } + return 0; +} + + +static inline int +match_xfrm_state(struct xfrm_state *x, const struct ip6t_policy_elem *e) +{ +#define MISMATCH(x,y) (e->match.x && ((e->x != (y)) ^ e->invert.x)) + + struct in6_addr xfrm_saddr, xfrm_daddr; + + if ((e->match.saddr + && (ip6_masked_addrcmp(xfrm_saddr, e->saddr, e->smask)) + ^ e->invert.saddr ) || + (e->match.daddr + && (ip6_masked_addrcmp(xfrm_daddr, e->daddr, e->dmask)) + ^ e->invert.daddr ) || + MISMATCH(proto, x->id.proto) || + MISMATCH(mode, x->props.mode) || + MISMATCH(spi, x->id.spi) || + MISMATCH(reqid, x->props.reqid)) + return 0; + return 1; +} + +static int +match_policy_in(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct sec_path *sp = skb->sp; + int strict = info->flags & POLICY_MATCH_STRICT; + int i, pos; + + if (sp == NULL) + return -1; + if (strict && info->len != sp->len) + return 0; + + for (i = sp->len - 1; i >= 0; i--) { + pos = strict ? i - sp->len + 1 : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(sp->x[i].xvec, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 
1 : 0; +} + +static int +match_policy_out(const struct sk_buff *skb, const struct ip6t_policy_info *info) +{ + const struct ip6t_policy_elem *e; + struct dst_entry *dst = skb->dst; + int strict = info->flags & POLICY_MATCH_STRICT; + int i, pos; + + if (dst->xfrm == NULL) + return -1; + + for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { + pos = strict ? i : 0; + if (pos >= info->len) + return 0; + e = &info->pol[pos]; + + if (match_xfrm_state(dst->xfrm, e)) { + if (!strict) + return 1; + } else if (strict) + return 0; + } + + return strict ? 1 : 0; +} + +static int match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const void *matchinfo, + int offset, + const void *hdr, + u_int16_t datalen, + int *hotdrop) +{ + const struct ip6t_policy_info *info = matchinfo; + int ret; + + if (info->flags & POLICY_MATCH_IN) + ret = match_policy_in(skb, info); + else + ret = match_policy_out(skb, info); + + if (ret < 0) { + if (info->flags & POLICY_MATCH_NONE) + ret = 1; + else + ret = 0; + } else if (info->flags & POLICY_MATCH_NONE) + ret = 0; + + return ret; +} + +static int checkentry(const char *tablename, const struct ip6t_ip6 *ip, + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) +{ + struct ip6t_policy_info *info = matchinfo; + + if (matchsize != IP6T_ALIGN(sizeof(*info))) { + printk(KERN_ERR "ip6t_policy: matchsize %u != %u\n", + matchsize, IP6T_ALIGN(sizeof(*info))); + return 0; + } + if (!(info->flags & (POLICY_MATCH_IN|POLICY_MATCH_OUT))) { + printk(KERN_ERR "ip6t_policy: neither incoming nor " + "outgoing policy selected\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_PRE_ROUTING | 1 << NF_IP6_LOCAL_IN) + && info->flags & POLICY_MATCH_OUT) { + printk(KERN_ERR "ip6t_policy: output policy not valid in " + "PRE_ROUTING and INPUT\n"); + return 0; + } + if (hook_mask & (1 << NF_IP6_POST_ROUTING | 1 << NF_IP6_LOCAL_OUT) + && info->flags & POLICY_MATCH_IN) { + printk(KERN_ERR "ip6t_policy: input policy 
not valid in " + "POST_ROUTING and OUTPUT\n"); + return 0; + } + if (info->len > POLICY_MAX_ELEM) { + printk(KERN_ERR "ip6t_policy: too many policy elements\n"); + return 0; + } + + return 1; +} + +static struct ip6t_match policy_match = +{ + .name = "policy", + .match = match, + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init init(void) +{ + return ip6t_register_match(&policy_match); +} + +static void __exit fini(void) +{ + ip6t_unregister_match(&policy_match); +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c linux-2.6.11.3/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c --- linux-2.6.11.3.org/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c 2005-03-13 22:06:02.196220288 +0100 @@ -0,0 +1,630 @@ +/* + * Copyright (C)2004 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Yasuyuki Kozakai @USAGI + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - support Layer 3 protocol independent connection tracking. + * Based on the original ip_conntrack code which had the following + * copyright information: + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * 23 Mar 2004: Yasuyuki Kozakai @USAGI + * - add get_features() to support various size of conntrack + * structures. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); + +static int ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple) +{ + u_int32_t _addrs[8], *ap; + + ap = skb_header_pointer(skb, nhoff + offsetof(struct ipv6hdr, saddr), + sizeof(_addrs), _addrs); + if (ap == NULL) + return 0; + + memcpy(tuple->src.u3.ip6, ap, sizeof(tuple->src.u3.ip6)); + memcpy(tuple->dst.u3.ip6, ap + 4, sizeof(tuple->dst.u3.ip6)); + + return 1; +} + +static int ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + memcpy(tuple->src.u3.ip6, orig->dst.u3.ip6, sizeof(tuple->src.u3.ip6)); + memcpy(tuple->dst.u3.ip6, orig->src.u3.ip6, sizeof(tuple->dst.u3.ip6)); + + return 1; +} + +static int ipv6_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "src=%x:%x:%x:%x:%x:%x:%x:%x dst=%x:%x:%x:%x:%x:%x:%x:%x ", + NIP6(*((struct in6_addr *)tuple->src.u3.ip6)), + NIP6(*((struct in6_addr *)tuple->dst.u3.ip6))); +} + +static int ipv6_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* + * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c + * + * This function parses (probably truncated) exthdr set "hdr" + * of length "len". "nexthdrp" initially points to some place, + * where type of the first header can be found. + * + * It skips all well-known exthdrs, and returns pointer to the start + * of unparsable area i.e. the first header with unknown type. + * if success, *nexthdr is updated by type/protocol of this header. + * + * NOTES: - it may return pointer pointing beyond end of packet, + * if the last recognized header is truncated in the middle. + * - if packet is truncated, so that all parsed headers are skipped, + * it returns -1. + * - if packet is fragmented, return pointer of the fragment header. + * - ESP is unparsable for now and considered like + * normal payload protocol. 
+ * - Note also special handling of AUTH header. Thanks to IPsec wizards. + */ + +int nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, + int len) +{ + u8 nexthdr = *nexthdrp; + + while (ipv6_ext_hdr(nexthdr)) { + struct ipv6_opt_hdr hdr; + int hdrlen; + + if (len < (int)sizeof(struct ipv6_opt_hdr)) + return -1; + if (nexthdr == NEXTHDR_NONE) + break; + if (nexthdr == NEXTHDR_FRAGMENT) + break; + if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) + BUG(); + if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hdr.hdrlen+2)<<2; + else + hdrlen = ipv6_optlen(&hdr); + + nexthdr = hdr.nexthdr; + len -= hdrlen; + start += hdrlen; + } + + *nexthdrp = nexthdr; + return start; +} + +static int +ipv6_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, + u_int8_t *protonum, int *ret) +{ + unsigned int extoff; + unsigned char pnum; + int protoff; + + extoff = (u8*)((*pskb)->nh.ipv6h + 1) - (*pskb)->data; + pnum = (*pskb)->nh.ipv6h->nexthdr; + + protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, + (*pskb)->len - extoff); + + /* + * (protoff == (*pskb)->len) mean that the packet doesn't have no data + * except of IPv6 & ext headers. but it's tracked anyway. - YK + */ + if ((protoff < 0) || (protoff > (*pskb)->len)) { + DEBUGP("ip6_conntrack_core: can't find proto in pkt\n"); + NF_CT_STAT_INC(error); + NF_CT_STAT_INC(invalid); + *ret = NF_ACCEPT; + return 0; + } + + /* FIXME: Do this right please. --RR */ + (*pskb)->nfcache |= NFC_UNKNOWN; + + *dataoff = protoff; + *protonum = pnum; + return 1; +} + +static u_int32_t ipv6_get_features(const struct nf_conntrack_tuple *tuple) +{ + return NF_CT_F_BASIC; +} + +static unsigned int ipv6_confirm(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum nf_conntrack_info ctinfo; + + /* This is where we call the helper: as the packet goes out. 
*/ + ct = nf_ct_get(*pskb, &ctinfo); + if (ct && ct->helper) { + unsigned int ret, protoff; + unsigned int extoff = (u8*)((*pskb)->nh.ipv6h + 1) + - (*pskb)->data; + unsigned char pnum = (*pskb)->nh.ipv6h->nexthdr; + + protoff = nf_ct_ipv6_skip_exthdr(*pskb, extoff, &pnum, + (*pskb)->len - extoff); + if (protoff < 0 || protoff > (*pskb)->len || + pnum == NEXTHDR_FRAGMENT) { + DEBUGP("proto header not found\n"); + return NF_ACCEPT; + } + + ret = ct->helper->help(pskb, protoff, ct, ctinfo); + if (ret != NF_ACCEPT) + return ret; + } + + /* We've seen it coming out the other side: confirm it */ + + return nf_conntrack_confirm(pskb); +} + +extern struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb); +extern void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, + struct net_device *in, + struct net_device *out, + int (*okfn)(struct sk_buff *)); +static unsigned int ipv6_defrag(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm; + + /* Previously seen (loopback)? */ + if ((*pskb)->nfct) + return NF_ACCEPT; + + reasm = nf_ct_frag6_gather(*pskb); + + /* queued */ + if (reasm == NULL) + return NF_STOLEN; + + /* error occured or not fragmented */ + if (reasm == *pskb) + return NF_ACCEPT; + + nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, + (struct net_device *)out, okfn); + + return NF_STOLEN; +} + +static unsigned int ipv6_conntrack_in(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm = (*pskb)->nfct_reasm; + + /* This packet is fragmented and has reassembled packet. */ + if (reasm) { + /* Reassembled packet isn't parsed yet ? 
*/ + if (!reasm->nfct) { + unsigned int ret; + + ret = nf_conntrack_in(PF_INET6, hooknum, &reasm); + if (ret != NF_ACCEPT) + return ret; + } + nf_conntrack_get(reasm->nfct); + (*pskb)->nfct = reasm->nfct; + return NF_ACCEPT; + } + + return nf_conntrack_in(PF_INET6, hooknum, pskb); +} + +static unsigned int ipv6_conntrack_local(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + /* root is playing with raw sockets. */ + if ((*pskb)->len < sizeof(struct ipv6hdr)) { + if (net_ratelimit()) + printk("ipv6_conntrack_local: packet too short\n"); + return NF_ACCEPT; + } + return ipv6_conntrack_in(hooknum, pskb, in, out, okfn); +} + +/* Connection tracking may drop packets, but never alters them, so + make it the first hook. */ +static struct nf_hook_ops ipv6_conntrack_defrag_ops = { + .hook = ipv6_defrag, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, +}; + +static struct nf_hook_ops ipv6_conntrack_in_ops = { + .hook = ipv6_conntrack_in, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_PRE_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK, +}; + +static struct nf_hook_ops ipv6_conntrack_local_out_ops = { + .hook = ipv6_conntrack_local, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_CONNTRACK, +}; + +static struct nf_hook_ops ipv6_conntrack_defrag_local_out_ops = { + .hook = ipv6_defrag, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_OUT, + .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, +}; + +/* Refragmenter; last chance. 
*/ +static struct nf_hook_ops ipv6_conntrack_out_ops = { + .hook = ipv6_confirm, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_POST_ROUTING, + .priority = NF_IP6_PRI_LAST, +}; + +static struct nf_hook_ops ipv6_conntrack_local_in_ops = { + .hook = ipv6_confirm, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_IP6_LOCAL_IN, + .priority = NF_IP6_PRI_LAST-1, +}; + +#ifdef CONFIG_SYSCTL + +/* From nf_conntrack_proto_icmpv6.c */ +extern unsigned long nf_ct_icmpv6_timeout; + +/* From nf_conntrack_frag6.c */ +extern unsigned long nf_ct_frag6_timeout; +extern unsigned long nf_ct_frag6_low_thresh; +extern unsigned long nf_ct_frag6_high_thresh; + +static struct ctl_table_header *nf_ct_ipv6_sysctl_header; + +static ctl_table nf_ct_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_ICMPV6_TIMEOUT, + .procname = "nf_conntrack_icmpv6_timeout", + .data = &nf_ct_icmpv6_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_TIMEOUT, + .procname = "nf_conntrack_frag6_timeout", + .data = &nf_ct_frag6_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_LOW_THRESH, + .procname = "nf_conntrack_frag6_low_thresh", + .data = &nf_ct_frag6_low_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_FRAG6_HIGH_THRESH, + .procname = "nf_conntrack_frag6_high_thresh", + .data = &nf_ct_frag6_high_thresh, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = nf_ct_sysctl_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 
0555, + .child = nf_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; +#endif + +/* Fast function for those who don't want to parse /proc (and I don't + blame them). */ +/* Reversing the socket's dst/src point of view gives us the reply + mapping. */ +static int +getorigdst(struct sock *sk, int optval, void __user *user, int *len) +{ + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct nf_conntrack_tuple_hash *h; + struct nf_conntrack_tuple tuple; + + memset(&tuple, 0, sizeof(tuple)); + ipv6_addr_copy((struct in6_addr *)tuple.src.u3.ip6, &np->rcv_saddr); + ipv6_addr_copy((struct in6_addr *)tuple.dst.u3.ip6, &np->daddr); + tuple.src.u.tcp.port = inet->sport; + tuple.dst.u.tcp.port = inet->dport; + tuple.dst.protonum = IPPROTO_TCP; + + /* We only do TCP at the moment: is there a better way? */ + if (strcmp(sk->sk_prot->name, "TCP")) { + DEBUGP("IPV6 ORIGINAL_DST: Not a TCP socket\n"); + return -ENOPROTOOPT; + } + + if ((unsigned int) *len < sizeof(struct sockaddr_in6)) { + DEBUGP("IPV6 ORIGINAL_DST: len %u not %u\n", + *len, sizeof(struct sockaddr_in6)); + return -EINVAL; + } + + h = nf_conntrack_find_get(&tuple, NULL); + if (h) { + struct sockaddr_in6 sin; + struct nf_conn *ct = tuplehash_to_ctrack(h); + + memset(&sin, 0, sizeof(sin)); + + sin.sin6_family = AF_INET6; + sin.sin6_port = ct->tuplehash[NF_CT_DIR_ORIGINAL] + .tuple.dst.u.tcp.port; + ipv6_addr_copy(&sin.sin6_addr, + (struct in6_addr *)ct->tuplehash[NF_CT_DIR_ORIGINAL].tuple.dst.u3.ip6); + /* FIXME: sin6_scope_id */ + + DEBUGP("IPV6 ORIGINAL_DST: %x:%x:%x:%x:%x:%x:%x:%x %u\n", + NIP6(sin.sin6_addr), ntohs(sin.sin6_port)); + nf_ct_put(ct); + if (copy_to_user(user, &sin, sizeof(sin)) != 0) + return -EFAULT; + else + return 0; + } + DEBUGP("IPV6 ORIGINAL_DST: Can't find %x:%x:%x:%x:%x:%x:%x:%x/%u-%x:%x:%x:%x:%x:%x:%x:%x/%u.\n", + NIP6(*((struct in6_addr *)tuple.src.u3.ip6)), + ntohs(tuple.src.u.tcp.port), + NIP6(*((struct in6_addr *)tuple.dst.u3.ip6)), + 
ntohs(tuple.dst.u.tcp.port)); + return -ENOENT; +} + +static struct nf_sockopt_ops so_getorigdst = { + .pf = PF_INET6, + .get_optmin = SO_ORIGINAL_DST, + .get_optmax = SO_ORIGINAL_DST+1, + .get = &getorigdst, +}; + +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { + .l3proto = PF_INET6, + .name = "ipv6", + .pkt_to_tuple = ipv6_pkt_to_tuple, + .invert_tuple = ipv6_invert_tuple, + .print_tuple = ipv6_print_tuple, + .print_conntrack = ipv6_print_conntrack, + .prepare = ipv6_prepare, + .get_features = ipv6_get_features, + .me = THIS_MODULE, +}; + +extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp6; +extern struct nf_conntrack_protocol nf_conntrack_protocol_udp6; +extern struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6; +extern int nf_ct_frag6_init(void); +extern void nf_ct_frag6_cleanup(void); +static int init_or_cleanup(int init) +{ + int ret = 0; + + if (!init) goto cleanup; + + ret = nf_register_sockopt(&so_getorigdst); + if (ret < 0) { + printk(KERN_ERR "Unable to register netfilter socket option\n"); + goto cleanup_nothing; + } + ret = nf_ct_frag6_init(); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't initialize frag6.\n"); + goto cleanup_sockopt; + } + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register tcp.\n"); + goto cleanup_frag6; + } + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register udp.\n"); + goto cleanup_tcp; + } + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmpv6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register icmpv6.\n"); + goto cleanup_udp; + } + + ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv6); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register ipv6\n"); + goto cleanup_icmpv6; + } + + ret = nf_register_hook(&ipv6_conntrack_defrag_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register 
pre-routing defrag " + "hook.\n"); + goto cleanup_ipv6; + } + + ret = nf_register_hook(&ipv6_conntrack_defrag_local_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register local_out defrag " + "hook.\n"); + goto cleanup_defragops; + } + + ret = nf_register_hook(&ipv6_conntrack_in_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register pre-routing hook.\n"); + goto cleanup_defraglocalops; + } + + ret = nf_register_hook(&ipv6_conntrack_local_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register local out hook.\n"); + goto cleanup_inops; + } + + ret = nf_register_hook(&ipv6_conntrack_out_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register post-routing hook.\n"); + goto cleanup_inandlocalops; + } + + ret = nf_register_hook(&ipv6_conntrack_local_in_ops); + if (ret < 0) { + printk("nf_conntrack_ipv6: can't register local in hook.\n"); + goto cleanup_inoutandlocalops; + } + +#ifdef CONFIG_SYSCTL + nf_ct_ipv6_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); + if (nf_ct_ipv6_sysctl_header == NULL) { + printk("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; + goto cleanup_localinops; + } +#endif + return ret; + + cleanup: +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nf_ct_ipv6_sysctl_header); + cleanup_localinops: +#endif + nf_unregister_hook(&ipv6_conntrack_local_in_ops); + cleanup_inoutandlocalops: + nf_unregister_hook(&ipv6_conntrack_out_ops); + cleanup_inandlocalops: + nf_unregister_hook(&ipv6_conntrack_local_out_ops); + cleanup_inops: + nf_unregister_hook(&ipv6_conntrack_in_ops); + cleanup_defraglocalops: + nf_unregister_hook(&ipv6_conntrack_defrag_local_out_ops); + cleanup_defragops: + nf_unregister_hook(&ipv6_conntrack_defrag_ops); + cleanup_ipv6: + nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv6); + cleanup_icmpv6: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmpv6); + cleanup_udp: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp6); + 
cleanup_tcp: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp6); + cleanup_frag6: + nf_ct_frag6_cleanup(); + cleanup_sockopt: + nf_unregister_sockopt(&so_getorigdst); + cleanup_nothing: + return ret; +} + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Yasuyuki KOZAKAI @USAGI "); + +static int __init init(void) +{ + need_nf_conntrack(); + return init_or_cleanup(1); +} + +static void __exit fini(void) +{ + init_or_cleanup(0); +} + +module_init(init); +module_exit(fini); + +PROVIDES_CONNTRACK(ipv6); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c linux-2.6.11.3/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c --- linux-2.6.11.3.org/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 2005-03-13 22:06:02.198219984 +0100 @@ -0,0 +1,270 @@ +/* + * Copyright (C)2003,2004 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Yasuyuki Kozakai @USAGI + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - ICMPv6 tracking support. Derived from the original ip_conntrack code + * net/ipv4/netfilter/ip_conntrack_proto_icmp.c which had the following + * copyright information: + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long nf_ct_icmpv6_timeout = 30*HZ; + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +static int icmpv6_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct icmp6hdr _hdr, *hp; + + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return 0; + tuple->dst.u.icmp.type = hp->icmp6_type; + tuple->src.u.icmp.id = hp->icmp6_identifier; + tuple->dst.u.icmp.code = hp->icmp6_code; + + return 1; +} + +static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + /* Add 1; spaces filled with 0. */ + static u_int8_t invmap[] = { + [ICMPV6_ECHO_REQUEST - 128] = ICMPV6_ECHO_REPLY + 1, + [ICMPV6_ECHO_REPLY - 128] = ICMPV6_ECHO_REQUEST + 1, + [ICMPV6_NI_QUERY - 128] = ICMPV6_NI_QUERY + 1, + [ICMPV6_NI_REPLY - 128] = ICMPV6_NI_REPLY +1 + }; + + __u8 type = orig->dst.u.icmp.type - 128; + if (type >= sizeof(invmap) || !invmap[type]) + return 0; + + tuple->src.u.icmp.id = orig->src.u.icmp.id; + tuple->dst.u.icmp.type = invmap[type] - 1; + tuple->dst.u.icmp.code = orig->dst.u.icmp.code; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int icmpv6_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); +} + +/* Print out the private part of the conntrack. */ +static int icmpv6_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. 
*/ +static int icmpv6_packet(struct nf_conn *ct, + const struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* Try to delete connection immediately after all replies: + won't actually vanish as we still have skb, and del_timer + means this will only run once even if count hits zero twice + (theoretically possible with SMP) */ + if (NFCTINFO2DIR(ctinfo) == NF_CT_DIR_REPLY) { + if (atomic_dec_and_test(&ct->proto.icmp.count) + && del_timer(&ct->timeout)) + ct->timeout.function((unsigned long)ct); + } else { + atomic_inc(&ct->proto.icmp.count); + nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmpv6_timeout); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int icmpv6_new(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff) +{ + static u_int8_t valid_new[] = { + [ICMPV6_ECHO_REQUEST - 128] = 1, + [ICMPV6_NI_QUERY - 128] = 1 + }; + + if (conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128 >= sizeof(valid_new) + || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type - 128]) { + /* Can't create a new ICMPv6 `conn' with this. 
*/ + DEBUGP("icmp: can't create new conn with type %u\n", + conntrack->tuplehash[0].tuple.dst.u.icmp.type); + NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); + return 0; + } + atomic_set(&conntrack->proto.icmp.count, 0); + return 1; +} + +extern int +nf_ct_ipv6_skip_exthdr(struct sk_buff *skb, int start, u8 *nexthdrp, int len); +extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6; +static int +icmpv6_error_message(struct sk_buff *skb, + unsigned int icmp6off, + enum nf_conntrack_info *ctinfo, + unsigned int hooknum) +{ + struct nf_conntrack_tuple intuple, origtuple; + struct nf_conntrack_tuple_hash *h; + struct icmp6hdr _hdr, *hp; + unsigned int inip6off; + struct nf_conntrack_protocol *inproto; + u_int8_t inprotonum; + unsigned int inprotoff; + + NF_CT_ASSERT(skb->nfct == NULL); + + hp = skb_header_pointer(skb, icmp6off, sizeof(_hdr), &_hdr); + if (hp == NULL) { + DEBUGP("icmpv6_error: Can't get ICMPv6 hdr.\n"); + return NF_ACCEPT; + } + + inip6off = icmp6off + sizeof(_hdr); + if (skb_copy_bits(skb, inip6off+offsetof(struct ipv6hdr, nexthdr), + &inprotonum, sizeof(inprotonum)) != 0) { + DEBUGP("icmpv6_error: Can't get nexthdr in inner IPv6 header.\n"); + return NF_ACCEPT; + } + inprotoff = nf_ct_ipv6_skip_exthdr(skb, + inip6off + sizeof(struct ipv6hdr), + &inprotonum, + skb->len - inip6off + - sizeof(struct ipv6hdr)); + + if ((inprotoff < 0) || (inprotoff > skb->len) || + (inprotonum == NEXTHDR_FRAGMENT)) { + DEBUGP("icmpv6_error: Can't get protocol header in ICMPv6 payload.\n"); + return NF_ACCEPT; + } + + inproto = nf_ct_find_proto(PF_INET6, inprotonum); + + /* Are they talking about one of our connections? */ + if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum, + &origtuple, &nf_conntrack_l3proto_ipv6, inproto)) { + DEBUGP("icmpv6_error: Can't get tuple\n"); + return NF_ACCEPT; + } + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. 
*/ + if (!nf_ct_invert_tuple(&intuple, &origtuple, + &nf_conntrack_l3proto_ipv6, inproto)) { + DEBUGP("icmpv6_error: Can't invert tuple\n"); + return NF_ACCEPT; + } + + *ctinfo = NF_CT_RELATED; + + h = nf_conntrack_find_get(&intuple, NULL); + if (!h) { + DEBUGP("icmpv6_error: no match\n"); + return NF_ACCEPT; + } else { + if (NF_CT_DIRECTION(h) == NF_CT_DIR_REPLY) + *ctinfo += NF_CT_IS_REPLY; + } + + /* Update skb to refer to this connection */ + skb->nfct = &tuplehash_to_ctrack(h)->ct_general; + skb->nfctinfo = *ctinfo; + return -NF_ACCEPT; +} + +static int +icmpv6_error(struct sk_buff *skb, unsigned int dataoff, + enum nf_conntrack_info *ctinfo, int pf, unsigned int hooknum) +{ + struct icmp6hdr _ih, *icmp6h; + + icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); + if (icmp6h == NULL) { + if (LOG_INVALID(IPPROTO_ICMPV6)) + nf_log_packet(PF_INET6, 0, skb, NULL, NULL, + "nf_ct_icmpv6: short packet "); + return -NF_ACCEPT; + } + + if (hooknum != NF_IP6_PRE_ROUTING) + goto skipped; + + /* Ignore it if the checksum's bogus. */ + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len - dataoff, IPPROTO_ICMPV6, + skb_checksum(skb, dataoff, + skb->len - dataoff, 0))) { + nf_log_packet(PF_INET6, 0, skb, NULL, NULL, + "nf_ct_icmpv6: ICMPv6 checksum failed\n"); + return -NF_ACCEPT; + } + +skipped: + + /* is not error message ? 
*/ + if (icmp6h->icmp6_type >= 128) + return NF_ACCEPT; + + return icmpv6_error_message(skb, dataoff, ctinfo, hooknum); +} + +struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 = +{ + .l3proto = PF_INET6, + .proto = IPPROTO_ICMPV6, + .name = "icmpv6", + .pkt_to_tuple = icmpv6_pkt_to_tuple, + .invert_tuple = icmpv6_invert_tuple, + .print_tuple = icmpv6_print_tuple, + .print_conntrack = icmpv6_print_conntrack, + .packet = icmpv6_packet, + .new = icmpv6_new, + .error = icmpv6_error, +}; + +EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/ipv6/netfilter/nf_conntrack_reasm.c linux-2.6.11.3/net/ipv6/netfilter/nf_conntrack_reasm.c --- linux-2.6.11.3.org/net/ipv6/netfilter/nf_conntrack_reasm.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/ipv6/netfilter/nf_conntrack_reasm.c 2005-03-13 22:06:02.201219528 +0100 @@ -0,0 +1,887 @@ +/* + * IPv6 fragment reassembly for connection tracking + * + * Copyright (C)2004 USAGI/WIDE Project + * + * Author: + * Yasuyuki Kozakai @USAGI + * + * Based on: net/ipv6/reassembly.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */ +#define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ +#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT + +int nf_ct_frag6_high_thresh = 256*1024; +int nf_ct_frag6_low_thresh = 192*1024; +int nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; + +struct nf_ct_frag6_skb_cb +{ + struct inet6_skb_parm h; + int offset; + struct sk_buff *orig; +}; + +#define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) + +struct nf_ct_frag6_queue +{ + struct nf_ct_frag6_queue *next; + struct list_head lru_list; /* lru list member */ + + __u32 id; /* fragment id */ + struct in6_addr saddr; + struct in6_addr daddr; + + spinlock_t lock; + atomic_t refcnt; + struct timer_list timer; /* expire timer */ + struct sk_buff *fragments; + int len; + int meat; + struct timeval stamp; + unsigned int csum; + __u8 last_in; /* has first/last segment arrived? */ +#define COMPLETE 4 +#define FIRST_IN 2 +#define LAST_IN 1 + __u16 nhoffset; + struct nf_ct_frag6_queue **pprev; +}; + +/* Hash table. 
*/ + +#define FRAG6Q_HASHSZ 64 + +static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ]; +static rwlock_t nf_ct_frag6_lock = RW_LOCK_UNLOCKED; +static u32 nf_ct_frag6_hash_rnd; +static LIST_HEAD(nf_ct_frag6_lru_list); +int nf_ct_frag6_nqueues = 0; + +static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq) +{ + if (fq->next) + fq->next->pprev = fq->pprev; + *fq->pprev = fq->next; + list_del(&fq->lru_list); + nf_ct_frag6_nqueues--; +} + +static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq) +{ + write_lock(&nf_ct_frag6_lock); + __fq_unlink(fq); + write_unlock(&nf_ct_frag6_lock); +} + +static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, + struct in6_addr *daddr) +{ + u32 a, b, c; + + a = saddr->s6_addr32[0]; + b = saddr->s6_addr32[1]; + c = saddr->s6_addr32[2]; + + a += JHASH_GOLDEN_RATIO; + b += JHASH_GOLDEN_RATIO; + c += nf_ct_frag6_hash_rnd; + __jhash_mix(a, b, c); + + a += saddr->s6_addr32[3]; + b += daddr->s6_addr32[0]; + c += daddr->s6_addr32[1]; + __jhash_mix(a, b, c); + + a += daddr->s6_addr32[2]; + b += daddr->s6_addr32[3]; + c += id; + __jhash_mix(a, b, c); + + return c & (FRAG6Q_HASHSZ - 1); +} + +static struct timer_list nf_ct_frag6_secret_timer; +int nf_ct_frag6_secret_interval = 10 * 60 * HZ; + +static void nf_ct_frag6_secret_rebuild(unsigned long dummy) +{ + unsigned long now = jiffies; + int i; + + write_lock(&nf_ct_frag6_lock); + get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32)); + for (i = 0; i < FRAG6Q_HASHSZ; i++) { + struct nf_ct_frag6_queue *q; + + q = nf_ct_frag6_hash[i]; + while (q) { + struct nf_ct_frag6_queue *next = q->next; + unsigned int hval = ip6qhashfn(q->id, + &q->saddr, + &q->daddr); + + if (hval != i) { + /* Unlink. */ + if (q->next) + q->next->pprev = q->pprev; + *q->pprev = q->next; + + /* Relink to new hash chain. 
*/ + if ((q->next = nf_ct_frag6_hash[hval]) != NULL) + q->next->pprev = &q->next; + nf_ct_frag6_hash[hval] = q; + q->pprev = &nf_ct_frag6_hash[hval]; + } + + q = next; + } + } + write_unlock(&nf_ct_frag6_lock); + + mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval); +} + +atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0); + +/* Memory Tracking Functions. */ +static inline void frag_kfree_skb(struct sk_buff *skb) +{ + atomic_sub(skb->truesize, &nf_ct_frag6_mem); + if (NFCT_FRAG6_CB(skb)->orig) + kfree_skb(NFCT_FRAG6_CB(skb)->orig); + + kfree_skb(skb); +} + +static inline void frag_free_queue(struct nf_ct_frag6_queue *fq) +{ + atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); + kfree(fq); +} + +static inline struct nf_ct_frag6_queue *frag_alloc_queue(void) +{ + struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC); + + if (!fq) + return NULL; + atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem); + return fq; +} + +/* Destruction primitives. */ + +/* Complete destruction of fq. */ +static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq) +{ + struct sk_buff *fp; + + BUG_TRAP(fq->last_in&COMPLETE); + BUG_TRAP(del_timer(&fq->timer) == 0); + + /* Release all fragment data. */ + fp = fq->fragments; + while (fp) { + struct sk_buff *xp = fp->next; + + frag_kfree_skb(fp); + fp = xp; + } + + frag_free_queue(fq); +} + +static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) +{ + if (atomic_dec_and_test(&fq->refcnt)) + nf_ct_frag6_destroy(fq); +} + +/* Kill fq entry. It is not destroyed immediately, + * because caller (and someone more) holds reference count. 
+ */ +static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) +{ + if (del_timer(&fq->timer)) + atomic_dec(&fq->refcnt); + + if (!(fq->last_in & COMPLETE)) { + fq_unlink(fq); + atomic_dec(&fq->refcnt); + fq->last_in |= COMPLETE; + } +} + +static void nf_ct_frag6_evictor(void) +{ + struct nf_ct_frag6_queue *fq; + struct list_head *tmp; + + for (;;) { + if (atomic_read(&nf_ct_frag6_mem) <= nf_ct_frag6_low_thresh) + return; + read_lock(&nf_ct_frag6_lock); + if (list_empty(&nf_ct_frag6_lru_list)) { + read_unlock(&nf_ct_frag6_lock); + return; + } + tmp = nf_ct_frag6_lru_list.next; + fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list); + atomic_inc(&fq->refcnt); + read_unlock(&nf_ct_frag6_lock); + + spin_lock(&fq->lock); + if (!(fq->last_in&COMPLETE)) + fq_kill(fq); + spin_unlock(&fq->lock); + + fq_put(fq); + } +} + +static void nf_ct_frag6_expire(unsigned long data) +{ + struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data; + + spin_lock(&fq->lock); + + if (fq->last_in & COMPLETE) + goto out; + + fq_kill(fq); + +out: + spin_unlock(&fq->lock); + fq_put(fq); +} + +/* Creation primitives. 
*/ + + +static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash, + struct nf_ct_frag6_queue *fq_in) +{ + struct nf_ct_frag6_queue *fq; + + write_lock(&nf_ct_frag6_lock); +#ifdef CONFIG_SMP + for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { + if (fq->id == fq_in->id && + !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) && + !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) { + atomic_inc(&fq->refcnt); + write_unlock(&nf_ct_frag6_lock); + fq_in->last_in |= COMPLETE; + fq_put(fq_in); + return fq; + } + } +#endif + fq = fq_in; + + if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout)) + atomic_inc(&fq->refcnt); + + atomic_inc(&fq->refcnt); + if ((fq->next = nf_ct_frag6_hash[hash]) != NULL) + fq->next->pprev = &fq->next; + nf_ct_frag6_hash[hash] = fq; + fq->pprev = &nf_ct_frag6_hash[hash]; + INIT_LIST_HEAD(&fq->lru_list); + list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list); + nf_ct_frag6_nqueues++; + write_unlock(&nf_ct_frag6_lock); + return fq; +} + + +static struct nf_ct_frag6_queue * +nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct nf_ct_frag6_queue *fq; + + if ((fq = frag_alloc_queue()) == NULL) { + DEBUGP("Can't alloc new queue\n"); + goto oom; + } + + memset(fq, 0, sizeof(struct nf_ct_frag6_queue)); + + fq->id = id; + ipv6_addr_copy(&fq->saddr, src); + ipv6_addr_copy(&fq->daddr, dst); + + init_timer(&fq->timer); + fq->timer.function = nf_ct_frag6_expire; + fq->timer.data = (long) fq; + fq->lock = SPIN_LOCK_UNLOCKED; + atomic_set(&fq->refcnt, 1); + + return nf_ct_frag6_intern(hash, fq); + +oom: + return NULL; +} + +static __inline__ struct nf_ct_frag6_queue * +fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +{ + struct nf_ct_frag6_queue *fq; + unsigned int hash = ip6qhashfn(id, src, dst); + + read_lock(&nf_ct_frag6_lock); + for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) { + if (fq->id == id && + !ipv6_addr_cmp(src, &fq->saddr) && + !ipv6_addr_cmp(dst, &fq->daddr)) { + 
atomic_inc(&fq->refcnt); + read_unlock(&nf_ct_frag6_lock); + return fq; + } + } + read_unlock(&nf_ct_frag6_lock); + + return nf_ct_frag6_create(hash, id, src, dst); +} + + +static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, + struct frag_hdr *fhdr, int nhoff) +{ + struct sk_buff *prev, *next; + int offset, end; + + if (fq->last_in & COMPLETE) { + DEBUGP("Allready completed\n"); + goto err; + } + + offset = ntohs(fhdr->frag_off) & ~0x7; + end = offset + (ntohs(skb->nh.ipv6h->payload_len) - + ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + + if ((unsigned int)end > IPV6_MAXPLEN) { + DEBUGP("offset is too large.\n"); + return -1; + } + + if (skb->ip_summed == CHECKSUM_HW) + skb->csum = csum_sub(skb->csum, + csum_partial(skb->nh.raw, + (u8*)(fhdr + 1) - skb->nh.raw, + 0)); + + /* Is this the final fragment? */ + if (!(fhdr->frag_off & htons(IP6_MF))) { + /* If we already have some bits beyond end + * or have different end, the segment is corrupted. + */ + if (end < fq->len || + ((fq->last_in & LAST_IN) && end != fq->len)) { + DEBUGP("already received last fragment\n"); + goto err; + } + fq->last_in |= LAST_IN; + fq->len = end; + } else { + /* Check if the fragment is rounded to 8 bytes. + * Required by the RFC. + */ + if (end & 0x7) { + /* RFC2460 says always send parameter problem in + * this case. -DaveM + */ + DEBUGP("the end of this fragment is not rounded to 8 bytes.\n"); + return -1; + } + if (end > fq->len) { + /* Some bits beyond end -> corruption. */ + if (fq->last_in & LAST_IN) { + DEBUGP("last packet already reached.\n"); + goto err; + } + fq->len = end; + } + } + + if (end == offset) + goto err; + + /* Point into the IP datagram 'data' part. 
*/ + if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) { + DEBUGP("queue: message is too short.\n"); + goto err; + } + if (end-offset < skb->len) { + if (pskb_trim(skb, end - offset)) { + DEBUGP("Can't trim\n"); + goto err; + } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + + /* Find out which fragments are in front and at the back of us + * in the chain of fragments so far. We must know where to put + * this fragment, right? + */ + prev = NULL; + for (next = fq->fragments; next != NULL; next = next->next) { + if (NFCT_FRAG6_CB(next)->offset >= offset) + break; /* bingo! */ + prev = next; + } + + /* We found where to put this one. Check for overlap with + * preceding fragment, and, if needed, align things so that + * any overlaps are eliminated. + */ + if (prev) { + int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset; + + if (i > 0) { + offset += i; + if (end <= offset) { + DEBUGP("overlap\n"); + goto err; + } + if (!pskb_pull(skb, i)) { + DEBUGP("Can't pull\n"); + goto err; + } + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + skb->ip_summed = CHECKSUM_NONE; + } + } + + /* Look for overlap with succeeding segments. + * If we can merge fragments, do it. + */ + while (next && NFCT_FRAG6_CB(next)->offset < end) { + /* overlap is 'i' bytes */ + int i = end - NFCT_FRAG6_CB(next)->offset; + + if (i < next->len) { + /* Eat head of the next overlapped fragment + * and leave the loop. The next ones cannot overlap. + */ + DEBUGP("Eat head of the overlapped parts.: %d", i); + if (!pskb_pull(next, i)) + goto err; + + /* next fragment */ + NFCT_FRAG6_CB(next)->offset += i; + fq->meat -= i; + if (next->ip_summed != CHECKSUM_UNNECESSARY) + next->ip_summed = CHECKSUM_NONE; + break; + } else { + struct sk_buff *free_it = next; + + /* Old fragmnet is completely overridden with + * new one drop it. 
+ */ + next = next->next; + + if (prev) + prev->next = next; + else + fq->fragments = next; + + fq->meat -= free_it->len; + frag_kfree_skb(free_it); + } + } + + NFCT_FRAG6_CB(skb)->offset = offset; + + /* Insert this fragment in the chain of fragments. */ + skb->next = next; + if (prev) + prev->next = skb; + else + fq->fragments = skb; + + skb->dev = NULL; + fq->stamp = skb->stamp; + fq->meat += skb->len; + atomic_add(skb->truesize, &nf_ct_frag6_mem); + + /* The first fragment. + * nhoffset is obtained from the first fragment, of course. + */ + if (offset == 0) { + fq->nhoffset = nhoff; + fq->last_in |= FIRST_IN; + } + write_lock(&nf_ct_frag6_lock); + list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list); + write_unlock(&nf_ct_frag6_lock); + return 0; + +err: + return -1; +} + +/* + * Check if this packet is complete. + * Returns NULL on failure by any reason, and pointer + * to current nexthdr field in reassembled frame. + * + * It is called with locked fq, and caller must check that + * queue is eligible for reassembly i.e. it is not COMPLETE, + * the last and the first frames arrived and all the bits are here. + */ +static struct sk_buff * +nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) +{ + struct sk_buff *fp, *op, *head = fq->fragments; + int payload_len; + + fq_kill(fq); + + BUG_TRAP(head != NULL); + BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0); + + /* Unfragmented part is taken from the first segment. */ + payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr); + if (payload_len > IPV6_MAXPLEN) { + DEBUGP("payload len is too large.\n"); + goto out_oversize; + } + + /* Head of list must not be cloned. 
*/ + if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) { + DEBUGP("skb is cloned but can't expand head"); + goto out_oom; + } + + /* If the first fragment is fragmented itself, we split + * it to two chunks: the first with data and paged part + * and the second, holding only fragments. */ + if (skb_shinfo(head)->frag_list) { + struct sk_buff *clone; + int i, plen = 0; + + if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) { + DEBUGP("Can't alloc skb\n"); + goto out_oom; + } + clone->next = head->next; + head->next = clone; + skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; + skb_shinfo(head)->frag_list = NULL; + for (i=0; inr_frags; i++) + plen += skb_shinfo(head)->frags[i].size; + clone->len = clone->data_len = head->data_len - plen; + head->data_len -= clone->len; + head->len -= clone->len; + clone->csum = 0; + clone->ip_summed = head->ip_summed; + + NFCT_FRAG6_CB(clone)->orig = NULL; + atomic_add(clone->truesize, &nf_ct_frag6_mem); + } + + /* We have to remove fragment header from datagram and to relocate + * header in order to calculate ICV correctly. 
*/ + head->nh.raw[fq->nhoffset] = head->h.raw[0]; + memmove(head->head + sizeof(struct frag_hdr), head->head, + (head->data - head->head) - sizeof(struct frag_hdr)); + head->mac.raw += sizeof(struct frag_hdr); + head->nh.raw += sizeof(struct frag_hdr); + + skb_shinfo(head)->frag_list = head->next; + head->h.raw = head->data; + skb_push(head, head->data - head->nh.raw); + atomic_sub(head->truesize, &nf_ct_frag6_mem); + + for (fp=head->next; fp; fp = fp->next) { + head->data_len += fp->len; + head->len += fp->len; + if (head->ip_summed != fp->ip_summed) + head->ip_summed = CHECKSUM_NONE; + else if (head->ip_summed == CHECKSUM_HW) + head->csum = csum_add(head->csum, fp->csum); + head->truesize += fp->truesize; + atomic_sub(fp->truesize, &nf_ct_frag6_mem); + } + + head->next = NULL; + head->dev = dev; + head->stamp = fq->stamp; + head->nh.ipv6h->payload_len = htons(payload_len); + + /* Yes, and fold redundant checksum back. 8) */ + if (head->ip_summed == CHECKSUM_HW) + head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); + + fq->fragments = NULL; + + /* all original skbs are linked into the NFCT_FRAG6_CB(head).orig */ + fp = skb_shinfo(head)->frag_list; + if (NFCT_FRAG6_CB(fp)->orig == NULL) + /* at above code, head skb is divided into two skbs. */ + fp = fp->next; + + op = NFCT_FRAG6_CB(head)->orig; + for (; fp; fp = fp->next) { + struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig; + + op->next = orig; + op = orig; + NFCT_FRAG6_CB(fp)->orig = NULL; + } + + return head; + +out_oversize: + if (net_ratelimit()) + printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n", payload_len); + goto out_fail; +out_oom: + if (net_ratelimit()) + printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n"); +out_fail: + return NULL; +} + +/* + * find the header just before Fragment Header. + * + * if success return 0 and set ... + * (*prevhdrp): the value of "Next Header Field" in the header + * just before Fragment Header. 
+ * (*prevhoff): the offset of "Next Header Field" in the header + * just before Fragment Header. + * (*fhoff) : the offset of Fragment Header. + * + * Based on ipv6_skip_hdr() in net/ipv6/exthdr.c + * + */ +static int +find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff) +{ + u8 nexthdr = skb->nh.ipv6h->nexthdr; + u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data; + int start = (u8 *)(skb->nh.ipv6h+1) - skb->data; + int len = skb->len - start; + u8 prevhdr = NEXTHDR_IPV6; + + while (nexthdr != NEXTHDR_FRAGMENT) { + struct ipv6_opt_hdr hdr; + int hdrlen; + + if (!ipv6_ext_hdr(nexthdr)) { + return -1; + } + if (len < (int)sizeof(struct ipv6_opt_hdr)) { + DEBUGP("too short\n"); + return -1; + } + if (nexthdr == NEXTHDR_NONE) { + DEBUGP("next header is none\n"); + return -1; + } + if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) + BUG(); + if (nexthdr == NEXTHDR_AUTH) + hdrlen = (hdr.hdrlen+2)<<2; + else + hdrlen = ipv6_optlen(&hdr); + + prevhdr = nexthdr; + prev_nhoff = start; + + nexthdr = hdr.nexthdr; + len -= hdrlen; + start += hdrlen; + } + + if (len < 0) + return -1; + + *prevhdrp = prevhdr; + *prevhoff = prev_nhoff; + *fhoff = start; + + return 0; +} + +struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb) +{ + struct sk_buff *clone; + struct net_device *dev = skb->dev; + struct frag_hdr *fhdr; + struct nf_ct_frag6_queue *fq; + struct ipv6hdr *hdr; + int fhoff, nhoff; + u8 prevhdr; + struct sk_buff *ret_skb = NULL; + + /* Jumbo payload inhibits frag. 
header */ + if (skb->nh.ipv6h->payload_len == 0) { + DEBUGP("payload len = 0\n"); + return skb; + } + + if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0) + return skb; + + clone = skb_clone(skb, GFP_ATOMIC); + if (clone == NULL) { + DEBUGP("Can't clone skb\n"); + return skb; + } + + NFCT_FRAG6_CB(clone)->orig = skb; + + if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) { + DEBUGP("message is too short.\n"); + goto ret_orig; + } + + clone->h.raw = clone->data + fhoff; + hdr = clone->nh.ipv6h; + fhdr = (struct frag_hdr *)clone->h.raw; + + if (!(fhdr->frag_off & htons(0xFFF9))) { + DEBUGP("Invalid fragment offset\n"); + /* It is not a fragmented frame */ + goto ret_orig; + } + + if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh) + nf_ct_frag6_evictor(); + + fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr); + if (fq == NULL) { + DEBUGP("Can't find and can't create new queue\n"); + goto ret_orig; + } + + spin_lock(&fq->lock); + + if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { + spin_unlock(&fq->lock); + DEBUGP("Can't insert skb to queue\n"); + fq_put(fq); + goto ret_orig; + } + + if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) { + ret_skb = nf_ct_frag6_reasm(fq, dev); + if (ret_skb == NULL) + DEBUGP("Can't reassemble fragmented packets\n"); + } + spin_unlock(&fq->lock); + + fq_put(fq); + return ret_skb; + +ret_orig: + kfree_skb(clone); + return skb; +} + +void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, + struct net_device *in, struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *s, *s2; + + for (s = NFCT_FRAG6_CB(skb)->orig; s;) { + s->nfcache = skb->nfcache; + + nf_conntrack_put_reasm(s->nfct_reasm); + nf_conntrack_get_reasm(skb); + s->nfct_reasm = skb; + + s2 = s->next; + NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn, + NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + s = s2; + } + nf_conntrack_put_reasm(skb); +} + +int nf_ct_frag6_kfree_frags(struct sk_buff *skb) +{ + struct sk_buff 
*s, *s2; + + for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) { + + s2 = s->next; + kfree_skb(s); + } + + kfree_skb(skb); + + return 0; +} + +int nf_ct_frag6_init(void) +{ + nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ + (jiffies ^ (jiffies >> 6))); + + init_timer(&nf_ct_frag6_secret_timer); + nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild; + nf_ct_frag6_secret_timer.expires = jiffies + + nf_ct_frag6_secret_interval; + add_timer(&nf_ct_frag6_secret_timer); + + return 0; +} + +void nf_ct_frag6_cleanup(void) +{ + del_timer(&nf_ct_frag6_secret_timer); + nf_ct_frag6_evictor(); +} diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/Kconfig linux-2.6.11.3/net/netfilter/Kconfig --- linux-2.6.11.3.org/net/netfilter/Kconfig 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/Kconfig 2005-03-13 22:06:02.207218616 +0100 @@ -0,0 +1,68 @@ +# +# protocol independent part of netfilter configuration +# + +menu "Generic Netfilter Configuration" + depends on INET && NETFILTER + +config NF_CONNTRACK + tristate "Layer 3 Independent Connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NET + default n + ---help--- + Connection tracking keeps a record of what packets have passed + through your machine, in order to figure out how they are related + into connections. + + Layer 3 independent connection tracking is an experimental scheme + which generalizes ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. + +config NF_CT_ACCT + bool "Connection tracking flow accounting" + depends on NF_CONNTRACK + help + If this option is enabled, the connection tracking code will + keep per-flow packet and byte counters. + + Those counters can be used for flow-based accounting or the + `connbytes' match. + + If unsure, say `N'. 
+ +config NF_CONNTRACK_MARK + bool 'Connection mark tracking support' + depends on NF_CONNTRACK + help + This option enables support for connection marks, used by the + `CONNMARK' target and `connmark' match. Similar to the mark value + of packets, but this mark value is kept in the conntrack session + instead of the individual packets. + +config NF_CT_PROTO_SCTP + tristate 'SCTP protocol on new connection tracking support (EXPERIMENTAL)' + depends on EXPERIMENTAL && NF_CONNTRACK + default n + help + With this option enabled, the layer 3 independent connection + tracking code will be able to do state tracking on SCTP connections. + + If you want to compile it as a module, say M here and read + Documentation/modules.txt. If unsure, say `N'. + +config NF_CONNTRACK_FTP + tristate "FTP support on new connection tracking (EXPERIMENTAL)" + depends on EXPERIMENTAL && NF_CONNTRACK + help + Tracking FTP connections is problematic: special helpers are + required for tracking them, and doing masquerading and other forms + of Network Address Translation on them. + + This is FTP support on Layer 3 independent connection tracking. + Layer 3 independent connection tracking is an experimental scheme + which generalizes ip_conntrack to support other layer 3 protocols. + + To compile it as a module, choose M here. If unsure, say N. 
+ +endmenu diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/Makefile linux-2.6.11.3/net/netfilter/Makefile --- linux-2.6.11.3.org/net/netfilter/Makefile 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/Makefile 2005-03-13 22:06:02.217217096 +0100 @@ -0,0 +1,7 @@ +nf_conntrack-objs := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o + +obj-$(CONFIG_NF_CONNTRACK) += nf_conntrack.o +obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o + +# SCTP protocol connection tracking +obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/nf_conntrack_core.c linux-2.6.11.3/net/netfilter/nf_conntrack_core.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_core.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_core.c 2005-03-13 22:06:02.219216792 +0100 @@ -0,0 +1,1390 @@ +/* Connection state tracking for netfilter. This is separated from, + but required by, the NAT layer; it can also be used by an iptables + extension. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2005 Netfilter Core Team + * (C) 2003,2004 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 23 Apr 2001: Harald Welte + * - new API and handling of conntrack/nat helpers + * - now capable of multiple expectations for one master + * 16 Jul 2002: Harald Welte + * - add usage/reference counts to ip_conntrack_expect + * - export ip_conntrack[_expect]_{find_get,put} functions + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalize L3 protocol dependent part. + * 23 Mar 2004: Yasuyuki Kozakai @USAGI + * - add support for various sizes of conntrack structures. 
+ * + * Derived from net/ipv4/netfilter/ip_conntrack_core.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* This rwlock protects the main hash table, protocol/helper/expected + registrations, conntrack timers*/ +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&nf_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&nf_conntrack_lock) + +#include +#include +#include +#include +#include +#include + +#define NF_CONNTRACK_VERSION "0.3.1" + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_RWLOCK(nf_conntrack_lock); + +/* nf_conntrack_standalone needs this */ +atomic_t nf_conntrack_count = ATOMIC_INIT(0); + +void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; +LIST_HEAD(nf_conntrack_expect_list); +struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; +struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; +static LIST_HEAD(helpers); +unsigned int nf_conntrack_htable_size = 0; +int nf_conntrack_max; +struct list_head *nf_conntrack_hash; +static kmem_cache_t *nf_conntrack_expect_cachep; +struct nf_conn nf_conntrack_untracked; +unsigned int nf_ct_log_invalid; +static LIST_HEAD(unconfirmed); +static int nf_conntrack_vmalloc; + +DEFINE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); +EXPORT_PER_CPU_SYMBOL(nf_conntrack_stat); + +/* + * This scheme offers various size of "struct nf_conn" dependent on + * features(helper, nat, ...) + */ + +#define NF_CT_FEATURES_NAMELEN 256 +static struct { + /* name of slab cache. 
printed in /proc/slabinfo */ + char *name; + + /* size of slab cache */ + size_t size; + + /* slab cache pointer */ + kmem_cache_t *cachep; + + /* allocated slab cache + modules which uses this slab cache */ + int use; + + /* Initialization */ + int (*init_conntrack)(struct nf_conn *, u_int32_t); + +} nf_ct_cache[NF_CT_F_NUM]; + +/* protect members of nf_ct_cache except of "use" */ +DECLARE_RWLOCK(nf_ct_cache_lock); + +/* This avoids calling kmem_cache_create() with same name simultaneously */ +DECLARE_MUTEX(nf_ct_cache_mutex); + +extern struct nf_conntrack_protocol nf_conntrack_generic_protocol; +struct nf_conntrack_protocol * +nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol) +{ + if (unlikely(nf_ct_protos[l3proto] == NULL)) + return &nf_conntrack_generic_protocol; + + return nf_ct_protos[l3proto][protocol]; +} + +void +nf_ct_put(struct nf_conn *ct) +{ + NF_CT_ASSERT(ct); + nf_conntrack_put(&ct->ct_general); +} + +static int nf_conntrack_hash_rnd_initted; +static unsigned int nf_conntrack_hash_rnd; + +static u_int32_t +hash_conntrack(const struct nf_conntrack_tuple *tuple) +{ + unsigned int a, b; + a = jhash((void *)tuple->src.u3.all, sizeof(tuple->src.u3.all), + ((tuple->src.l3num) << 16) | tuple->dst.protonum); + b = jhash((void *)tuple->dst.u3.all, sizeof(tuple->dst.u3.all), + (tuple->src.u.all << 16) | tuple->dst.u.all); + + return jhash_2words(a, b, nf_conntrack_hash_rnd) + % nf_conntrack_htable_size; +} + +struct nf_conn * +alloc_conntrack(u_int32_t features) +{ + struct nf_conn *conntrack = NULL; + + DEBUGP("alloc_conntrack: features=0x%x\n", features); + + READ_LOCK(&nf_ct_cache_lock); + + if (!nf_ct_cache[features].use) { + DEBUGP("alloc_conntrack: not supported features = 0x%x\n", + features); + goto out; + } + + conntrack = kmem_cache_alloc(nf_ct_cache[features].cachep, GFP_ATOMIC); + if (conntrack == NULL) { + DEBUGP("alloc_conntrack: Can't alloc conntrack from cache\n"); + goto out; + } + memset(conntrack, 0, nf_ct_cache[features].size); + 
conntrack->features = features; + if (nf_ct_cache[features].init_conntrack && + nf_ct_cache[features].init_conntrack(conntrack, features) < 0) { + DEBUGP("alloc_conntrack: failed to init\n"); + kmem_cache_free(nf_ct_cache[features].cachep, conntrack); + conntrack = NULL; + goto out; + } + +out: + READ_UNLOCK(&nf_ct_cache_lock); + return conntrack; +} + +void +free_conntrack(struct nf_conn *conntrack) +{ + u_int32_t features = conntrack->features; + NF_CT_ASSERT(features >= NF_CT_F_BASIC && features < NF_CT_F_NUM); + DEBUGP("free_conntrack: features = 0x%x, conntrack=%p\n", features, + conntrack); + kmem_cache_free(nf_ct_cache[features].cachep, conntrack); +} + +/* Initialize "struct nf_conn" which has spaces for helper */ +static int +init_conntrack_for_helper(struct nf_conn *conntrack, u_int32_t features) +{ + + conntrack->help = (union nf_conntrack_help *) + (((unsigned long)conntrack->data + + (__alignof__(union nf_conntrack_help) - 1)) + & (~((unsigned long)(__alignof__(union nf_conntrack_help) -1)))); + return 0; +} + +int nf_conntrack_register_cache(u_int32_t features, const char *name, + size_t size, + int (*init)(struct nf_conn *, u_int32_t)) +{ + int ret = 0; + char *cache_name; + kmem_cache_t *cachep; + + DEBUGP("nf_conntrack_register_cache: features=0x%x, name=%s, size=%d\n", + features, name, size); + + if (features < NF_CT_F_BASIC || features >= NF_CT_F_NUM) { + DEBUGP("nf_conntrack_register_cache: invalid features.: 0x%x\n", + features); + return -EINVAL; + } + + down(&nf_ct_cache_mutex); + + WRITE_LOCK(&nf_ct_cache_lock); + /* e.g: multiple helpers are loaded */ + if (nf_ct_cache[features].use > 0) { + DEBUGP("nf_conntrack_register_cache: already resisterd.\n"); + if ((!strncmp(nf_ct_cache[features].name, name, + NF_CT_FEATURES_NAMELEN)) + && nf_ct_cache[features].size == size + && nf_ct_cache[features].init_conntrack == init) { + DEBUGP("nf_conntrack_register_cache: reusing.\n"); + nf_ct_cache[features].use++; + ret = 0; + } else + ret = -EBUSY; + + 
WRITE_UNLOCK(&nf_ct_cache_lock); + up(&nf_ct_cache_mutex); + return ret; + } + WRITE_UNLOCK(&nf_ct_cache_lock); + + /* + * The memory space for name of slab cache must be alive until + * cache is destroyed. + */ + cache_name = kmalloc(sizeof(char)*NF_CT_FEATURES_NAMELEN, GFP_ATOMIC); + if (cache_name == NULL) { + DEBUGP("nf_conntrack_register_cache: can't alloc cache_name\n"); + ret = -ENOMEM; + goto out_up_mutex; + } + + if (strlcpy(cache_name, name, NF_CT_FEATURES_NAMELEN) + >= NF_CT_FEATURES_NAMELEN) { + printk("nf_conntrack_register_cache: name too long\n"); + ret = -EINVAL; + goto out_free_name; + } + + cachep = kmem_cache_create(cache_name, size, 0, 0, + NULL, NULL); + if (!cachep) { + printk("nf_conntrack_register_cache: Can't create slab cache " + "for the features = 0x%x\n", features); + ret = -ENOMEM; + goto out_free_name; + } + + WRITE_LOCK(&nf_ct_cache_lock); + nf_ct_cache[features].use = 1; + nf_ct_cache[features].size = size; + nf_ct_cache[features].init_conntrack = init; + nf_ct_cache[features].cachep = cachep; + nf_ct_cache[features].name = cache_name; + WRITE_UNLOCK(&nf_ct_cache_lock); + + goto out_up_mutex; + +out_free_name: + kfree(cache_name); +out_up_mutex: + up(&nf_ct_cache_mutex); + return ret; +} + +/* FIXME: In the current, only nf_conntrack_cleanup() can call this function. */ +void nf_conntrack_unregister_cache(u_int32_t features) +{ + kmem_cache_t *cachep; + char *name; + + /* + * This assures that kmem_cache_create() isn't called before destroying + * slab cache. 
+ */ + DEBUGP("nf_conntrack_unregister_cache: 0x%04x\n", features); + down(&nf_ct_cache_mutex); + + WRITE_LOCK(&nf_ct_cache_lock); + if (--nf_ct_cache[features].use > 0) { + WRITE_UNLOCK(&nf_ct_cache_lock); + up(&nf_ct_cache_mutex); + return; + } + cachep = nf_ct_cache[features].cachep; + name = nf_ct_cache[features].name; + nf_ct_cache[features].cachep = NULL; + nf_ct_cache[features].name = NULL; + nf_ct_cache[features].init_conntrack = NULL; + nf_ct_cache[features].size = 0; + WRITE_UNLOCK(&nf_ct_cache_lock); + + synchronize_net(); + + kmem_cache_destroy(cachep); + kfree(name); + + up(&nf_ct_cache_mutex); +} + +int +nf_ct_get_tuple(const struct sk_buff *skb, + unsigned int nhoff, + unsigned int dataoff, + u_int16_t l3num, + u_int8_t protonum, + struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol) +{ + NF_CT_TUPLE_U_BLANK(tuple); + + tuple->src.l3num = l3num; + if (l3proto->pkt_to_tuple(skb, nhoff, tuple) == 0) + return 0; + + tuple->dst.protonum = protonum; + tuple->dst.dir = NF_CT_DIR_ORIGINAL; + + return protocol->pkt_to_tuple(skb, dataoff, tuple); +} + +int +nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_protocol *protocol) +{ + NF_CT_TUPLE_U_BLANK(inverse); + + inverse->src.l3num = orig->src.l3num; + if (l3proto->invert_tuple(inverse, orig) == 0) + return 0; + + inverse->dst.dir = !orig->dst.dir; + + inverse->dst.protonum = orig->dst.protonum; + return protocol->invert_tuple(inverse, orig); +} + +/* nf_conntrack_expect helper functions */ +static void destroy_expect(struct nf_conntrack_expect *exp) +{ + nf_ct_put(exp->master); + NF_CT_ASSERT(!timer_pending(&exp->timeout)); + kmem_cache_free(nf_conntrack_expect_cachep, exp); + NF_CT_STAT_INC(expect_delete); +} + +static void unlink_expect(struct nf_conntrack_expect *exp) +{ + 
MUST_BE_WRITE_LOCKED(&nf_conntrack_lock); + list_del(&exp->list); + /* Logically in destroy_expect, but we hold the lock here. */ + exp->master->expecting--; +} + +static void expectation_timed_out(unsigned long ul_expect) +{ + struct nf_conntrack_expect *exp = (void *)ul_expect; + + WRITE_LOCK(&nf_conntrack_lock); + unlink_expect(exp); + WRITE_UNLOCK(&nf_conntrack_lock); + destroy_expect(exp); +} + +/* If an expectation for this connection is found, it gets deleted from + * global list then returned. */ +static struct nf_conntrack_expect * +find_expectation(const struct nf_conntrack_tuple *tuple) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + /* If master is not in hash table yet (ie. packet hasn't left + this machine yet), how can other end know about expected? + Hence these are not the droids you are looking for (if + master ct never got confirmed, we'd hold a reference to it + and weird things would happen to future packets). */ + if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) + && is_confirmed(i->master) + && del_timer(&i->timeout)) { + unlink_expect(i); + return i; + } + } + return NULL; +} + +/* delete all expectations for this conntrack */ +static void remove_expectations(struct nf_conn *ct) +{ + struct nf_conntrack_expect *i, *tmp; + + /* Optimization: most connections never expect any others. 
*/ + if (ct->expecting == 0) + return; + + list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) { + if (i->master == ct && del_timer(&i->timeout)) { + unlink_expect(i); + destroy_expect(i); + } + } +} + +static void +clean_from_lists(struct nf_conn *ct) +{ + unsigned int ho, hr; + + DEBUGP("clean_from_lists(%p)\n", ct); + MUST_BE_WRITE_LOCKED(&nf_conntrack_lock); + + ho = hash_conntrack(&ct->tuplehash[NF_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[NF_CT_DIR_REPLY].tuple); + LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[NF_CT_DIR_ORIGINAL]); + LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[NF_CT_DIR_REPLY]); + + /* Destroy all pending expectations */ + remove_expectations(ct); +} + +static void +destroy_conntrack(struct nf_conntrack *nfct) +{ + struct nf_conn *ct = (struct nf_conn *)nfct; + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_protocol *proto; + + DEBUGP("destroy_conntrack(%p)\n", ct); + NF_CT_ASSERT(atomic_read(&nfct->use) == 0); + NF_CT_ASSERT(!timer_pending(&ct->timeout)); + + /* To make sure we don't get any weird locking issues here: + * destroy_conntrack() MUST NOT be called with a write lock + * to nf_conntrack_lock!!! -HW */ + l3proto = nf_ct_find_l3proto(ct->tuplehash[NF_CT_DIR_REPLY].tuple.src.l3num); + if (l3proto && l3proto->destroy) + l3proto->destroy(ct); + + proto = nf_ct_find_proto(ct->tuplehash[NF_CT_DIR_REPLY].tuple.src.l3num, + ct->tuplehash[NF_CT_DIR_REPLY].tuple.dst.protonum); + if (proto && proto->destroy) + proto->destroy(ct); + + if (nf_conntrack_destroyed) + nf_conntrack_destroyed(ct); + + WRITE_LOCK(&nf_conntrack_lock); + /* Expectations will have been removed in clean_from_lists, + * except TFTP can create an expectation on the first packet, + * before connection is in the list, so we need to clean here, + * too. */ + remove_expectations(ct); + + /* We overload first tuple to link into unconfirmed list. 
*/ + if (!is_confirmed(ct)) { + BUG_ON(list_empty(&ct->tuplehash[NF_CT_DIR_ORIGINAL].list)); + list_del(&ct->tuplehash[NF_CT_DIR_ORIGINAL].list); + } + + NF_CT_STAT_INC(delete); + WRITE_UNLOCK(&nf_conntrack_lock); + + if (ct->master) + nf_ct_put(ct->master); + + DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); + free_conntrack(ct); + atomic_dec(&nf_conntrack_count); +} + +static void death_by_timeout(unsigned long ul_conntrack) +{ + struct nf_conn *ct = (void *)ul_conntrack; + + WRITE_LOCK(&nf_conntrack_lock); + /* Inside lock so preempt is disabled on module removal path. + * Otherwise we can get spurious warnings. */ + NF_CT_STAT_INC(delete_list); + clean_from_lists(ct); + WRITE_UNLOCK(&nf_conntrack_lock); + nf_ct_put(ct); +} + +static inline int +conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + MUST_BE_READ_LOCKED(&nf_conntrack_lock); + return tuplehash_to_ctrack(i) != ignored_conntrack + && nf_ct_tuple_equal(tuple, &i->tuple); +} + +static struct nf_conntrack_tuple_hash * +__nf_conntrack_find(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + struct nf_conntrack_tuple_hash *h; + unsigned int hash = hash_conntrack(tuple); + + MUST_BE_READ_LOCKED(&nf_conntrack_lock); + list_for_each_entry(h, &nf_conntrack_hash[hash], list) { + if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { + NF_CT_STAT_INC(found); + return h; + } + NF_CT_STAT_INC(searched); + } + + return NULL; +} + +/* Find a connection corresponding to a tuple. 
*/ +struct nf_conntrack_tuple_hash * +nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + struct nf_conntrack_tuple_hash *h; + + READ_LOCK(&nf_conntrack_lock); + h = __nf_conntrack_find(tuple, ignored_conntrack); + if (h) + atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); + READ_UNLOCK(&nf_conntrack_lock); + + return h; +} + +/* Confirm a connection given skb; places it in hash table */ +int +__nf_conntrack_confirm(struct sk_buff **pskb) +{ + unsigned int hash, repl_hash; + struct nf_conn *ct; + enum nf_conntrack_info ctinfo; + + ct = nf_ct_get(*pskb, &ctinfo); + + /* ipt_REJECT uses nf_conntrack_attach to attach related + ICMP/TCP RST packets in other direction. Actual packet + which created connection will be NF_CT_NEW or for an + expected connection, NF_CT_RELATED. */ + if (NFCTINFO2DIR(ctinfo) != NF_CT_DIR_ORIGINAL) + return NF_ACCEPT; + + hash = hash_conntrack(&ct->tuplehash[NF_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(&ct->tuplehash[NF_CT_DIR_REPLY].tuple); + + /* We're not in hash table, and we refuse to set up related + connections for unconfirmed conns. But packet copies and + REJECT will give spurious warnings here. */ + /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ + + /* No external references means noone else could have + confirmed us. */ + NF_CT_ASSERT(!is_confirmed(ct)); + DEBUGP("Confirming conntrack %p\n", ct); + + WRITE_LOCK(&nf_conntrack_lock); + + /* See if there's one in the list already, including reverse: + NAT could have grabbed it without realizing, since we're + not in the hash. If there is, we lost race. 
*/ + if (!LIST_FIND(&nf_conntrack_hash[hash], + conntrack_tuple_cmp, + struct nf_conntrack_tuple_hash *, + &ct->tuplehash[NF_CT_DIR_ORIGINAL].tuple, NULL) + && !LIST_FIND(&nf_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct nf_conntrack_tuple_hash *, + &ct->tuplehash[NF_CT_DIR_REPLY].tuple, NULL)) { + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[NF_CT_DIR_ORIGINAL].list); + + list_prepend(&nf_conntrack_hash[hash], + &ct->tuplehash[NF_CT_DIR_ORIGINAL]); + list_prepend(&nf_conntrack_hash[repl_hash], + &ct->tuplehash[NF_CT_DIR_REPLY]); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. */ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(NF_S_CONFIRMED_BIT, &ct->status); + NF_CT_STAT_INC(insert); + WRITE_UNLOCK(&nf_conntrack_lock); + return NF_ACCEPT; + } + + NF_CT_STAT_INC(insert_failed); + WRITE_UNLOCK(&nf_conntrack_lock); + return NF_DROP; +} + +/* Returns true if a connection corresponds to the tuple (required + for NAT). */ +int +nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + struct nf_conntrack_tuple_hash *h; + + READ_LOCK(&nf_conntrack_lock); + h = __nf_conntrack_find(tuple, ignored_conntrack); + READ_UNLOCK(&nf_conntrack_lock); + + return h != NULL; +} + +/* There's a small race here where we may free a just-assured + connection. Too bad: we're in trouble anyway. 
*/ +static inline int unreplied(const struct nf_conntrack_tuple_hash *i) +{ + return !(test_bit(NF_S_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); +} + +static int early_drop(struct list_head *chain) +{ + /* Traverse backwards: gives us oldest, which is roughly LRU */ + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct = NULL; + int dropped = 0; + + READ_LOCK(&nf_conntrack_lock); + h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); + if (h) { + ct = tuplehash_to_ctrack(h); + atomic_inc(&ct->ct_general.use); + } + READ_UNLOCK(&nf_conntrack_lock); + + if (!ct) + return dropped; + + if (del_timer(&ct->timeout)) { + death_by_timeout((unsigned long)ct); + dropped = 1; + NF_CT_STAT_INC(early_drop); + } + nf_ct_put(ct); + return dropped; +} + +static inline int helper_cmp(const struct nf_conntrack_helper *i, + const struct nf_conntrack_tuple *rtuple) +{ + return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); +} + +static struct nf_conntrack_helper * +nf_ct_find_helper(const struct nf_conntrack_tuple *tuple) +{ + return LIST_FIND(&helpers, helper_cmp, + struct nf_conntrack_helper *, + tuple); +} + +/* Allocate a new conntrack: we return -ENOMEM if classification + failed due to stress. Otherwise it really is unclassifiable. */ +static struct nf_conntrack_tuple_hash * +init_conntrack(const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l3proto *l3proto, + struct nf_conntrack_protocol *protocol, + struct sk_buff *skb, + unsigned int dataoff) +{ + struct nf_conn *conntrack; + struct nf_conntrack_tuple repl_tuple; + size_t hash; + struct nf_conntrack_expect *exp; + u_int32_t features = 0; + int helper_used = 0; + + if (!nf_conntrack_hash_rnd_initted) { + get_random_bytes(&nf_conntrack_hash_rnd, 4); + nf_conntrack_hash_rnd_initted = 1; + } + + hash = hash_conntrack(tuple); + + if (nf_conntrack_max + && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { + /* Try dropping from this hash chain. 
*/ + if (!early_drop(&nf_conntrack_hash[hash])) { + if (net_ratelimit()) + printk(KERN_WARNING + "nf_conntrack: table full, dropping" + " packet.\n"); + return ERR_PTR(-ENOMEM); + } + } + + if (!nf_ct_invert_tuple(&repl_tuple, tuple, l3proto, protocol)) { + DEBUGP("Can't invert tuple.\n"); + return NULL; + } + + /* find features needed by this conntrack. */ + features = l3proto->get_features(tuple); + READ_LOCK(&nf_conntrack_lock); + if (nf_ct_find_helper(&repl_tuple) != NULL) { + features |= NF_CT_F_HELP; + helper_used = 1; + } + READ_UNLOCK(&nf_conntrack_lock); + + conntrack = alloc_conntrack(features); + if (!conntrack) { + DEBUGP("Can't allocate conntrack.\n"); + return ERR_PTR(-ENOMEM); + } + + atomic_set(&conntrack->ct_general.use, 1); + conntrack->ct_general.destroy = destroy_conntrack; + conntrack->tuplehash[NF_CT_DIR_ORIGINAL].tuple = *tuple; + conntrack->tuplehash[NF_CT_DIR_REPLY].tuple = repl_tuple; + if (!protocol->new(conntrack, skb, dataoff)) { + free_conntrack(conntrack); + DEBUGP("init conntrack: can't track with proto module\n"); + return NULL; + } + /* Don't set timer yet: wait for confirmation */ + init_timer(&conntrack->timeout); + conntrack->timeout.data = (unsigned long)conntrack; + conntrack->timeout.function = death_by_timeout; + + WRITE_LOCK(&nf_conntrack_lock); + exp = find_expectation(tuple); + + if (exp) { + DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", + conntrack, exp); + /* Welcome, Mr. Bond. We've been expecting you... */ + __set_bit(NF_S_EXPECTED_BIT, &conntrack->status); + conntrack->master = conntrack; +#if CONFIG_NF_CONNTRACK_MARK + conntrack->mark = exp->master->mark; +#endif + nf_conntrack_get(&conntrack->master->ct_general); + NF_CT_STAT_INC(expect_new); + } else { + conntrack->helper = nf_ct_find_helper(&repl_tuple); + + NF_CT_STAT_INC(new); + } + + /* Overload tuple linked list to put us in unconfirmed list. 
*/ + list_add(&conntrack->tuplehash[NF_CT_DIR_ORIGINAL].list, &unconfirmed); + + atomic_inc(&nf_conntrack_count); + WRITE_UNLOCK(&nf_conntrack_lock); + + if (exp) { + if (exp->expectfn) + exp->expectfn(conntrack, exp); + destroy_expect(exp); + } + + return &conntrack->tuplehash[NF_CT_DIR_ORIGINAL]; +} + +/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ +static inline struct nf_conn * +resolve_normal_ct(struct sk_buff *skb, + unsigned int dataoff, + u_int16_t l3num, + u_int8_t protonum, + struct nf_conntrack_l3proto *l3proto, + struct nf_conntrack_protocol *proto, + int *set_reply, + enum nf_conntrack_info *ctinfo) +{ + struct nf_conntrack_tuple tuple; + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; + + if (!nf_ct_get_tuple(skb, (unsigned int)(skb->nh.raw - skb->data), + dataoff, l3num, protonum, &tuple, l3proto, + proto)) { + DEBUGP("resolve_normal_ct: Can't get tuple\n"); + return NULL; + } + + /* look for tuple match */ + h = nf_conntrack_find_get(&tuple, NULL); + if (!h) { + h = init_conntrack(&tuple, l3proto, proto, skb, dataoff); + if (!h) + return NULL; + if (IS_ERR(h)) + return (void *)h; + } + ct = tuplehash_to_ctrack(h); + + /* It exists; we have (non-exclusive) reference. */ + if (NF_CT_DIRECTION(h) == NF_CT_DIR_REPLY) { + *ctinfo = NF_CT_ESTABLISHED + NF_CT_IS_REPLY; + /* Please set reply bit if this packet OK */ + *set_reply = 1; + } else { + /* Once we've had two way comms, always ESTABLISHED. 
*/ + if (test_bit(NF_S_SEEN_REPLY_BIT, &ct->status)) { + DEBUGP("nf_conntrack_in: normal packet for %p\n", ct); + *ctinfo = NF_CT_ESTABLISHED; + } else if (test_bit(NF_S_EXPECTED_BIT, &ct->status)) { + DEBUGP("nf_conntrack_in: related packet for %p\n", ct); + *ctinfo = NF_CT_RELATED; + } else { + DEBUGP("nf_conntrack_in: new packet for %p\n", ct); + *ctinfo = NF_CT_NEW; + } + *set_reply = 0; + } + skb->nfct = &ct->ct_general; + skb->nfctinfo = *ctinfo; + return ct; +} + +unsigned int +nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb) +{ + struct nf_conn *ct; + enum nf_conntrack_info ctinfo; + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_protocol *proto; + unsigned int dataoff; + u_int8_t protonum; + int set_reply; + int ret; + + /* Previously seen (loopback or untracked)? Ignore. */ + if ((*pskb)->nfct) { + NF_CT_STAT_INC(ignore); + return NF_ACCEPT; + } + + l3proto = nf_ct_find_l3proto((u_int16_t)pf); + if (l3proto->prepare(pskb, hooknum, &dataoff, &protonum, &ret) == 0) { + DEBUGP("not prepared to track yet or error occured\n"); + return ret; + } + + proto = nf_ct_find_proto((u_int16_t)pf, protonum); + + /* It may be an special packet, error, unclean... + * inverse of the return code tells to the netfilter + * core what to do with the packet. */ + if (proto->error != NULL && + (ret = proto->error(*pskb, dataoff, &ctinfo, pf, hooknum)) <= 0) { + NF_CT_STAT_INC(error); + NF_CT_STAT_INC(invalid); + return -ret; + } + + ct = resolve_normal_ct(*pskb, dataoff, pf, protonum, l3proto, proto, + &set_reply, &ctinfo); + if (!ct) { + /* Not valid part of a connection */ + NF_CT_STAT_INC(invalid); + return NF_ACCEPT; + } + + if (IS_ERR(ct)) { + /* Too stressed to deal. 
*/ + NF_CT_STAT_INC(drop); + return NF_DROP; + } + + NF_CT_ASSERT((*pskb)->nfct); + + ret = proto->packet(ct, *pskb, dataoff, ctinfo, pf, hooknum); + if (ret < 0) { + /* Invalid: inverse of the return code tells + * the netfilter core what to do */ + DEBUGP("nf_conntrack_in: Can't track with proto module\n"); + nf_conntrack_put((*pskb)->nfct); + (*pskb)->nfct = NULL; + NF_CT_STAT_INC(invalid); + return -ret; + } + + if (set_reply) + set_bit(NF_S_SEEN_REPLY_BIT, &ct->status); + + return ret; +} + +int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, + const struct nf_conntrack_tuple *orig) +{ + return nf_ct_invert_tuple(inverse, orig, + nf_ct_find_l3proto(orig->src.l3num), + nf_ct_find_proto(orig->src.l3num, + orig->dst.protonum)); +} + +/* Would two expected things clash? */ +static inline int expect_clash(const struct nf_conntrack_expect *a, + const struct nf_conntrack_expect *b) +{ + /* Part covered by intersection of masks must be unequal, + otherwise they clash */ + struct nf_conntrack_tuple intersect_mask; + int count; + + intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num; + intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all; + intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all; + intersect_mask.dst.protonum = a->mask.dst.protonum + & b->mask.dst.protonum; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + intersect_mask.src.u3.all[count] = + a->mask.src.u3.all[count] & b->mask.src.u3.all[count]; + } + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){ + intersect_mask.dst.u3.all[count] = + a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count]; + } + + return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); +} + +static inline int expect_matches(const struct nf_conntrack_expect *a, + const struct nf_conntrack_expect *b) +{ + return a->master == b->master + && nf_ct_tuple_equal(&a->tuple, &b->tuple) + && nf_ct_tuple_equal(&a->mask, &b->mask); +} + +/* Generally a bad idea to call this: 
could have matched already. */ +void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_expect *i; + + WRITE_LOCK(&nf_conntrack_lock); + /* choose the the oldest expectation to evict */ + list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + if (expect_matches(i, exp) && del_timer(&i->timeout)) { + unlink_expect(i); + WRITE_UNLOCK(&nf_conntrack_lock); + destroy_expect(i); + return; + } + } + WRITE_UNLOCK(&nf_conntrack_lock); +} + +struct nf_conntrack_expect *nf_conntrack_expect_alloc(void) +{ + struct nf_conntrack_expect *new; + + new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC); + if (!new) { + DEBUGP("expect_related: OOM allocating expect\n"); + return NULL; + } + new->master = NULL; + return new; +} + +void nf_conntrack_expect_free(struct nf_conntrack_expect *expect) +{ + kmem_cache_free(nf_conntrack_expect_cachep, expect); +} + +static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp) +{ + atomic_inc(&exp->master->ct_general.use); + exp->master->expecting++; + list_add(&exp->list, &nf_conntrack_expect_list); + + if (exp->master->helper->timeout) { + init_timer(&exp->timeout); + exp->timeout.data = (unsigned long)exp; + exp->timeout.function = expectation_timed_out; + exp->timeout.expires + = jiffies + exp->master->helper->timeout * HZ; + add_timer(&exp->timeout); + } else + exp->timeout.function = NULL; + + NF_CT_STAT_INC(expect_create); +} + +/* Race with expectations being used means we could have none to find; OK. 
*/ +static void evict_oldest_expect(struct nf_conn *master) +{ + struct nf_conntrack_expect *i; + + list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) { + if (i->master == master) { + if (del_timer(&i->timeout)) { + unlink_expect(i); + destroy_expect(i); + } + break; + } + } +} + +static inline int refresh_timer(struct nf_conntrack_expect *i) +{ + if (!del_timer(&i->timeout)) + return 0; + + i->timeout.expires = jiffies + i->master->helper->timeout*HZ; + add_timer(&i->timeout); + return 1; +} + +int nf_conntrack_expect_related(struct nf_conntrack_expect *expect) +{ + struct nf_conntrack_expect *i; + int ret; + + DEBUGP("nf_conntrack_expect_related %p\n", related_to); + DEBUGP("tuple: "); NF_CT_DUMP_TUPLE(&expect->tuple); + DEBUGP("mask: "); NF_CT_DUMP_TUPLE(&expect->mask); + + WRITE_LOCK(&nf_conntrack_lock); + list_for_each_entry(i, &nf_conntrack_expect_list, list) { + if (expect_matches(i, expect)) { + /* Refresh timer: if it's dying, ignore.. */ + if (refresh_timer(i)) { + ret = 0; + /* We don't need the one they've given us. */ + nf_conntrack_expect_free(expect); + goto out; + } + } else if (expect_clash(i, expect)) { + ret = -EBUSY; + goto out; + } + } + /* Will be over limit? */ + if (expect->master->helper->max_expected && + expect->master->expecting >= expect->master->helper->max_expected) + evict_oldest_expect(expect->master); + + nf_conntrack_expect_insert(expect); + ret = 0; +out: + WRITE_UNLOCK(&nf_conntrack_lock); + return ret; +} + +/* Alter reply tuple (maybe alter helper). 
This is for NAT, and is + implicitly racy: see __nf_conntrack_confirm */ +void nf_conntrack_alter_reply(struct nf_conn *conntrack, + const struct nf_conntrack_tuple *newreply) +{ + WRITE_LOCK(&nf_conntrack_lock); + /* Should be unconfirmed, so not in hash table yet */ + NF_CT_ASSERT(!is_confirmed(conntrack)); + + DEBUGP("Altering reply tuple of %p to ", conntrack); + NF_CT_DUMP_TUPLE(newreply); + + conntrack->tuplehash[NF_CT_DIR_REPLY].tuple = *newreply; + if (!conntrack->master && conntrack->expecting == 0) + conntrack->helper = nf_ct_find_helper(newreply); + WRITE_UNLOCK(&nf_conntrack_lock); +} + +int nf_conntrack_helper_register(struct nf_conntrack_helper *me) +{ + int ret; + BUG_ON(me->timeout == 0); + + ret = nf_conntrack_register_cache(NF_CT_F_HELP, "nf_conntrack:help", + sizeof(struct nf_conn) + + sizeof(union nf_conntrack_help) + + __alignof__(union nf_conntrack_help), + init_conntrack_for_helper); + if (ret < 0) { + printk(KERN_ERR "nf_conntrack_helper_reigster: Unable to create slab cache for conntracks\n"); + return ret; + } + WRITE_LOCK(&nf_conntrack_lock); + list_prepend(&helpers, me); + WRITE_UNLOCK(&nf_conntrack_lock); + + return 0; +} + +static inline int unhelp(struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_helper *me) +{ + if (tuplehash_to_ctrack(i)->helper == me) + tuplehash_to_ctrack(i)->helper = NULL; + return 0; +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ + unsigned int i; + struct nf_conntrack_expect *exp, *tmp; + + /* Need write lock here, to delete helper. */ + WRITE_LOCK(&nf_conntrack_lock); + LIST_DELETE(&helpers, me); + + /* Get rid of expectations */ + list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { + if (exp->master->helper == me && del_timer(&exp->timeout)) { + unlink_expect(exp); + destroy_expect(exp); + } + } + + /* Get rid of expecteds, set helpers to NULL. 
*/ + LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); + for (i = 0; i < nf_conntrack_htable_size; i++) + LIST_FIND_W(&nf_conntrack_hash[i], unhelp, + struct nf_conntrack_tuple_hash *, me); + WRITE_UNLOCK(&nf_conntrack_lock); + + /* Someone could be still looking at the helper in a bh. */ + synchronize_net(); +} + +static inline void ct_add_counters(struct nf_conn *ct, + enum nf_conntrack_info ctinfo, + const struct sk_buff *skb) +{ +#ifdef CONFIG_NF_CT_ACCT + if (skb) { + ct->counters[NFCTINFO2DIR(ctinfo)].packets++; + /* XXX totlen should be used ? - YK */ + ct->counters[NFCTINFO2DIR(ctinfo)].bytes += + ntohs(skb->len - (unsigned int)(skb->nh.raw + - skb->data)); + } +#endif +} + +/* Refresh conntrack for this many jiffies and do accounting (if skb != NULL) */ +void nf_ct_refresh_acct(struct nf_conn *ct, + enum nf_conntrack_info ctinfo, + const struct sk_buff *skb, + unsigned long extra_jiffies) +{ + NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); + + /* If not in hash table, timer will not be active yet */ + if (!is_confirmed(ct)) { + ct->timeout.expires = extra_jiffies; + ct_add_counters(ct, ctinfo, skb); + } else { + WRITE_LOCK(&nf_conntrack_lock); + /* Need del_timer for race avoidance (may already be dying). */ + if (del_timer(&ct->timeout)) { + ct->timeout.expires = jiffies + extra_jiffies; + add_timer(&ct->timeout); + } + ct_add_counters(ct, ctinfo, skb); + WRITE_UNLOCK(&nf_conntrack_lock); + } +} + +/* Used by ipt_REJECT and ip6t_REJECT. 
*/ +void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) +{ + struct nf_conn *ct; + enum nf_conntrack_info ctinfo; + + /* This ICMP is in reverse direction to the packet which caused it */ + ct = nf_ct_get(skb, &ctinfo); + if (NFCTINFO2DIR(ctinfo) == NF_CT_DIR_ORIGINAL) + ctinfo = NF_CT_RELATED + NF_CT_IS_REPLY; + else + ctinfo = NF_CT_RELATED; + + /* Attach to new skbuff, and increment count */ + nskb->nfct = &ct->ct_general; + nskb->nfctinfo = ctinfo; + nf_conntrack_get(nskb->nfct); +} + +static inline int +do_iter(const struct nf_conntrack_tuple_hash *i, + int (*iter)(struct nf_conn *i, void *data), + void *data) +{ + return iter(tuplehash_to_ctrack(i), data); +} + +/* Bring out ya dead! */ +static struct nf_conntrack_tuple_hash * +get_next_corpse(int (*iter)(struct nf_conn *i, void *data), + void *data, unsigned int *bucket) +{ + struct nf_conntrack_tuple_hash *h = NULL; + + WRITE_LOCK(&nf_conntrack_lock); + for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, + struct nf_conntrack_tuple_hash *, iter, data); + if (h) + break; + } + if (!h) + h = LIST_FIND_W(&unconfirmed, do_iter, + struct nf_conntrack_tuple_hash *, iter, data); + if (h) + atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); + WRITE_UNLOCK(&nf_conntrack_lock); + + return h; +} + +void +nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) +{ + struct nf_conntrack_tuple_hash *h; + unsigned int bucket = 0; + + while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { + struct nf_conn *ct = tuplehash_to_ctrack(h); + /* Time to push up daises... */ + if (del_timer(&ct->timeout)) + death_by_timeout((unsigned long)ct); + /* ... else the timer will get him soon. 
*/ + + nf_ct_put(ct); + } +} + +static int kill_all(struct nf_conn *i, void *data) +{ + return 1; +} + +static void free_conntrack_hash(void) +{ + if (nf_conntrack_vmalloc) + vfree(nf_conntrack_hash); + else + free_pages((unsigned long)nf_conntrack_hash, + get_order(sizeof(struct list_head) + * nf_conntrack_htable_size)); +} + +/* Mishearing the voices in his head, our hero wonders how he's + supposed to kill the mall. */ +void nf_conntrack_cleanup(void) +{ + int i; + + /* This makes sure all current packets have passed through + netfilter framework. Roll on, two-stage module + delete... */ + synchronize_net(); + + i_see_dead_people: + nf_ct_iterate_cleanup(kill_all, NULL); + if (atomic_read(&nf_conntrack_count) != 0) { + schedule(); + goto i_see_dead_people; + } + + for (i = 0; i < NF_CT_F_NUM; i++) { + if (nf_ct_cache[i].use == 0) + continue; + + NF_CT_ASSERT(nf_ct_cache[i].use == 1); + nf_ct_cache[i].use = 1; + nf_conntrack_unregister_cache(i); + } + kmem_cache_destroy(nf_conntrack_expect_cachep); + free_conntrack_hash(); +} + +static int hashsize; +module_param(hashsize, int, 0400); + +int __init nf_conntrack_init(void) +{ + unsigned int i; + int ret; + + /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB + * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ + if (hashsize) { + nf_conntrack_htable_size = hashsize; + } else { + nf_conntrack_htable_size + = (((num_physpages << PAGE_SHIFT) / 16384) + / sizeof(struct list_head)); + if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) + nf_conntrack_htable_size = 8192; + if (nf_conntrack_htable_size < 16) + nf_conntrack_htable_size = 16; + } + nf_conntrack_max = 8 * nf_conntrack_htable_size; + + printk("nf_conntrack version %s (%u buckets, %d max)\n", + NF_CONNTRACK_VERSION, nf_conntrack_htable_size, + nf_conntrack_max); + + /* AK: the hash table is twice as big than needed because it + uses list_head. it would be much nicer to caches to use a + single pointer list head here. 
*/ + nf_conntrack_vmalloc = 0; + nf_conntrack_hash + =(void*)__get_free_pages(GFP_KERNEL, + get_order(sizeof(struct list_head) + *nf_conntrack_htable_size)); + if (!nf_conntrack_hash) { + nf_conntrack_vmalloc = 1; + printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); + nf_conntrack_hash = vmalloc(sizeof(struct list_head) + * nf_conntrack_htable_size); + } + if (!nf_conntrack_hash) { + printk(KERN_ERR "Unable to create nf_conntrack_hash\n"); + goto err_unreg_sockopt; + } + + ret = nf_conntrack_register_cache(NF_CT_F_BASIC, "nf_conntrack:basic", + sizeof(struct nf_conn), NULL); + if (ret < 0) { + printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + goto err_free_hash; + } + + nf_conntrack_expect_cachep = kmem_cache_create("nf_conntrack_expect", + sizeof(struct nf_conntrack_expect), + 0, 0, NULL, NULL); + if (!nf_conntrack_expect_cachep) { + printk(KERN_ERR "Unable to create nf_expect slab cache\n"); + goto err_free_conntrack_slab; + } + + /* Don't NEED lock here, but good form anyway. 
*/ + WRITE_LOCK(&nf_conntrack_lock); + for (i = 0; i < PF_MAX; i++) + nf_ct_l3protos[i] = &nf_conntrack_generic_l3proto; + WRITE_UNLOCK(&nf_conntrack_lock); + + for (i = 0; i < nf_conntrack_htable_size; i++) + INIT_LIST_HEAD(&nf_conntrack_hash[i]); + + /* Set up fake conntrack: + - to never be deleted, not in any hashes */ + atomic_set(&nf_conntrack_untracked.ct_general.use, 1); + /* - and look it like as a confirmed connection */ + set_bit(NF_S_CONFIRMED_BIT, &nf_conntrack_untracked.status); + + return ret; + +err_free_conntrack_slab: + nf_conntrack_unregister_cache(NF_CT_F_BASIC); +err_free_hash: + vfree(nf_conntrack_hash); +err_unreg_sockopt: + return -ENOMEM; +} diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/nf_conntrack_ftp.c linux-2.6.11.3/net/netfilter/nf_conntrack_ftp.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_ftp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_ftp.c 2005-03-13 22:06:02.225215880 +0100 @@ -0,0 +1,690 @@ +/* FTP extension for connection tracking. */ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * (C) 2003,2004 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - enable working with Layer 3 protocol independent connection tracking. + * - track EPRT and EPSV commands with IPv6 address. + * + * Derived from net/ipv4/netfilter/ip_conntrack_ftp.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Rusty Russell "); +MODULE_DESCRIPTION("ftp connection tracking helper"); + +/* This is slow, but it's simple. 
--RR */ +static char ftp_buffer[65536]; + +static DECLARE_LOCK(nf_ftp_lock); + +#define MAX_PORTS 8 +static int ports[MAX_PORTS]; +static int ports_c; +module_param_array(ports, int, &ports_c, 0400); + +static int loose; +module_param(loose, int, 0600); + +unsigned int (*nf_nat_ftp_hook)(struct sk_buff **pskb, + enum nf_conntrack_info ctinfo, + enum nf_ct_ftp_type type, + unsigned int matchoff, + unsigned int matchlen, + struct nf_conntrack_expect *exp, + u32 *seq); +EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, char); +static int try_eprt(const char *, size_t, struct nf_conntrack_man *, char); +static int try_epsv_response(const char *, size_t, struct nf_conntrack_man *, + char); + +static struct ftp_search { + enum nf_conntrack_dir dir; + const char *pattern; + size_t plen; + char skip; + char term; + enum nf_ct_ftp_type ftptype; + int (*getnum)(const char *, size_t, struct nf_conntrack_man *, char); +} search[] = { + { + NF_CT_DIR_ORIGINAL, + "PORT", sizeof("PORT") - 1, ' ', '\r', + NF_CT_FTP_PORT, + try_rfc959, + }, + { + NF_CT_DIR_REPLY, + "227 ", sizeof("227 ") - 1, '(', ')', + NF_CT_FTP_PASV, + try_rfc959, + }, + { + NF_CT_DIR_ORIGINAL, + "EPRT", sizeof("EPRT") - 1, ' ', '\r', + NF_CT_FTP_EPRT, + try_eprt, + }, + { + NF_CT_DIR_REPLY, + "229 ", sizeof("229 ") - 1, '(', ')', + NF_CT_FTP_EPSV, + try_epsv_response, + }, +}; + +/* This code is based on inet_pton() in glibc-2.2.4 */ +static int +get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) +{ + static const char xdigits[] = "0123456789abcdef"; + u_int8_t tmp[16], *tp, *endp, *colonp; + int ch, saw_xdigit; + u_int32_t val; + size_t clen = 0; + + tp = memset(tmp, '\0', sizeof(tmp)); + endp = tp + sizeof(tmp); + colonp = NULL; + + /* Leading :: requires some special handling. 
*/ + if (*src == ':'){ + if (*++src != ':') { + DEBUGP("invalid \":\" at the head of addr\n"); + return 0; + } + clen++; + } + + saw_xdigit = 0; + val = 0; + while ((clen < dlen) && (*src != term)) { + const char *pch; + + ch = tolower(*src++); + clen++; + + pch = strchr(xdigits, ch); + if (pch != NULL) { + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return 0; + + saw_xdigit = 1; + continue; + } + if (ch != ':') { + DEBUGP("get_ipv6_addr: invalid char. \'%c\'\n", ch); + return 0; + } + + if (!saw_xdigit) { + if (colonp) { + DEBUGP("invalid location of \"::\".\n"); + return 0; + } + colonp = tp; + continue; + } else if (*src == term) { + DEBUGP("trancated IPv6 addr\n"); + return 0; + } + + if (tp + 2 > endp) + return 0; + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + + saw_xdigit = 0; + val = 0; + continue; + } + if (saw_xdigit) { + if (tp + 2 > endp) + return 0; + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + } + if (colonp != NULL) { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const int n = tp - colonp; + int i; + + if (tp == endp) + return 0; + + for (i = 1; i <= n; i++) { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp || (*src != term)) + return 0; + + memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr)); + return clen; +} + +static int try_number(const char *data, size_t dlen, u_int32_t array[], + int array_size, char sep, char term) +{ + u_int32_t i, len; + + memset(array, 0, sizeof(array[0])*array_size); + + /* Keep data pointing at next char. */ + for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) { + if (*data >= '0' && *data <= '9') { + array[i] = array[i]*10 + *data - '0'; + } + else if (*data == sep) + i++; + else { + /* Unexpected character; true if it's the + terminator and we're finished. 
*/ + if (*data == term && i == array_size - 1) + return len; + + DEBUGP("Char %u (got %u nums) `%u' unexpected\n", + len, i, *data); + return 0; + } + } + DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep); + + return 0; +} + +/* Returns 0, or length of numbers: 192,168,1,1,5,6 */ +static int try_rfc959(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term) +{ + int length; + u_int32_t array[6]; + + length = try_number(data, dlen, array, 6, ',', term); + if (length == 0) + return 0; + + cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) | + (array[2] << 8) | array[3]); + cmd->u.tcp.port = htons((array[4] << 8) | array[5]); + return length; +} + +/* Grab port: number up to delimiter */ +static int get_port(const char *data, int start, size_t dlen, char delim, + u_int16_t *port) +{ + u_int16_t tmp_port = 0; + int i; + + for (i = start; i < dlen; i++) { + /* Finished? */ + if (data[i] == delim) { + if (tmp_port == 0) + break; + *port = htons(tmp_port); + DEBUGP("get_port: return %d\n", tmp_port); + return i + 1; + } + else if (data[i] >= '0' && data[i] <= '9') + tmp_port = tmp_port*10 + data[i] - '0'; + else { /* Some other crap */ + DEBUGP("get_port: invalid char.\n"); + break; + } + } + return 0; +} + +/* Returns 0, or length of numbers: |1|132.235.1.2|6275| or |2|3ffe::1|6275| */ +static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, + char term) +{ + char delim; + int length; + + /* First character is delimiter, then "1" for IPv4 or "2" for IPv6, + then delimiter again. 
*/ + if (dlen <= 3) { + DEBUGP("EPRT: too short\n"); + return 0; + } + delim = data[0]; + if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { + DEBUGP("try_eprt: invalid delimitter.\n"); + return 0; + } + + if ((cmd->l3num == PF_INET && data[1] != '1') || + (cmd->l3num == PF_INET6 && data[1] != '2')) { + DEBUGP("EPRT: invalid protocol number.\n"); + return 0; + } + + DEBUGP("EPRT: Got %c%c%c\n", delim, data[1], delim); + + if (data[1] == '1') { + u_int32_t array[4]; + + /* Now we have IP address. */ + length = try_number(data + 3, dlen - 3, array, 4, '.', delim); + if (length != 0) + cmd->u3.ip = htonl((array[0] << 24) | (array[1] << 16) + | (array[2] << 8) | array[3]); + } else { + /* Now we have IPv6 address. */ + length = get_ipv6_addr(data + 3, dlen - 3, + (struct in6_addr *)cmd->u3.ip6, delim); + } + + if (length == 0) + return 0; + DEBUGP("EPRT: Got IP address!\n"); + /* Start offset includes initial "|1|", and trailing delimiter */ + return get_port(data, 3 + length + 1, dlen, delim, &cmd->u.tcp.port); +} + +/* Returns 0, or length of numbers: |||6446| */ +static int try_epsv_response(const char *data, size_t dlen, + struct nf_conntrack_man *cmd, char term) +{ + char delim; + + /* Three delimiters. */ + if (dlen <= 3) return 0; + delim = data[0]; + if (isdigit(delim) || delim < 33 || delim > 126 + || data[1] != delim || data[2] != delim) + return 0; + + return get_port(data, 3, dlen, delim, &cmd->u.tcp.port); +} + +/* Return 1 for match, 0 for accept, -1 for partial. */ +static int find_pattern(const char *data, size_t dlen, + const char *pattern, size_t plen, + char skip, char term, + unsigned int *numoff, + unsigned int *numlen, + struct nf_conntrack_man *cmd, + int (*getnum)(const char *, size_t, + struct nf_conntrack_man *, char)) +{ + size_t i; + + DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); + if (dlen == 0) + return 0; + + if (dlen <= plen) { + /* Short packet: try for partial? 
*/ + if (strnicmp(data, pattern, dlen) == 0) + return -1; + else return 0; + } + + if (strnicmp(data, pattern, plen) != 0) { +#if 0 + size_t i; + + DEBUGP("ftp: string mismatch\n"); + for (i = 0; i < plen; i++) { + DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", + i, data[i], data[i], + pattern[i], pattern[i]); + } +#endif + return 0; + } + + DEBUGP("Pattern matches!\n"); + /* Now we've found the constant string, try to skip + to the 'skip' character */ + for (i = plen; data[i] != skip; i++) + if (i == dlen - 1) return -1; + + /* Skip over the last character */ + i++; + + DEBUGP("Skipped up to `%c'!\n", skip); + + *numoff = i; + *numlen = getnum(data + i, dlen - i, cmd, term); + if (!*numlen) + return -1; + + DEBUGP("Match succeeded!\n"); + return 1; +} + +/* Look up to see if we're just after a \n. */ +static int find_nl_seq(u32 seq, const struct nf_ct_ftp_master *info, int dir) +{ + unsigned int i; + + for (i = 0; i < info->seq_aft_nl_num[dir]; i++) + if (info->seq_aft_nl[dir][i] == seq) + return 1; + return 0; +} + +/* We don't update if it's older than what we have. */ +static void update_nl_seq(u32 nl_seq, struct nf_ct_ftp_master *info, int dir) +{ + unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; + + /* Look for oldest: if we find exact match, we're done. 
*/ + for (i = 0; i < info->seq_aft_nl_num[dir]; i++) { + if (info->seq_aft_nl[dir][i] == nl_seq) + return; + + if (oldest == info->seq_aft_nl_num[dir] + || before(info->seq_aft_nl[dir][i], oldest)) + oldest = i; + } + + if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) + info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; + else if (oldest != NUM_SEQ_TO_REMEMBER) + info->seq_aft_nl[dir][oldest] = nl_seq; +} + +static int help(struct sk_buff **pskb, + unsigned int protoff, + struct nf_conn *ct, + enum nf_conntrack_info ctinfo) +{ + unsigned int dataoff, datalen; + struct tcphdr _tcph, *th; + char *fb_ptr; + int ret; + u32 seq; + int dir = NFCTINFO2DIR(ctinfo); + unsigned int matchlen, matchoff; + struct nf_ct_ftp_master *ct_ftp_info = &ct->help->ct_ftp_info; + struct nf_conntrack_expect *exp; + struct nf_conntrack_man cmd = {}; + + unsigned int i; + int found = 0, ends_in_nl; + + /* Until there's been traffic both ways, don't look in packets. */ + if (ctinfo != NF_CT_ESTABLISHED + && ctinfo != NF_CT_ESTABLISHED+NF_CT_IS_REPLY) { + DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); + return NF_ACCEPT; + } + + th = skb_header_pointer(*pskb, protoff, sizeof(_tcph), &_tcph); + if (th == NULL) + return NF_ACCEPT; + + dataoff = protoff + th->doff * 4; + /* No data? */ + if (dataoff >= (*pskb)->len) { + DEBUGP("ftp: dataoff(%u) >= skblen(%u)\n", dataoff, + (*pskb)->len); + return NF_ACCEPT; + } + datalen = (*pskb)->len - dataoff; + + LOCK_BH(&nf_ftp_lock); + fb_ptr = skb_header_pointer(*pskb, dataoff, datalen, ftp_buffer); + BUG_ON(fb_ptr == NULL); + + ends_in_nl = (fb_ptr[datalen - 1] == '\n'); + seq = ntohl(th->seq) + datalen; + + /* Look up to see if we're just after a \n. */ + if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { + /* Now if this ends in \n, update ftp info. */ + DEBUGP("nf_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", + ct_ftp_info->seq_aft_nl_num[dir] > 0 ? 
"" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][0], + ct_ftp_info->seq_aft_nl_num[dir] > 1 ? "" : "(UNSET)", + ct_ftp_info->seq_aft_nl[dir][1]); + ret = NF_ACCEPT; + goto out_update_nl; + } + + /* Initialize IP/IPv6 addr to expected address (it's not mentioned + in EPSV responses) */ + cmd.l3num = ct->tuplehash[dir].tuple.src.l3num; + memcpy(cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, + sizeof(cmd.u3.all)); + + for (i = 0; i < ARRAY_SIZE(search); i++) { + if (search[i].dir != dir) continue; + + found = find_pattern(fb_ptr, datalen, + search[i].pattern, + search[i].plen, + search[i].skip, + search[i].term, + &matchoff, &matchlen, + &cmd, + search[i].getnum); + if (found) break; + } + if (found == -1) { + /* We don't usually drop packets. After all, this is + connection tracking, not packet filtering. + However, it is necessary for accurate tracking in + this case. */ + if (net_ratelimit()) + printk("conntrack_ftp: partial %s %u+%u\n", + search[i].pattern, + ntohl(th->seq), datalen); + ret = NF_DROP; + goto out; + } else if (found == 0) { /* No match */ + ret = NF_ACCEPT; + goto out_update_nl; + } + + DEBUGP("conntrack_ftp: match `%.*s' (%u bytes at %u)\n", + (int)matchlen, fb_ptr + matchoff, + matchlen, ntohl(th->seq) + matchoff); + + exp = nf_conntrack_expect_alloc(); + if (exp == NULL) { + ret = NF_DROP; + goto out; + } + + /* We refer to the reverse direction ("!dir") tuples here, + * because we're expecting something in the other direction. + * Doesn't matter unless NAT is happening. */ + exp->tuple.dst.u3 = ct->tuplehash[!dir].tuple.dst.u3; + + /* Update the ftp info */ + if ((cmd.l3num == ct->tuplehash[dir].tuple.src.l3num) && + memcmp(&cmd.u3.all, &ct->tuplehash[dir].tuple.src.u3.all, + sizeof(cmd.u3.all))) { + /* Enrico Scholz's passive FTP to partially RNAT'd ftp + server: it really wants us to connect to a + different IP address. Simply don't record it for + NAT. 
*/ + if (cmd.l3num == PF_INET) { + DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n", + NIPQUAD(cmd.u3.ip), + NIPQUAD(ct->tuplehash[dir].tuple.src.u3.ip)); + } else { + DEBUGP("conntrack_ftp: NOT RECORDING: %x:%x:%x:%x:%x:%x:%x:%x != %x:%x:%x:%x:%x:%x:%x:%x\n", + NIP6(*((struct in6_addr *)cmd.u3.ip6)), + NIP6(*((struct in6_addr *)ct->tuplehash[dir] + .tuple.src.u3.ip6))); + } + + /* Thanks to Cristiano Lincoln Mattos + for reporting this potential + problem (DMZ machines opening holes to internal + networks, or the packet filter itself). */ + if (!loose) { + ret = NF_ACCEPT; + nf_conntrack_expect_free(exp); + goto out_update_nl; + } + memcpy(&exp->tuple.dst.u3, &cmd.u3.all, + sizeof(exp->tuple.dst.u3)); + } + + if (cmd.l3num == PF_INET) { + exp->tuple.src.u3.ip = ct->tuplehash[!dir].tuple.src.u3.ip; + exp->mask.src.u3.ip = 0xFFFFFFFF; + exp->mask.dst.u3.ip = 0xFFFFFFFF; + } else { + memcpy(exp->tuple.src.u3.ip6, + ct->tuplehash[!dir].tuple.src.u3.ip6, + sizeof(exp->tuple.src.u3.ip6)); + memset(exp->mask.src.u3.ip6, 0xFF, + sizeof(exp->mask.src.u3.ip6)); + memset(exp->mask.dst.u3.ip6, 0xFF, + sizeof(exp->mask.src.u3.ip6)); + } + exp->tuple.src.l3num = cmd.l3num; + exp->tuple.src.u.tcp.port = 0; + exp->tuple.dst.u.tcp.port = cmd.u.tcp.port; + exp->tuple.dst.protonum = IPPROTO_TCP; + + exp->mask.src.l3num = 0xFFFF; + exp->mask.src.u.tcp.port = 0; + exp->mask.dst.u.tcp.port = 0xFFFF; + exp->mask.dst.protonum = 0xFF; + + exp->expectfn = NULL; + exp->master = ct; + + /* Now, NAT might want to mangle the packet, and register the + * (possibly changed) expectation itself. */ + if (nf_nat_ftp_hook) + ret = nf_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, + matchoff, matchlen, exp, &seq); + else { + /* Can't expect this? Best to drop packet now. */ + if (nf_conntrack_expect_related(exp) != 0) { + nf_conntrack_expect_free(exp); + ret = NF_DROP; + } else + ret = NF_ACCEPT; + } + +out_update_nl: + /* Now if this ends in \n, update ftp info. 
Seq may have been + * adjusted by NAT code. */ + if (ends_in_nl) + update_nl_seq(seq, ct_ftp_info,dir); + out: + UNLOCK_BH(&nf_ftp_lock); + return ret; +} + +static struct nf_conntrack_helper ftp[MAX_PORTS][2]; +static char ftp_names[MAX_PORTS][2][10]; + +static void __exit fini(void) +{ + int i, j; + for (i = 0; i < ports_c; i++) { + for (j = 0; j < 2; j++) { + DEBUGP("nf_ct_ftp: unregistering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); + nf_conntrack_helper_unregister(&ftp[i][j]); + } + } +} + +static int __init init(void) +{ + int i, j = -1, ret = 0; + char *tmpname; + + if (ports_c == 0) + ports[ports_c++] = FTP_PORT; + + /* FIXME should be configurable whether IPv4 and IPv6 FTP connections + are tracked or not - YK */ + for (i = 0; i < ports_c; i++) { + memset(&ftp[i], 0, sizeof(struct nf_conntrack_helper)); + + ftp[i][0].tuple.src.l3num = PF_INET; + ftp[i][1].tuple.src.l3num = PF_INET6; + for (j = 0; j < 2; j++) { + ftp[i][j].tuple.src.u.tcp.port = htons(ports[i]); + ftp[i][j].tuple.dst.protonum = IPPROTO_TCP; + ftp[i][j].mask.src.u.tcp.port = 0xFFFF; + ftp[i][j].mask.dst.protonum = 0xFF; + ftp[i][j].max_expected = 1; + ftp[i][j].timeout = 5 * 60; /* 5 Minutes */ + ftp[i][j].me = THIS_MODULE; + ftp[i][j].help = help; + tmpname = &ftp_names[i][j][0]; + if (ports[i] == FTP_PORT) + sprintf(tmpname, "ftp"); + else + sprintf(tmpname, "ftp-%d", ports[i]); + ftp[i][j].name = tmpname; + + DEBUGP("nf_ct_ftp: registering helper for pf: %d " + "port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); + ret = nf_conntrack_helper_register(&ftp[i][j]); + if (ret) { + printk("nf_ct_ftp: failed to register helper " + " for pf: %d port: %d\n", + ftp[i][j].tuple.src.l3num, ports[i]); + goto done; + } + } + } + +done: + if (i < ports_c) { + for (;i >= 0; i--) { + for (; j >= 0; j--) + nf_conntrack_helper_unregister(&ftp[i][j]); + j = 1; + } + } + return ret; +} + +module_init(init); +module_exit(fini); diff -Nur --exclude '*.orig' 
linux-2.6.11.3.org/net/netfilter/nf_conntrack_l3proto_generic.c linux-2.6.11.3/net/netfilter/nf_conntrack_l3proto_generic.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_l3proto_generic.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_l3proto_generic.c 2005-03-13 22:06:02.226215728 +0100 @@ -0,0 +1,99 @@ +/* + * (C) 2003,2004 USAGI/WIDE Project + * + * Based largely upon the original ip_conntrack code which + * had the following copyright information: + * + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Author: + * Yasuyuki Kozakai @USAGI + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); + +static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple) +{ + memset(tuple->src.u3.all, 0, NF_CT_TUPLE_L3SIZE); + memset(tuple->dst.u3.all, 0, NF_CT_TUPLE_L3SIZE); + + return 1; +} + +static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + memset(tuple->src.u3.all, 0, NF_CT_TUPLE_L3SIZE); + memset(tuple->dst.u3.all, 0, NF_CT_TUPLE_L3SIZE); + + return 1; +} + +static int generic_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return 0; +} + +static int generic_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +static int +generic_prepare(struct sk_buff **pskb, unsigned int hooknum, + unsigned int *dataoff, u_int8_t *protonum, int *ret) +{ + /* Never track !!! 
*/ + *ret = NF_ACCEPT; + return 0; +} + + +static u_int32_t generic_get_features(const struct nf_conntrack_tuple *tuple) + +{ + return NF_CT_F_BASIC; +} + +struct nf_conntrack_l3proto nf_conntrack_generic_l3proto = { + .l3proto = PF_UNSPEC, + .name = "unknown", + .pkt_to_tuple = generic_pkt_to_tuple, + .invert_tuple = generic_invert_tuple, + .print_tuple = generic_print_tuple, + .print_conntrack = generic_print_conntrack, + .prepare = generic_prepare, + .get_features = generic_get_features, + .me = THIS_MODULE, +}; diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_generic.c linux-2.6.11.3/net/netfilter/nf_conntrack_proto_generic.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_generic.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_proto_generic.c 2005-03-13 22:06:02.227215576 +0100 @@ -0,0 +1,85 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - enable working with L3 protocol independent connection tracking. + * + * Derived from net/ipv4/netfilter/ip_conntrack_proto_generic.c + */ + +#include +#include +#include +#include +#include + +unsigned long nf_ct_generic_timeout = 600*HZ; + +static int generic_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +static int generic_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.all = 0; + tuple->dst.u.all = 0; + + return 1; +} + +/* Print out the per-protocol part of the tuple. 
*/ +static int generic_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return 0; +} + +/* Print out the private part of the conntrack. */ +static int generic_print_conntrack(struct seq_file *s, + const struct nf_conn *state) +{ + return 0; +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_generic_timeout); + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +struct nf_conntrack_protocol nf_conntrack_generic_protocol = +{ + .l3proto = PF_UNSPEC, + .proto = 0, + .name = "unknown", + .pkt_to_tuple = generic_pkt_to_tuple, + .invert_tuple = generic_invert_tuple, + .print_tuple = generic_print_tuple, + .print_conntrack = generic_print_conntrack, + .packet = packet, + .new = new, +}; diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_sctp.c linux-2.6.11.3/net/netfilter/nf_conntrack_proto_sctp.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_sctp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_proto_sctp.c 2005-03-13 22:06:02.229215272 +0100 @@ -0,0 +1,668 @@ +/* + * Connection tracking protocol helper module for SCTP. + * + * SCTP is defined in RFC 2960. References to various sections in this code + * are to this RFC. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 17 Oct 2004: Yasuyuki Kozakai @USAGI + * - enable working with L3 protocol independent connection tracking. 
+ * + * Derived from net/ipv4/ip_conntrack_sctp.c + */ + +/* + * Added support for proc manipulation of timeouts. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#if 0 +#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) +#else +#define DEBUGP(format, args...) +#endif + +/* Protects conntrack->proto.sctp */ +static DECLARE_RWLOCK(sctp_lock); + +/* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. --RR + + And so for me for SCTP :D -Kiran */ + +static const char *sctp_conntrack_names[] = { + "NONE", + "CLOSED", + "COOKIE_WAIT", + "COOKIE_ECHOED", + "ESTABLISHED", + "SHUTDOWN_SENT", + "SHUTDOWN_RECD", + "SHUTDOWN_ACK_SENT", +}; + +#define SECS * HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + +static unsigned long nf_ct_sctp_timeout_closed = 10 SECS; +static unsigned long nf_ct_sctp_timeout_cookie_wait = 3 SECS; +static unsigned long nf_ct_sctp_timeout_cookie_echoed = 3 SECS; +static unsigned long nf_ct_sctp_timeout_established = 5 DAYS; +static unsigned long nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; +static unsigned long nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; +static unsigned long nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; + +static unsigned long * sctp_timeouts[] += { NULL, /* SCTP_CONNTRACK_NONE */ + &nf_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ + &nf_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ + &nf_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */ + &nf_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */ + &nf_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */ + &nf_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */ + &nf_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */ + }; + +#define sNO SCTP_CONNTRACK_NONE +#define sCL SCTP_CONNTRACK_CLOSED +#define 
sCW SCTP_CONNTRACK_COOKIE_WAIT +#define sCE SCTP_CONNTRACK_COOKIE_ECHOED +#define sES SCTP_CONNTRACK_ESTABLISHED +#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT +#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD +#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT +#define sIV SCTP_CONNTRACK_MAX + +/* + These are the descriptions of the states: + +NOTE: These state names are tantalizingly similar to the states of an +SCTP endpoint. But the interpretation of the states is a little different, +considering that these are the states of the connection and not of an end +point. Please note the subtleties. -Kiran + +NONE - Nothing so far. +COOKIE WAIT - We have seen an INIT chunk in the original direction, or also + an INIT_ACK chunk in the reply direction. +COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. +ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. +SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. +SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin. +SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite + to that of the SHUTDOWN chunk. +CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of + the SHUTDOWN chunk. Connection is closed. +*/ + +/* TODO + - I have assumed that the first INIT is in the original direction. + This messes things when an INIT comes in the reply direction in CLOSED + state. + - Check the error type in the reply dir before transitioning from +cookie echoed to closed. + - Sec 5.2.4 of RFC 2960 + - Multi Homing support. 
+*/ + +/* SCTP conntrack state transitions */ +static enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ +/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} + }, + { +/* REPLY */ +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} + } +}; + +static int sctp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + sctp_sctphdr_t _hdr, *hp; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + /* Actually only need first 8 bytes. 
*/ + hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.sctp.port = hp->source; + tuple->dst.u.sctp.port = hp->dest; + return 1; +} + +static int sctp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + tuple->src.u.sctp.port = orig->dst.u.sctp.port; + tuple->dst.u.sctp.port = orig->src.u.sctp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int sctp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.sctp.port), + ntohs(tuple->dst.u.sctp.port)); +} + +/* Print out the private part of the conntrack. */ +static int sctp_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + enum sctp_conntrack state; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + READ_LOCK(&sctp_lock); + state = conntrack->proto.sctp.state; + READ_UNLOCK(&sctp_lock); + + return seq_printf(s, "%s ", sctp_conntrack_names[state]); +} + +#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ +for (offset = dataoff + sizeof(sctp_sctphdr_t), count = 0; \ + offset < skb->len && \ + (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ + offset += (htons(sch->length) + 3) & ~3, count++) + +/* Some validity checks to make sure the chunks are fine */ +static int do_basic_checks(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + char *map) +{ + u_int32_t offset, count; + sctp_chunkhdr_t _sch, *sch; + int flag; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + flag = 0; + + for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { + DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type); + + if (sch->type == SCTP_CID_INIT + || sch->type == SCTP_CID_INIT_ACK + || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { + flag = 1; + } + + /* Cookie Ack/Echo 
chunks not the first OR + Init / Init Ack / Shutdown compl chunks not the only chunks */ + if ((sch->type == SCTP_CID_COOKIE_ACK + || sch->type == SCTP_CID_COOKIE_ECHO + || flag) + && count !=0 ) { + DEBUGP("Basic checks failed\n"); + return 1; + } + + if (map) { + set_bit(sch->type, (void *)map); + } + } + + DEBUGP("Basic checks passed\n"); + return 0; +} + +static int new_state(enum nf_conntrack_dir dir, + enum sctp_conntrack cur_state, + int chunk_type) +{ + int i; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + DEBUGP("Chunk type: %d\n", chunk_type); + + switch (chunk_type) { + case SCTP_CID_INIT: + DEBUGP("SCTP_CID_INIT\n"); + i = 0; break; + case SCTP_CID_INIT_ACK: + DEBUGP("SCTP_CID_INIT_ACK\n"); + i = 1; break; + case SCTP_CID_ABORT: + DEBUGP("SCTP_CID_ABORT\n"); + i = 2; break; + case SCTP_CID_SHUTDOWN: + DEBUGP("SCTP_CID_SHUTDOWN\n"); + i = 3; break; + case SCTP_CID_SHUTDOWN_ACK: + DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); + i = 4; break; + case SCTP_CID_ERROR: + DEBUGP("SCTP_CID_ERROR\n"); + i = 5; break; + case SCTP_CID_COOKIE_ECHO: + DEBUGP("SCTP_CID_COOKIE_ECHO\n"); + i = 6; break; + case SCTP_CID_COOKIE_ACK: + DEBUGP("SCTP_CID_COOKIE_ACK\n"); + i = 7; break; + case SCTP_CID_SHUTDOWN_COMPLETE: + DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); + i = 8; break; + default: + /* Other chunks like DATA, SACK, HEARTBEAT and + its ACK do not cause a change in state */ + DEBUGP("Unknown chunk type, Will stay in %s\n", + sctp_conntrack_names[cur_state]); + return cur_state; + } + + DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", + dir, sctp_conntrack_names[cur_state], chunk_type, + sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); + + return sctp_conntracks[dir][i][cur_state]; +} + +/* Returns verdict for packet, or -1 for invalid. 
*/ +static int sctp_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + enum sctp_conntrack newconntrack, oldsctpstate; + sctp_sctphdr_t _sctph, *sh; + sctp_chunkhdr_t _sch, *sch; + u_int32_t offset, count; + char map[256 / sizeof (char)] = {0}; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); + if (sh == NULL) + return -1; + + if (do_basic_checks(conntrack, skb, dataoff, map) != 0) + return -1; + + /* Check the verification tag (Sec 8.5) */ + if (!test_bit(SCTP_CID_INIT, (void *)map) + && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) + && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) + && !test_bit(SCTP_CID_ABORT, (void *)map) + && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) + && (sh->vtag != conntrack->proto.sctp.vtag[NFCTINFO2DIR(ctinfo)])) { + DEBUGP("Verification tag check failed\n"); + return -1; + } + + oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; + for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { + WRITE_LOCK(&sctp_lock); + + /* Special cases of Verification tag check (Sec 8.5.1) */ + if (sch->type == SCTP_CID_INIT) { + /* Sec 8.5.1 (A) */ + if (sh->vtag != 0) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } else if (sch->type == SCTP_CID_ABORT) { + /* Sec 8.5.1 (B) */ + if (!(sh->vtag == conntrack->proto.sctp.vtag[NFCTINFO2DIR(ctinfo)]) + && !(sh->vtag == conntrack->proto.sctp.vtag + [1 - NFCTINFO2DIR(ctinfo)])) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { + /* Sec 8.5.1 (C) */ + if (!(sh->vtag == conntrack->proto.sctp.vtag[NFCTINFO2DIR(ctinfo)]) + && !(sh->vtag == conntrack->proto.sctp.vtag + [1 - NFCTINFO2DIR(ctinfo)] + && (sch->flags & 1))) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } else if (sch->type == SCTP_CID_COOKIE_ECHO) { + /* Sec 8.5.1 (D) */ + if (!(sh->vtag == 
conntrack->proto.sctp.vtag[NFCTINFO2DIR(ctinfo)])) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + } + + oldsctpstate = conntrack->proto.sctp.state; + newconntrack = new_state(NFCTINFO2DIR(ctinfo), oldsctpstate, sch->type); + + /* Invalid */ + if (newconntrack == SCTP_CONNTRACK_MAX) { + DEBUGP("nf_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", + NFCTINFO2DIR(ctinfo), sch->type, oldsctpstate); + WRITE_UNLOCK(&sctp_lock); + return -1; + } + + /* If it is an INIT or an INIT ACK note down the vtag */ + if (sch->type == SCTP_CID_INIT + || sch->type == SCTP_CID_INIT_ACK) { + sctp_inithdr_t _inithdr, *ih; + + ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), + sizeof(_inithdr), &_inithdr); + if (ih == NULL) { + WRITE_UNLOCK(&sctp_lock); + return -1; + } + DEBUGP("Setting vtag %x for dir %d\n", + ih->init_tag, NFCTINFO2DIR(ctinfo)); + conntrack->proto.sctp.vtag[NF_CT_DIR_ORIGINAL] = ih->init_tag; + } + + conntrack->proto.sctp.state = newconntrack; + WRITE_UNLOCK(&sctp_lock); + } + + nf_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); + + if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED + && NFCTINFO2DIR(ctinfo) == NF_CT_DIR_REPLY + && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { + DEBUGP("Setting assured bit\n"); + set_bit(NF_S_ASSURED_BIT, &conntrack->status); + } + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. 
*/ +static int sctp_new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + enum sctp_conntrack newconntrack; + sctp_sctphdr_t _sctph, *sh; + sctp_chunkhdr_t _sch, *sch; + u_int32_t offset, count; + char map[256 / sizeof (char)] = {0}; + + DEBUGP(__FUNCTION__); + DEBUGP("\n"); + + sh = skb_header_pointer(skb, dataoff, sizeof(_sctph), &_sctph); + if (sh == NULL) + return 0; + + if (do_basic_checks(conntrack, skb, dataoff, map) != 0) + return 0; + + /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ + if ((test_bit (SCTP_CID_ABORT, (void *)map)) + || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)) + || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) { + return 0; + } + + newconntrack = SCTP_CONNTRACK_MAX; + for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { + /* Don't need lock here: this conntrack not in circulation yet */ + newconntrack = new_state(NF_CT_DIR_ORIGINAL, + SCTP_CONNTRACK_NONE, sch->type); + + /* Invalid: delete conntrack */ + if (newconntrack == SCTP_CONNTRACK_MAX) { + DEBUGP("nf_conntrack_sctp: invalid new deleting.\n"); + return 0; + } + + /* Copy the vtag into the state info */ + if (sch->type == SCTP_CID_INIT) { + if (sh->vtag == 0) { + sctp_inithdr_t _inithdr, *ih; + + ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), + sizeof(_inithdr), &_inithdr); + if (ih == NULL) + return 0; + + DEBUGP("Setting vtag %x for new conn\n", + ih->init_tag); + + conntrack->proto.sctp.vtag[NF_CT_DIR_REPLY] = + ih->init_tag; + } else { + /* Sec 8.5.1 (A) */ + return 0; + } + } + /* If it is a shutdown ack OOTB packet, we expect a return + shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ + else { + DEBUGP("Setting vtag %x for new conn OOTB\n", + sh->vtag); + conntrack->proto.sctp.vtag[NF_CT_DIR_REPLY] = sh->vtag; + } + + conntrack->proto.sctp.state = newconntrack; + } + + return 1; +} + +struct nf_conntrack_protocol nf_conntrack_protocol_sctp4 = { + .l3proto = PF_INET, + .proto = 
IPPROTO_SCTP, + .name = "sctp", + .pkt_to_tuple = sctp_pkt_to_tuple, + .invert_tuple = sctp_invert_tuple, + .print_tuple = sctp_print_tuple, + .print_conntrack = sctp_print_conntrack, + .packet = sctp_packet, + .new = sctp_new, + .destroy = NULL, + .me = THIS_MODULE +}; + +struct nf_conntrack_protocol nf_conntrack_protocol_sctp6 = { + .l3proto = PF_INET6, + .proto = IPPROTO_SCTP, + .name = "sctp", + .pkt_to_tuple = sctp_pkt_to_tuple, + .invert_tuple = sctp_invert_tuple, + .print_tuple = sctp_print_tuple, + .print_conntrack = sctp_print_conntrack, + .packet = sctp_packet, + .new = sctp_new, + .destroy = NULL, + .me = THIS_MODULE +}; + +#ifdef CONFIG_SYSCTL +static ctl_table nf_ct_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, + .procname = "nf_conntrack_sctp_timeout_closed", + .data = &nf_ct_sctp_timeout_closed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, + .procname = "nf_conntrack_sctp_timeout_cookie_wait", + .data = &nf_ct_sctp_timeout_cookie_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, + .procname = "nf_conntrack_sctp_timeout_cookie_echoed", + .data = &nf_ct_sctp_timeout_cookie_echoed, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, + .procname = "nf_conntrack_sctp_timeout_established", + .data = &nf_ct_sctp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, + .procname = "nf_conntrack_sctp_timeout_shutdown_sent", + .data = &nf_ct_sctp_timeout_shutdown_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name 
= NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, + .procname = "nf_conntrack_sctp_timeout_shutdown_recd", + .data = &nf_ct_sctp_timeout_shutdown_recd, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, + .procname = "nf_conntrack_sctp_timeout_shutdown_ack_sent", + .data = &nf_ct_sctp_timeout_shutdown_ack_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = nf_ct_sysctl_table, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = nf_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *nf_ct_sysctl_header; +#endif + +int __init init(void) +{ + int ret; + + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp4); + if (ret) { + printk("nf_conntrack_proto_sctp4: protocol register failed\n"); + goto out; + } + ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_sctp6); + if (ret) { + printk("nf_conntrack_proto_sctp6: protocol register failed\n"); + goto cleanup_sctp4; + } + +#ifdef CONFIG_SYSCTL + nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); + if (nf_ct_sysctl_header == NULL) { + printk("nf_conntrack_proto_sctp: can't register to sysctl.\n"); + goto cleanup; + } +#endif + + return ret; + +#ifdef CONFIG_SYSCTL + cleanup: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); +#endif + cleanup_sctp4: + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); + out: + DEBUGP("SCTP conntrack module loading %s\n", + ret ? 
"failed": "succeeded"); + return ret; +} + +void __exit fini(void) +{ + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp6); + nf_conntrack_protocol_unregister(&nf_conntrack_protocol_sctp4); +#ifdef CONFIG_SYSCTL + unregister_sysctl_table(nf_ct_sysctl_header); +#endif + DEBUGP("SCTP conntrack module unloaded\n"); +} + +module_init(init); +module_exit(fini); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Kiran Kumar Immidi"); +MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_tcp.c linux-2.6.11.3/net/netfilter/nf_conntrack_proto_tcp.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_tcp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_proto_tcp.c 2005-03-13 22:06:02.237214056 +0100 @@ -0,0 +1,1146 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Jozsef Kadlecsik : + * - Real stateful connection tracking + * - Modified state transitions table + * - Window scaling support added + * - SACK support added + * + * Willy Tarreau: + * - State table bugfixes + * - More robust state changes + * - Tuning timer parameters + * + * 27 Oct 2004: Yasuyuki Kozakai @USAGI + * - genelized Layer 3 protocol part. + * + * Derived from net/ipv4/netfilter/ip_conntrack_proto_tcp.c + * + * version 2.2 + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#define DEBUGP_VARS +#else +#define DEBUGP(format, args...) 
+#endif + +/* Protects conntrack->proto.tcp */ +static DECLARE_RWLOCK(tcp_lock); + +/* "Be conservative in what you do, + be liberal in what you accept from others." + If it's non-zero, we mark only out of window RST segments as INVALID. */ +int nf_ct_tcp_be_liberal = 0; + +/* When connection is picked up from the middle, how many packets are required + to pass in each direction when we assume we are in sync - if any side uses + window scaling, we lost the game. + If it is set to zero, we disable picking up already established + connections. */ +int nf_ct_tcp_loose = 3; + +/* Max number of the retransmitted packets without receiving an (acceptable) + ACK from the destination. If this number is reached, a shorter timer + will be started. */ +int nf_ct_tcp_max_retrans = 3; + + /* FIXME: Examine ipfilter's timeouts and conntrack transitions more + closely. They're more complex. --RR */ + +static const char *tcp_conntrack_names[] = { + "NONE", + "SYN_SENT", + "SYN_RECV", + "ESTABLISHED", + "FIN_WAIT", + "CLOSE_WAIT", + "LAST_ACK", + "TIME_WAIT", + "CLOSE", + "LISTEN" +}; + +#define SECS * HZ +#define MINS * 60 SECS +#define HOURS * 60 MINS +#define DAYS * 24 HOURS + +unsigned long nf_ct_tcp_timeout_syn_sent = 2 MINS; +unsigned long nf_ct_tcp_timeout_syn_recv = 60 SECS; +unsigned long nf_ct_tcp_timeout_established = 5 DAYS; +unsigned long nf_ct_tcp_timeout_fin_wait = 2 MINS; +unsigned long nf_ct_tcp_timeout_close_wait = 60 SECS; +unsigned long nf_ct_tcp_timeout_last_ack = 30 SECS; +unsigned long nf_ct_tcp_timeout_time_wait = 2 MINS; +unsigned long nf_ct_tcp_timeout_close = 10 SECS; + +/* RFC1122 says the R2 limit should be at least 100 seconds. + Linux uses 15 packets as limit, which corresponds + to ~13-30min depending on RTO. 
*/ +unsigned long nf_ct_tcp_timeout_max_retrans = 5 MINS; + +static unsigned long * tcp_timeouts[] += { NULL, /* TCP_CONNTRACK_NONE */ + &nf_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ + &nf_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ + &nf_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ + &nf_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ + &nf_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ + &nf_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ + &nf_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ + &nf_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ + NULL, /* TCP_CONNTRACK_LISTEN */ + }; + +#define sNO TCP_CONNTRACK_NONE +#define sSS TCP_CONNTRACK_SYN_SENT +#define sSR TCP_CONNTRACK_SYN_RECV +#define sES TCP_CONNTRACK_ESTABLISHED +#define sFW TCP_CONNTRACK_FIN_WAIT +#define sCW TCP_CONNTRACK_CLOSE_WAIT +#define sLA TCP_CONNTRACK_LAST_ACK +#define sTW TCP_CONNTRACK_TIME_WAIT +#define sCL TCP_CONNTRACK_CLOSE +#define sLI TCP_CONNTRACK_LISTEN +#define sIV TCP_CONNTRACK_MAX +#define sIG TCP_CONNTRACK_IGNORE + +/* What TCP flags are set from RST/SYN/FIN/ACK. */ +enum tcp_bit_set { + TCP_SYN_SET, + TCP_SYNACK_SET, + TCP_FIN_SET, + TCP_ACK_SET, + TCP_RST_SET, + TCP_NONE_SET, +}; + +/* + * The TCP state transition table needs a few words... + * + * We are the man in the middle. All the packets go through us + * but might get lost in transit to the destination. + * It is assumed that the destinations can't receive segments + * we haven't seen. + * + * The checked segment is in window, but our windows are *not* + * equivalent with the ones of the sender/receiver. We always + * try to guess the state of the current sender. 
+ * + * The meaning of the states are: + * + * NONE: initial state + * SYN_SENT: SYN-only packet seen + * SYN_RECV: SYN-ACK packet seen + * ESTABLISHED: ACK packet seen + * FIN_WAIT: FIN packet seen + * CLOSE_WAIT: ACK seen (after FIN) + * LAST_ACK: FIN seen (after FIN) + * TIME_WAIT: last ACK seen + * CLOSE: closed connection + * + * LISTEN state is not used. + * + * Packets marked as IGNORED (sIG): + * if they may be either invalid or valid + * and the receiver may send back a connection + * closing RST or a SYN/ACK. + * + * Packets marked as INVALID (sIV): + * if they are invalid + * or we do not support the request (simultaneous open) + */ +static enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { + { +/* ORIGINAL */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, +/* + * sNO -> sSS Initialize a new connection + * sSS -> sSS Retransmitted SYN + * sSR -> sIG Late retransmitted SYN? + * sES -> sIG Error: SYNs in window outside the SYN_SENT state + * are errors. Receiver will reply with RST + * and close the connection. + * Or we are not in sync and hold a dead connection. + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sSS Reopened connection (RFC 1122). + * sCL -> sSS + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* + * A SYN/ACK from the client is always invalid: + * - either it tries to set up a simultaneous open, which is + * not supported; + * - or the firewall has just been inserted between the two hosts + * during the session set-up. The SYN will be retransmitted + * by the true client (or it'll time out). + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, +/* + * sNO -> sIV Too late and no reason to do anything... 
+ * sSS -> sIV Client migth not send FIN in this state: + * we enforce waiting for a SYN/ACK reply first. + * sSR -> sFW Close started. + * sES -> sFW + * sFW -> sLA FIN seen in both directions, waiting for + * the last ACK. + * Migth be a retransmitted FIN as well... + * sCW -> sLA + * sLA -> sLA Retransmitted FIN. Remain in the same state. + * sTW -> sTW + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* + * sNO -> sES Assumed. + * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. + * sSR -> sES Established state is reached. + * sES -> sES :-) + * sFW -> sCW Normal close request answered by ACK. + * sCW -> sCW + * sLA -> sTW Last ACK detected. + * sTW -> sTW Retransmitted last ACK. Remain in the same state. + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + }, + { +/* REPLY */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, +/* + * sNO -> sIV Never reached. + * sSS -> sIV Simultaneous open, not supported + * sSR -> sIV Simultaneous open, not supported. + * sES -> sIV Server may not initiate a connection. + * sFW -> sIV + * sCW -> sIV + * sLA -> sIV + * sTW -> sIV Reopened connection, but server may not do it. + * sCL -> sIV + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, +/* + * sSS -> sSR Standard open. + * sSR -> sSR Retransmitted SYN/ACK. + * sES -> sIG Late retransmitted SYN/ACK? + * sFW -> sIG + * sCW -> sIG + * sLA -> sIG + * sTW -> sIG + * sCL -> sIG + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, +/* + * sSS -> sIV Server might not send FIN in this state. + * sSR -> sFW Close started. 
+ * sES -> sFW + * sFW -> sLA FIN seen in both directions. + * sCW -> sLA + * sLA -> sLA Retransmitted FIN. + * sTW -> sTW + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*ack*/ { sIV, sIG, sIV, sES, sCW, sCW, sTW, sTW, sCL, sIV }, +/* + * sSS -> sIG Might be a half-open connection. + * sSR -> sIV Simultaneous open. + * sES -> sES :-) + * sFW -> sCW Normal close request answered by ACK. + * sCW -> sCW + * sLA -> sTW Last ACK detected. + * sTW -> sTW Retransmitted last ACK. + * sCL -> sCL + */ +/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ +/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, +/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } + } +}; + +static int tcp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct tcphdr _hdr, *hp; + + /* Actually only need first 8 bytes. */ + hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.tcp.port = hp->source; + tuple->dst.u.tcp.port = hp->dest; + + return 1; +} + +static int tcp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.tcp.port = orig->dst.u.tcp.port; + tuple->dst.u.tcp.port = orig->src.u.tcp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int tcp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); +} + +/* Print out the private part of the conntrack. 
 */
+static int tcp_print_conntrack(struct seq_file *s,
+ const struct nf_conn *conntrack)
+{
+ enum tcp_conntrack state;
+
+ READ_LOCK(&tcp_lock);
+ state = conntrack->proto.tcp.state;
+ READ_UNLOCK(&tcp_lock);
+
+ return seq_printf(s, "%s ", tcp_conntrack_names[state]);
+}
+
+static unsigned int get_conntrack_index(const struct tcphdr *tcph)
+{
+ if (tcph->rst) return TCP_RST_SET;
+ else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
+ else if (tcph->fin) return TCP_FIN_SET;
+ else if (tcph->ack) return TCP_ACK_SET;
+ else return TCP_NONE_SET;
+}
+
+/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
+ in IP Filter' by Guido van Rooij.
+
+ http://www.nluug.nl/events/sane2000/papers.html
+ http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
+
+ The boundaries and the conditions are slightly changed:
+
+ td_maxend = max(sack + max(win,1)) seen in reply packets
+ td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
+ td_end = max(seq + len) seen in sent packets
+
+ I. Upper bound for valid data: seq + len <= sender.td_maxend
+ II. Lower bound for valid data: seq >= sender.td_end - receiver.td_maxwin
+ III. Upper bound for valid ack: sack <= receiver.td_end
+ IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
+
+ where sack is the highest right edge of sack block found in the packet.
+
+ The upper bound limit for a valid ack is not ignored -
+ we don't have to deal with fragments.
+*/
+
+static inline __u32 segment_seq_plus_len(__u32 seq,
+ size_t len,
+ unsigned int dataoff,
+ struct tcphdr *tcph)
+{
+ /* XXX Should I use payload length field in IP/IPv6 header ?
+ * - YK */
+ return (seq + len - dataoff - tcph->doff*4
+ + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
+}
+
+/* Fixme: what about big packets? */
+#define MAXACKWINCONST 66000
+#define MAXACKWINDOW(sender) \
+ ((sender)->td_maxwin > MAXACKWINCONST ?
(sender)->td_maxwin \ + : MAXACKWINCONST) + +/* + * Simplified tcp_parse_options routine from tcp_input.c + */ +static void tcp_options(const struct sk_buff *skb, + unsigned int dataoff, + struct tcphdr *tcph, + struct nf_ct_tcp_state *state) +{ + unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; + unsigned char *ptr; + int length = (tcph->doff*4) - sizeof(struct tcphdr); + + if (!length) + return; + + ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), + length, buff); + BUG_ON(ptr == NULL); + + state->td_scale = + state->flags = 0; + + while (length > 0) { + int opcode=*ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize=*ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + break; /* don't parse partial options */ + + if (opcode == TCPOPT_SACK_PERM + && opsize == TCPOLEN_SACK_PERM) + state->flags |= NF_CT_TCP_FLAG_SACK_PERM; + else if (opcode == TCPOPT_WINDOW + && opsize == TCPOLEN_WINDOW) { + state->td_scale = *(u_int8_t *)ptr; + + if (state->td_scale > 14) { + /* See RFC1323 */ + state->td_scale = 14; + } + state->flags |= + NF_CT_TCP_FLAG_WINDOW_SCALE; + } + ptr += opsize - 2; + length -= opsize; + } + } +} + +static void tcp_sack(const struct sk_buff *skb, unsigned int dataoff, + struct tcphdr *tcph, __u32 *sack) +{ + __u32 tmp; + unsigned char *ptr; + unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; + int length = (tcph->doff*4) - sizeof(struct tcphdr); + + ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr), + length, buff); + BUG_ON(ptr == NULL); + + /* Fast path for timestamp-only option */ + if (length == TCPOLEN_TSTAMP_ALIGNED*4 + && *(__u32 *)ptr == + __constant_ntohl((TCPOPT_NOP << 24) + | (TCPOPT_NOP << 16) + | (TCPOPT_TIMESTAMP << 8) + | TCPOLEN_TIMESTAMP)) + return; + + while (length > 0) { + int opcode = *ptr++; + int opsize, i; + + switch (opcode) { + case TCPOPT_EOL: + 
return; + case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2) /* "silly options" */ + return; + if (opsize > length) + break; /* don't parse partial options */ + + if (opcode == TCPOPT_SACK + && opsize >= (TCPOLEN_SACK_BASE + + TCPOLEN_SACK_PERBLOCK) + && !((opsize - TCPOLEN_SACK_BASE) + % TCPOLEN_SACK_PERBLOCK)) { + for (i = 0; + i < (opsize - TCPOLEN_SACK_BASE); + i += TCPOLEN_SACK_PERBLOCK) { + memcpy(&tmp, (__u32 *)(ptr + i) + 1, + sizeof(__u32)); + tmp = ntohl(tmp); + + if (after(tmp, *sack)) + *sack = tmp; + } + return; + } + ptr += opsize - 2; + length -= opsize; + } + } +} + +static int tcp_in_window(struct nf_ct_tcp *state, + enum nf_conntrack_dir dir, + unsigned int *index, + const struct sk_buff *skb, + unsigned int dataoff, + struct tcphdr *tcph, + int pf) +{ + struct nf_ct_tcp_state *sender = &state->seen[dir]; + struct nf_ct_tcp_state *receiver = &state->seen[!dir]; + __u32 seq, ack, sack, end, win, swin; + int res; + + /* + * Get the required data from the packet. + */ + seq = ntohl(tcph->seq); + ack = sack = ntohl(tcph->ack_seq); + win = ntohs(tcph->window); + end = segment_seq_plus_len(seq, skb->len, dataoff, tcph); + + if (receiver->flags & NF_CT_TCP_FLAG_SACK_PERM) + tcp_sack(skb, dataoff, tcph, &sack); + + DEBUGP("tcp_in_window: START\n"); + DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "seq=%u ack=%u sack=%u win=%u end=%u\n", + NIPQUAD(iph->saddr), ntohs(tcph->source), + NIPQUAD(iph->daddr), ntohs(tcph->dest), + seq, ack, sack, win, end); + DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + if (sender->td_end == 0) { + /* + * Initialize sender data. 
+ */ + if (tcph->syn && tcph->ack) { + /* + * Outgoing SYN-ACK in reply to a SYN. + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(skb, dataoff, tcph, sender); + /* + * RFC 1323: + * Both sides must send the Window Scale option + * to enable window scaling in either direction. + */ + if (!(sender->flags & NF_CT_TCP_FLAG_WINDOW_SCALE + && receiver->flags & NF_CT_TCP_FLAG_WINDOW_SCALE)) + sender->td_scale = + receiver->td_scale = 0; + } else { + /* + * We are in the middle of a connection, + * its history is lost for us. + * Let's try to use the data from the packet. + */ + sender->td_end = end; + sender->td_maxwin = (win == 0 ? 1 : win); + sender->td_maxend = end + sender->td_maxwin; + } + } else if (((state->state == TCP_CONNTRACK_SYN_SENT + && dir == NF_CT_DIR_ORIGINAL) + || (state->state == TCP_CONNTRACK_SYN_RECV + && dir == NF_CT_DIR_REPLY)) + && after(end, sender->td_end)) { + /* + * RFC 793: "if a TCP is reinitialized ... then it need + * not wait at all; it must only be sure to use sequence + * numbers larger than those recently used." + */ + sender->td_end = + sender->td_maxend = end; + sender->td_maxwin = (win == 0 ? 1 : win); + + tcp_options(skb, dataoff, tcph, sender); + } + + if (!(tcph->ack)) { + /* + * If there is no ACK, just pretend it was set and OK. + */ + ack = sack = receiver->td_end; + } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == + (TCP_FLAG_ACK|TCP_FLAG_RST)) + && (ack == 0)) { + /* + * Broken TCP stacks, that set ACK in RST packets as well + * with zero ack value. + */ + ack = sack = receiver->td_end; + } + + if (seq == end) + /* + * Packets contains no data: we assume it is valid + * and check the ack value only. 
+ */ + seq = end = sender->td_end; + + DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " + "seq=%u ack=%u sack =%u win=%u end=%u trim=%u\n", + NIPQUAD(iph->saddr), ntohs(tcph->source), + NIPQUAD(iph->daddr), ntohs(tcph->dest), + seq, ack, sack, win, end, + after(end, sender->td_maxend) && before(seq, sender->td_maxend) + ? sender->td_maxend : end); + DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + + /* Ignore data over the right edge of the receiver's window. */ + if (after(end, sender->td_maxend) && + before(seq, sender->td_maxend)) { + end = sender->td_maxend; + if (*index == TCP_FIN_SET) + *index = TCP_ACK_SET; + } + DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", + before(end, sender->td_maxend + 1) + || before(seq, sender->td_maxend + 1), + after(seq, sender->td_end - receiver->td_maxwin - 1) + || after(end, sender->td_end - receiver->td_maxwin - 1), + before(sack, receiver->td_end + 1), + after(ack, receiver->td_end - MAXACKWINDOW(sender))); + + if (sender->loose || receiver->loose || + (before(end, sender->td_maxend + 1) && + after(seq, sender->td_end - receiver->td_maxwin - 1) && + before(sack, receiver->td_end + 1) && + after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { + /* + * Take into account window scaling (RFC 1323). + */ + if (!tcph->syn) + win <<= sender->td_scale; + + /* + * Update sender data. + */ + swin = win + (sack - ack); + if (sender->td_maxwin < swin) + sender->td_maxwin = swin; + if (after(end, sender->td_end)) + sender->td_end = end; + if (after(sack + win, receiver->td_maxend - 1)) { + receiver->td_maxend = sack + win; + if (win == 0) + receiver->td_maxend++; + } + + /* + * Check retransmissions. 
+ */ + if (*index == TCP_ACK_SET) { + if (state->last_dir == dir + && state->last_seq == seq + && state->last_ack == ack + && state->last_end == end) + state->retrans++; + else { + state->last_dir = dir; + state->last_seq = seq; + state->last_ack = ack; + state->last_end = end; + state->retrans = 0; + } + } + /* + * Close the window of disabled window tracking :-) + */ + if (sender->loose) + sender->loose--; + + res = 1; + } else { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_tcp: %s ", + before(end, sender->td_maxend + 1) ? + after(seq, sender->td_end - receiver->td_maxwin - 1) ? + before(sack, receiver->td_end + 1) ? + after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" + : "ACK is under the lower bound (possibly overly delayed ACK)" + : "ACK is over the upper bound (ACKed data has never seen yet)" + : "SEQ is under the lower bound (retransmitted already ACKed data)" + : "SEQ is over the upper bound (over the window of the receiver)"); + + res = nf_ct_tcp_be_liberal && !tcph->rst; + } + + DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " + "receiver end=%u maxend=%u maxwin=%u\n", + res, sender->td_end, sender->td_maxend, sender->td_maxwin, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin); + + return res; +} + +#ifdef CONFIG_IP_NF_NAT_NEEDED +/* Update sender->td_end after NAT successfully mangled the packet */ +/* Caller must linearize skb at tcp header. */ +void nf_conntrack_tcp_update(struct sk_buff *skb, + unsigned int dataoff, + struct nf_conn *conntrack, + int dir) +{ + struct tcphdr *tcph = (void *)skb->data + dataoff; + __u32 end; +#ifdef DEBUGP_VARS + struct nf_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; + struct nf_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; +#endif + + end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, dataoff, tcph); + + WRITE_LOCK(&tcp_lock); + /* + * We have to worry for the ack in the reply packet only... 
+ */
+ if (after(end, conntrack->proto.tcp.seen[dir].td_end))
+ conntrack->proto.tcp.seen[dir].td_end = end;
+ conntrack->proto.tcp.last_end = end;
+ WRITE_UNLOCK(&tcp_lock);
+ DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
+ "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
+ sender->td_end, sender->td_maxend, sender->td_maxwin,
+ sender->td_scale,
+ receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
+ receiver->td_scale);
+}
+
+#endif
+
+#define TH_FIN 0x01
+#define TH_SYN 0x02
+#define TH_RST 0x04
+#define TH_PUSH 0x08
+#define TH_ACK 0x10
+#define TH_URG 0x20
+#define TH_ECE 0x40
+#define TH_CWR 0x80
+
+/* table of valid flag combinations - ECE and CWR are always valid */
+static u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
+{
+ [TH_SYN] = 1,
+ [TH_SYN|TH_ACK] = 1,
+ [TH_RST] = 1,
+ [TH_RST|TH_ACK] = 1,
+ [TH_RST|TH_ACK|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK] = 1,
+ [TH_ACK] = 1,
+ [TH_ACK|TH_PUSH] = 1,
+ [TH_ACK|TH_URG] = 1,
+ [TH_ACK|TH_URG|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK|TH_PUSH] = 1,
+ [TH_FIN|TH_ACK|TH_URG] = 1,
+ [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
+};
+
+/* Protect conntrack against broken packets. Code taken from ipt_unclean.c. */
+static int tcp_error(struct sk_buff *skb,
+ unsigned int dataoff,
+ enum nf_conntrack_info *ctinfo,
+ int pf,
+ unsigned int hooknum,
+ int(*csum)(const struct sk_buff *,unsigned int))
+{
+ struct tcphdr _tcph, *th;
+ unsigned int tcplen = skb->len - dataoff;
+ u_int8_t tcpflags;
+
+ /* Smaller than minimal TCP header?
*/ + th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); + if (th == NULL) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_tcp: short packet "); + return -NF_ACCEPT; + } + + /* Not whole TCP header or malformed packet */ + if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_tcp: truncated/malformed packet "); + return -NF_ACCEPT; + } + + /* Checksum invalid? Ignore. + * We skip checking packets on the outgoing path + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. + */ + /* FIXME: Source route IP option packets --RR */ + if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) && + csum(skb, dataoff)) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_tcp: bad TCP checksum "); + return -NF_ACCEPT; + } + + /* Check TCP flags. */ + tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); + if (!tcp_valid_flags[tcpflags]) { + if (LOG_INVALID(IPPROTO_TCP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_tcp: invalid TCP flag combination "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} + +static int csum4(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->len - dataoff, IPPROTO_TCP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, dataoff, + skb->len - dataoff, 0)); +} + +static int csum6(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len - dataoff, IPPROTO_TCP, + skb->ip_summed == CHECKSUM_HW ? 
skb->csum + : skb_checksum(skb, dataoff, skb->len - dataoff, + 0)); +} + +static int tcp_error4(struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); +} + +static int tcp_error6(struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return tcp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); +} + +/* Returns verdict for packet, or -1 for invalid. */ +static int tcp_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + enum tcp_conntrack new_state, old_state; + enum nf_conntrack_dir dir; + struct tcphdr *th, _tcph; + unsigned long timeout; + unsigned int index; + + th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); + BUG_ON(th == NULL); + + WRITE_LOCK(&tcp_lock); + old_state = conntrack->proto.tcp.state; + dir = NFCTINFO2DIR(ctinfo); + index = get_conntrack_index(th); + new_state = tcp_conntracks[dir][index][old_state]; + + switch (new_state) { + case TCP_CONNTRACK_IGNORE: + /* Either SYN in ORIGINAL + * or SYN/ACK in REPLY + * or ACK in REPLY direction (half-open connection). */ + if (index == TCP_SYNACK_SET + && conntrack->proto.tcp.last_index == TCP_SYN_SET + && conntrack->proto.tcp.last_dir != dir + && after(ntohl(th->ack_seq), + conntrack->proto.tcp.last_seq)) { + /* This SYN/ACK acknowledges a SYN that we earlier + * ignored as invalid. This means that the client and + * the server are both in sync, while the firewall is + * not. We kill this session and block the SYN/ACK so + * that the client cannot but retransmit its SYN and + * thus initiate a clean new session. 
+ */
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(pf, 0, skb, NULL, NULL,
+ "nf_ct_tcp: killing out of sync session ");
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return -NF_DROP;
+ }
+ conntrack->proto.tcp.last_index = index;
+ conntrack->proto.tcp.last_dir = dir;
+ conntrack->proto.tcp.last_seq = ntohl(th->seq);
+
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(pf, 0, skb, NULL, NULL,
+ "nf_ct_tcp: invalid packed ignored ");
+ return NF_ACCEPT;
+ case TCP_CONNTRACK_MAX:
+ /* Invalid packet */
+ DEBUGP("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
+ dir, get_conntrack_index(th),
+ old_state);
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(pf, 0, skb, NULL, NULL,
+ "nf_ct_tcp: invalid state ");
+ return -NF_ACCEPT;
+ case TCP_CONNTRACK_SYN_SENT:
+ if (old_state >= TCP_CONNTRACK_TIME_WAIT) {
+ /* Attempt to reopen a closed connection.
+ * Delete this connection and look up again. */
+ WRITE_UNLOCK(&tcp_lock);
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return -NF_REPEAT;
+ }
+ break;
+ case TCP_CONNTRACK_CLOSE:
+ if (index == TCP_RST_SET
+ && ((test_bit(NF_S_SEEN_REPLY_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index <= TCP_SYNACK_SET)
+ || (!test_bit(NF_S_ASSURED_BIT, &conntrack->status)
+ && conntrack->proto.tcp.last_index == TCP_ACK_SET))
+ && after(ntohl(th->ack_seq),
+ conntrack->proto.tcp.last_seq)) {
+ /* Ignore RST closing down invalid SYN or ACK
+ we had let through. */
+ WRITE_UNLOCK(&tcp_lock);
+ if (LOG_INVALID(IPPROTO_TCP))
+ nf_log_packet(pf, 0, skb, NULL, NULL,
+ "nf_ct_tcp: invalid RST (ignored) ");
+ return NF_ACCEPT;
+ }
+ /* Just fall through */
+ default:
+ /* Keep compilers happy.
 */
+ break;
+ }
+
+ if (!tcp_in_window(&conntrack->proto.tcp, dir, &index,
+ skb, dataoff, th, pf)) {
+ WRITE_UNLOCK(&tcp_lock);
+ return -NF_ACCEPT;
+ }
+ /* From now on we have got in-window packets */
+
+ /* If FIN was trimmed off, we don't change state. */
+ conntrack->proto.tcp.last_index = index;
+ new_state = tcp_conntracks[dir][index][old_state];
+
+ DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
+ "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
+ NIPQUAD(iph->saddr), ntohs(th->source),
+ NIPQUAD(iph->daddr), ntohs(th->dest),
+ (th->syn ? 1 : 0), (th->ack ? 1 : 0),
+ (th->fin ? 1 : 0), (th->rst ? 1 : 0),
+ old_state, new_state);
+
+ conntrack->proto.tcp.state = new_state;
+ timeout = conntrack->proto.tcp.retrans >= nf_ct_tcp_max_retrans
+ && *tcp_timeouts[new_state] > nf_ct_tcp_timeout_max_retrans
+ ? nf_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
+ WRITE_UNLOCK(&tcp_lock);
+
+ if (!test_bit(NF_S_SEEN_REPLY_BIT, &conntrack->status)) {
+ /* If only reply is a RST, we can consider ourselves not to
+ have an established connection: this is a fairly common
+ problem case, so we can delete the conntrack
+ immediately. --RR */
+ if (th->rst) {
+ if (del_timer(&conntrack->timeout))
+ conntrack->timeout.function((unsigned long)
+ conntrack);
+ return NF_ACCEPT;
+ }
+ } else if (!test_bit(NF_S_ASSURED_BIT, &conntrack->status)
+ && (old_state == TCP_CONNTRACK_SYN_RECV
+ || old_state == TCP_CONNTRACK_ESTABLISHED)
+ && new_state == TCP_CONNTRACK_ESTABLISHED) {
+ /* Set ASSURED if we see a valid ack in ESTABLISHED
+ after SYN_RECV or a valid answer for a picked up
+ connection. */
+ set_bit(NF_S_ASSURED_BIT, &conntrack->status);
+ }
+ nf_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
+
+ return NF_ACCEPT;
+}
+
+ /* Called when a new connection for this protocol found.
*/ +static int tcp_new(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff) +{ + enum tcp_conntrack new_state; + struct tcphdr *th, _tcph; +#ifdef DEBUGP_VARS + struct nf_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; + struct nf_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; +#endif + + th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); + BUG_ON(th == NULL); + + /* Don't need lock here: this conntrack not in circulation yet */ + new_state + = tcp_conntracks[0][get_conntrack_index(th)] + [TCP_CONNTRACK_NONE]; + + /* Invalid: delete conntrack */ + if (new_state >= TCP_CONNTRACK_MAX) { + DEBUGP("nf_ct_tcp: invalid new deleting.\n"); + return 0; + } + + if (new_state == TCP_CONNTRACK_SYN_SENT) { + /* SYN packet */ + conntrack->proto.tcp.seen[0].td_end = + segment_seq_plus_len(ntohl(th->seq), skb->len, + dataoff, th); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); + if (conntrack->proto.tcp.seen[0].td_maxwin == 0) + conntrack->proto.tcp.seen[0].td_maxwin = 1; + conntrack->proto.tcp.seen[0].td_maxend = + conntrack->proto.tcp.seen[0].td_end; + + tcp_options(skb, dataoff, th, &conntrack->proto.tcp.seen[0]); + conntrack->proto.tcp.seen[1].flags = 0; + conntrack->proto.tcp.seen[0].loose = + conntrack->proto.tcp.seen[1].loose = 0; + } else if (nf_ct_tcp_loose == 0) { + /* Don't try to pick up connections. */ + return 0; + } else { + /* + * We are in the middle of a connection, + * its history is lost for us. + * Let's try to use the data from the packet. + */ + conntrack->proto.tcp.seen[0].td_end = + segment_seq_plus_len(ntohl(th->seq), skb->len, + dataoff, th); + conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); + if (conntrack->proto.tcp.seen[0].td_maxwin == 0) + conntrack->proto.tcp.seen[0].td_maxwin = 1; + conntrack->proto.tcp.seen[0].td_maxend = + conntrack->proto.tcp.seen[0].td_end + + conntrack->proto.tcp.seen[0].td_maxwin; + conntrack->proto.tcp.seen[0].td_scale = 0; + + /* We assume SACK. 
Should we assume window scaling too? */ + conntrack->proto.tcp.seen[0].flags = + conntrack->proto.tcp.seen[1].flags = NF_CT_TCP_FLAG_SACK_PERM; + conntrack->proto.tcp.seen[0].loose = + conntrack->proto.tcp.seen[1].loose = nf_ct_tcp_loose; + } + + conntrack->proto.tcp.seen[1].td_end = 0; + conntrack->proto.tcp.seen[1].td_maxend = 0; + conntrack->proto.tcp.seen[1].td_maxwin = 1; + conntrack->proto.tcp.seen[1].td_scale = 0; + + /* tcp_packet will set them */ + conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; + conntrack->proto.tcp.last_index = TCP_NONE_SET; + + DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " + "receiver end=%u maxend=%u maxwin=%u scale=%i\n", + sender->td_end, sender->td_maxend, sender->td_maxwin, + sender->td_scale, + receiver->td_end, receiver->td_maxend, receiver->td_maxwin, + receiver->td_scale); + return 1; +} + +struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 = +{ + .l3proto = PF_INET, + .proto = IPPROTO_TCP, + .name = "tcp", + .pkt_to_tuple = tcp_pkt_to_tuple, + .invert_tuple = tcp_invert_tuple, + .print_tuple = tcp_print_tuple, + .print_conntrack = tcp_print_conntrack, + .packet = tcp_packet, + .new = tcp_new, + .error = tcp_error4, +}; + +struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 = +{ + .l3proto = PF_INET6, + .proto = IPPROTO_TCP, + .name = "tcp", + .pkt_to_tuple = tcp_pkt_to_tuple, + .invert_tuple = tcp_invert_tuple, + .print_tuple = tcp_print_tuple, + .print_conntrack = tcp_print_conntrack, + .packet = tcp_packet, + .new = tcp_new, + .error = tcp_error6, +}; + +EXPORT_SYMBOL(nf_conntrack_protocol_tcp4); +EXPORT_SYMBOL(nf_conntrack_protocol_tcp6); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_udp.c linux-2.6.11.3/net/netfilter/nf_conntrack_proto_udp.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_proto_udp.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_proto_udp.c 2005-03-13 22:06:02.239213752 +0100 @@ -0,0 +1,212 @@ +/* (C) 
1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - enable working with Layer 3 protocol independent connection tracking. + * + * Derived from net/ipv4/netfilter/ip_conntrack_proto_udp.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned long nf_ct_udp_timeout = 30*HZ; +unsigned long nf_ct_udp_timeout_stream = 180*HZ; + +static int udp_pkt_to_tuple(const struct sk_buff *skb, + unsigned int dataoff, + struct nf_conntrack_tuple *tuple) +{ + struct udphdr _hdr, *hp; + + /* Actually only need first 8 bytes. */ + hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hp == NULL) + return 0; + + tuple->src.u.udp.port = hp->source; + tuple->dst.u.udp.port = hp->dest; + + return 1; +} + +static int udp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) +{ + tuple->src.u.udp.port = orig->dst.u.udp.port; + tuple->dst.u.udp.port = orig->src.u.udp.port; + return 1; +} + +/* Print out the per-protocol part of the tuple. */ +static int udp_print_tuple(struct seq_file *s, + const struct nf_conntrack_tuple *tuple) +{ + return seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); +} + +/* Print out the private part of the conntrack. 
*/ +static int udp_print_conntrack(struct seq_file *s, + const struct nf_conn *conntrack) +{ + return 0; +} + +/* Returns verdict for packet, and may modify conntracktype */ +static int udp_packet(struct nf_conn *conntrack, + const struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info ctinfo, + int pf, + unsigned int hooknum) +{ + /* If we've seen traffic both ways, this is some kind of UDP + stream. Extend timeout. */ + if (test_bit(NF_S_SEEN_REPLY_BIT, &conntrack->status)) { + nf_ct_refresh_acct(conntrack, ctinfo, skb, + nf_ct_udp_timeout_stream); + /* Also, more likely to be important, and not a probe */ + set_bit(NF_S_ASSURED_BIT, &conntrack->status); + } else + nf_ct_refresh_acct(conntrack, ctinfo, skb, nf_ct_udp_timeout); + + return NF_ACCEPT; +} + +/* Called when a new connection for this protocol found. */ +static int udp_new(struct nf_conn *conntrack, const struct sk_buff *skb, + unsigned int dataoff) +{ + return 1; +} + +static int udp_error(struct sk_buff *skb, unsigned int dataoff, + enum nf_conntrack_info *ctinfo, + int pf, + unsigned int hooknum, + int (*csum)(const struct sk_buff *, unsigned int)) +{ + unsigned int udplen = skb->len - dataoff; + struct udphdr _hdr, *hdr; + + /* Header is too small? */ + hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + if (hdr == NULL) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_udp: short packet "); + return -NF_ACCEPT; + } + + /* Truncated/malformed packets */ + if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_udp: truncated/malformed packet "); + return -NF_ACCEPT; + } + + /* Packet with no checksum */ + if (!hdr->check) + return NF_ACCEPT; + + /* Checksum invalid? Ignore. + * We skip checking packets on the outgoing path + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. 
+ * FIXME: Source route IP option packets --RR */ + if (((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || + (pf == PF_INET6 && hooknum == NF_IP6_PRE_ROUTING)) + && csum(skb, dataoff)) { + if (LOG_INVALID(IPPROTO_UDP)) + nf_log_packet(pf, 0, skb, NULL, NULL, + "nf_ct_udp: bad UDP checksum "); + return -NF_ACCEPT; + } + + return NF_ACCEPT; +} + +static int csum4(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, + skb->len - dataoff, IPPROTO_UDP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, dataoff, + skb->len - dataoff, 0)); +} + +static int csum6(const struct sk_buff *skb, unsigned int dataoff) +{ + return csum_ipv6_magic(&skb->nh.ipv6h->saddr, &skb->nh.ipv6h->daddr, + skb->len - dataoff, IPPROTO_UDP, + skb->ip_summed == CHECKSUM_HW ? skb->csum + : skb_checksum(skb, dataoff, skb->len - dataoff, + 0)); +} + +static int udp_error4(struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum4); +} + +static int udp_error6(struct sk_buff *skb, + unsigned int dataoff, + enum nf_conntrack_info *ctinfo, + int pf, + unsigned int hooknum) +{ + return udp_error(skb, dataoff, ctinfo, pf, hooknum, csum6); +} + +struct nf_conntrack_protocol nf_conntrack_protocol_udp4 = +{ + .l3proto = PF_INET, + .proto = IPPROTO_UDP, + .name = "udp", + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .print_conntrack = udp_print_conntrack, + .packet = udp_packet, + .new = udp_new, + .error = udp_error4, +}; + +struct nf_conntrack_protocol nf_conntrack_protocol_udp6 = +{ + .l3proto = PF_INET6, + .proto = IPPROTO_UDP, + .name = "udp", + .pkt_to_tuple = udp_pkt_to_tuple, + .invert_tuple = udp_invert_tuple, + .print_tuple = udp_print_tuple, + .print_conntrack = udp_print_conntrack, + .packet = udp_packet, + .new = udp_new, + .error = 
udp_error6, +}; + +EXPORT_SYMBOL(nf_conntrack_protocol_udp4); +EXPORT_SYMBOL(nf_conntrack_protocol_udp6); diff -Nur --exclude '*.orig' linux-2.6.11.3.org/net/netfilter/nf_conntrack_standalone.c linux-2.6.11.3/net/netfilter/nf_conntrack_standalone.c --- linux-2.6.11.3.org/net/netfilter/nf_conntrack_standalone.c 1970-01-01 01:00:00.000000000 +0100 +++ linux-2.6.11.3/net/netfilter/nf_conntrack_standalone.c 2005-03-13 22:06:02.245212840 +0100 @@ -0,0 +1,821 @@ +/* This file contains all the functions required for the standalone + nf_conntrack module. + + These are not required by the compatibility layer. +*/ + +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalize L3 protocol dependent part. + * + * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_SYSCTL +#include +#endif + +#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&nf_conntrack_lock) +#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&nf_conntrack_lock) + +#include +#include +#include +#include +#include +#include + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +MODULE_LICENSE("GPL"); + +extern atomic_t nf_conntrack_count; +DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); + +static int kill_l3proto(struct nf_conn *i, void *data) +{ + return (i->tuplehash[NF_CT_DIR_ORIGINAL].tuple.src.l3num == + ((struct nf_conntrack_l3proto *)data)->l3proto); +} + +static int kill_proto(struct nf_conn *i, void *data) +{ + struct nf_conntrack_protocol *proto; + proto = (struct nf_conntrack_protocol *)data; + return (i->tuplehash[NF_CT_DIR_ORIGINAL].tuple.dst.protonum == + proto->proto) && + (i->tuplehash[NF_CT_DIR_ORIGINAL].tuple.src.l3num == + proto->l3proto); +} + +#ifdef CONFIG_PROC_FS +static int +print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, + struct nf_conntrack_l3proto *l3proto, + struct nf_conntrack_protocol *proto) +{ + return l3proto->print_tuple(s, tuple) || proto->print_tuple(s, tuple); +} + +#ifdef CONFIG_NF_CT_ACCT +static unsigned int +seq_print_counters(struct seq_file *s, + const struct nf_conntrack_counter *counter) +{ + return seq_printf(s, "packets=%llu bytes=%llu ", + (unsigned long long)counter->packets, + (unsigned long long)counter->bytes); +} +#else +#define seq_print_counters(x, y) 0 +#endif + +static void *ct_seq_start(struct seq_file *s, loff_t *pos) +{ + if (*pos >= nf_conntrack_htable_size) + return NULL; + return &nf_conntrack_hash[*pos]; +} + +static void ct_seq_stop(struct seq_file *s, void *v) +{ +} + +static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + if (*pos >= nf_conntrack_htable_size) + return NULL; + return &nf_conntrack_hash[*pos]; +} + +/* return 0 on success, 1 in case of error */ +static int ct_seq_real_show(const struct nf_conntrack_tuple_hash *hash, + struct seq_file *s) +{ + const struct nf_conn *conntrack = tuplehash_to_ctrack(hash); + struct nf_conntrack_l3proto *l3proto; + struct nf_conntrack_protocol *proto; + + MUST_BE_READ_LOCKED(&nf_conntrack_lock); + + NF_CT_ASSERT(conntrack); + + /* we only want to print 
DIR_ORIGINAL */ + if (NF_CT_DIRECTION(hash)) + return 0; + + l3proto = nf_ct_find_l3proto(conntrack->tuplehash[NF_CT_DIR_ORIGINAL] + .tuple.src.l3num); + + NF_CT_ASSERT(l3proto); + proto = nf_ct_find_proto(conntrack->tuplehash[NF_CT_DIR_ORIGINAL] + .tuple.src.l3num, + conntrack->tuplehash[NF_CT_DIR_ORIGINAL] + .tuple.dst.protonum); + NF_CT_ASSERT(proto); + + if (seq_printf(s, "%-8s %u %-8s %u %lu ", + l3proto->name, + conntrack->tuplehash[NF_CT_DIR_ORIGINAL].tuple.src.l3num, + proto->name, + conntrack->tuplehash[NF_CT_DIR_ORIGINAL].tuple.dst.protonum, + timer_pending(&conntrack->timeout) + ? (conntrack->timeout.expires - jiffies)/HZ : 0) != 0) + return 1; + + if (l3proto->print_conntrack(s, conntrack)) + return 1; + + if (proto->print_conntrack(s, conntrack)) + return 1; + + if (print_tuple(s, &conntrack->tuplehash[NF_CT_DIR_ORIGINAL].tuple, + l3proto, proto)) + return 1; + + if (seq_print_counters(s, &conntrack->counters[NF_CT_DIR_ORIGINAL])) + return 1; + + if (!(test_bit(NF_S_SEEN_REPLY_BIT, &conntrack->status))) + if (seq_printf(s, "[UNREPLIED] ")) + return 1; + + if (print_tuple(s, &conntrack->tuplehash[NF_CT_DIR_REPLY].tuple, + l3proto, proto)) + return 1; + + if (seq_print_counters(s, &conntrack->counters[NF_CT_DIR_REPLY])) + return 1; + + if (test_bit(NF_S_ASSURED_BIT, &conntrack->status)) + if (seq_printf(s, "[ASSURED] ")) + return 1; + +#if defined(CONFIG_NF_CONNTRACK_MARK) + if (seq_printf(s, "mark=%ld ", conntrack->mark)) + return 1; +#endif + + if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) + return 1; + + return 0; +} + +static int ct_seq_show(struct seq_file *s, void *v) +{ + struct list_head *list = v; + int ret = 0; + + /* FIXME: Simply truncates if hash chain too long. 
*/ + READ_LOCK(&nf_conntrack_lock); + if (LIST_FIND(list, ct_seq_real_show, + struct nf_conntrack_tuple_hash *, s)) + ret = -ENOSPC; + READ_UNLOCK(&nf_conntrack_lock); + return ret; +} + +static struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show +}; + +static int ct_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ct_seq_ops); +} + +static struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +/* expects */ +static void *exp_seq_start(struct seq_file *s, loff_t *pos) +{ + struct list_head *e = &nf_conntrack_expect_list; + loff_t i; + + /* strange seq_file api calls stop even if we fail, + * thus we need to grab lock since stop unlocks */ + READ_LOCK(&nf_conntrack_lock); + + if (list_empty(e)) + return NULL; + + for (i = 0; i <= *pos; i++) { + e = e->next; + if (e == &nf_conntrack_expect_list) + return NULL; + } + return e; +} + +static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct list_head *e = v; + + e = e->next; + + if (e == &nf_conntrack_expect_list) + return NULL; + + return e; +} + +static void exp_seq_stop(struct seq_file *s, void *v) +{ + READ_UNLOCK(&nf_conntrack_lock); +} + +static int exp_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_expect *expect = v; + + if (expect->timeout.function) + seq_printf(s, "%lu ", timer_pending(&expect->timeout) + ? 
(expect->timeout.expires - jiffies)/HZ : 0); + else + seq_printf(s, "- "); + seq_printf(s, "l3proto = %u proto=%u ", + expect->tuple.src.l3num, + expect->tuple.dst.protonum); + print_tuple(s, &expect->tuple, + nf_ct_find_l3proto(expect->tuple.src.l3num), + nf_ct_find_proto(expect->tuple.src.l3num, + expect->tuple.dst.protonum)); + return seq_putc(s, '\n'); +} + +static struct seq_operations exp_seq_ops = { + .start = exp_seq_start, + .next = exp_seq_next, + .stop = exp_seq_stop, + .show = exp_seq_show +}; + +static int exp_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &exp_seq_ops); +} + +static struct file_operations exp_file_ops = { + .owner = THIS_MODULE, + .open = exp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return &per_cpu(nf_conntrack_stat, cpu); + } + + return NULL; +} + +static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + int cpu; + + for (cpu = *pos; cpu < NR_CPUS; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu + 1; + return &per_cpu(nf_conntrack_stat, cpu); + } + + return NULL; +} + +static void ct_cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int ct_cpu_seq_show(struct seq_file *seq, void *v) +{ + unsigned int nr_conntracks = atomic_read(&nf_conntrack_count); + struct nf_conntrack_stat *st = v; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); + return 0; + } + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x %08x %08x \n", + nr_conntracks, + st->searched, + st->found, + st->new, + st->invalid, + st->ignore, + 
st->delete, + st->delete_list, + st->insert, + st->insert_failed, + st->drop, + st->early_drop, + st->error, + + st->expect_new, + st->expect_create, + st->expect_delete + ); + return 0; +} + +static struct seq_operations ct_cpu_seq_ops = { + .start = ct_cpu_seq_start, + .next = ct_cpu_seq_next, + .stop = ct_cpu_seq_stop, + .show = ct_cpu_seq_show, +}; + +static int ct_cpu_seq_open(struct inode *inode, struct file *file) +{ + return seq_open(file, &ct_cpu_seq_ops); +} + +static struct file_operations ct_cpu_seq_fops = { + .owner = THIS_MODULE, + .open = ct_cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_private, +}; +#endif /* CONFIG_PROC_FS */ + +/* Sysctl support */ + +#ifdef CONFIG_SYSCTL + +/* From nf_conntrack_core.c */ +extern int nf_conntrack_max; +extern unsigned int nf_conntrack_htable_size; + +/* From nf_conntrack_proto_tcp.c */ +extern unsigned long nf_ct_tcp_timeout_syn_sent; +extern unsigned long nf_ct_tcp_timeout_syn_recv; +extern unsigned long nf_ct_tcp_timeout_established; +extern unsigned long nf_ct_tcp_timeout_fin_wait; +extern unsigned long nf_ct_tcp_timeout_close_wait; +extern unsigned long nf_ct_tcp_timeout_last_ack; +extern unsigned long nf_ct_tcp_timeout_time_wait; +extern unsigned long nf_ct_tcp_timeout_close; +extern unsigned long nf_ct_tcp_timeout_max_retrans; +extern int nf_ct_tcp_loose; +extern int nf_ct_tcp_be_liberal; +extern int nf_ct_tcp_max_retrans; + +/* From nf_conntrack_proto_udp.c */ +extern unsigned long nf_ct_udp_timeout; +extern unsigned long nf_ct_udp_timeout_stream; + +/* From nf_conntrack_proto_generic.c */ +extern unsigned long nf_ct_generic_timeout; + +/* Log invalid packets of a given protocol */ +static int log_invalid_proto_min = 0; +static int log_invalid_proto_max = 255; + +static struct ctl_table_header *nf_ct_sysctl_header; + +static ctl_table nf_ct_sysctl_table[] = { + { + .ctl_name = NET_NF_CONNTRACK_MAX, + .procname = "nf_conntrack_max", + .data = &nf_conntrack_max, + .maxlen = 
sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_COUNT, + .procname = "nf_conntrack_count", + .data = &nf_conntrack_count, + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_BUCKETS, + .procname = "nf_conntrack_buckets", + .data = &nf_conntrack_htable_size, + .maxlen = sizeof(unsigned int), + .mode = 0444, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, + .procname = "nf_conntrack_tcp_timeout_syn_sent", + .data = &nf_ct_tcp_timeout_syn_sent, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, + .procname = "nf_conntrack_tcp_timeout_syn_recv", + .data = &nf_ct_tcp_timeout_syn_recv, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, + .procname = "nf_conntrack_tcp_timeout_established", + .data = &nf_ct_tcp_timeout_established, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, + .procname = "nf_conntrack_tcp_timeout_fin_wait", + .data = &nf_ct_tcp_timeout_fin_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, + .procname = "nf_conntrack_tcp_timeout_close_wait", + .data = &nf_ct_tcp_timeout_close_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, + .procname = "nf_conntrack_tcp_timeout_last_ack", + .data = &nf_ct_tcp_timeout_last_ack, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = 
NET_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, + .procname = "nf_conntrack_tcp_timeout_time_wait", + .data = &nf_ct_tcp_timeout_time_wait, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, + .procname = "nf_conntrack_tcp_timeout_close", + .data = &nf_ct_tcp_timeout_close, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT, + .procname = "nf_conntrack_udp_timeout", + .data = &nf_ct_udp_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_UDP_TIMEOUT_STREAM, + .procname = "nf_conntrack_udp_timeout_stream", + .data = &nf_ct_udp_timeout_stream, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_GENERIC_TIMEOUT, + .procname = "nf_conntrack_generic_timeout", + .data = &nf_ct_generic_timeout, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_LOG_INVALID, + .procname = "nf_conntrack_log_invalid", + .data = &nf_ct_log_invalid, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &log_invalid_proto_min, + .extra2 = &log_invalid_proto_max, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, + .procname = "nf_conntrack_tcp_timeout_max_retrans", + .data = &nf_ct_tcp_timeout_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_LOOSE, + .procname = "nf_conntrack_tcp_loose", + .data = &nf_ct_tcp_loose, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_BE_LIBERAL, + .procname = 
"nf_conntrack_tcp_be_liberal", + .data = &nf_ct_tcp_be_liberal, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { + .ctl_name = NET_NF_CONNTRACK_TCP_MAX_RETRANS, + .procname = "nf_conntrack_tcp_max_retrans", + .data = &nf_ct_tcp_max_retrans, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + + { .ctl_name = 0 } +}; + +#define NET_NF_CONNTRACK_MAX 2089 + +static ctl_table nf_ct_netfilter_table[] = { + { + .ctl_name = NET_NETFILTER, + .procname = "netfilter", + .mode = 0555, + .child = nf_ct_sysctl_table, + }, + { + .ctl_name = NET_NF_CONNTRACK_MAX, + .procname = "nf_conntrack_max", + .data = &nf_conntrack_max, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, + { .ctl_name = 0 } +}; + +static ctl_table nf_ct_net_table[] = { + { + .ctl_name = CTL_NET, + .procname = "net", + .mode = 0555, + .child = nf_ct_netfilter_table, + }, + { .ctl_name = 0 } +}; +EXPORT_SYMBOL(nf_ct_log_invalid); +#endif /* CONFIG_SYSCTL */ + +static int init_or_cleanup(int init) +{ +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *proc, *proc_exp, *proc_stat; +#endif + int ret = 0; + + if (!init) goto cleanup; + + ret = nf_conntrack_init(); + if (ret < 0) + goto cleanup_nothing; + +#ifdef CONFIG_PROC_FS + proc = proc_net_fops_create("nf_conntrack", 0440, &ct_file_ops); + if (!proc) goto cleanup_init; + + proc_exp = proc_net_fops_create("nf_conntrack_expect", 0440, + &exp_file_ops); + if (!proc_exp) goto cleanup_proc; + + proc_stat = create_proc_entry("nf_conntrack", S_IRUGO, proc_net_stat); + if (!proc_stat) + goto cleanup_proc_exp; + + proc_stat->proc_fops = &ct_cpu_seq_fops; + proc_stat->owner = THIS_MODULE; +#endif +#ifdef CONFIG_SYSCTL + nf_ct_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); + if (nf_ct_sysctl_header == NULL) { + printk("nf_conntrack: can't register to sysctl.\n"); + ret = -ENOMEM; + goto cleanup_proc_stat; + } +#endif + + return ret; + + cleanup: +#ifdef 
 CONFIG_SYSCTL
+	unregister_sysctl_table(nf_ct_sysctl_header);
+ cleanup_proc_stat:
+#endif
+#ifdef CONFIG_PROC_FS
+	proc_net_remove("nf_conntrack_stat");
+ cleanup_proc_exp:
+	proc_net_remove("nf_conntrack_expect");
+ cleanup_proc:
+	proc_net_remove("nf_conntrack");
+ cleanup_init:
+#endif /* CONFIG_PROC_FS */
+	nf_conntrack_cleanup();
+ cleanup_nothing:
+	return ret;
+}
+
+int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto)
+{
+	int ret = 0;
+
+	WRITE_LOCK(&nf_conntrack_lock);
+	if (nf_ct_l3protos[proto->l3proto] != &nf_conntrack_generic_l3proto) {
+		ret = -EBUSY;
+		goto out;
+	}
+	nf_ct_l3protos[proto->l3proto] = proto;
+out:
+	WRITE_UNLOCK(&nf_conntrack_lock);
+
+	return ret;
+}
+
+void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto)
+{
+	WRITE_LOCK(&nf_conntrack_lock);
+	nf_ct_l3protos[proto->l3proto] = &nf_conntrack_generic_l3proto;
+	WRITE_UNLOCK(&nf_conntrack_lock);
+
+	/* Somebody could be still looking at the proto in bh. */
+	synchronize_net();
+
+	/* Remove all conntrack entries for this protocol */
+	nf_ct_iterate_cleanup(kill_l3proto, proto);
+}
+
+/* FIXME: Allow NULL functions and sub in pointers to generic for
+   them. --RR */
+int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto)
+{
+	int ret = 0;
+
+retry:
+	WRITE_LOCK(&nf_conntrack_lock);
+	if (nf_ct_protos[proto->l3proto]) {
+		if (nf_ct_protos[proto->l3proto][proto->proto]
+		    != &nf_conntrack_generic_protocol) {
+			ret = -EBUSY;
+			goto out_unlock;
+		}
+	} else {
+		/* l3proto may be loaded later. 
 */
+		struct nf_conntrack_protocol **proto_array;
+		int i;
+
+		WRITE_UNLOCK(&nf_conntrack_lock);
+
+		proto_array = (struct nf_conntrack_protocol **)
+				kmalloc(MAX_NF_CT_PROTO *
+					sizeof(struct nf_conntrack_protocol *),
+					GFP_KERNEL);
+		if (proto_array == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		for (i = 0; i < MAX_NF_CT_PROTO; i++)
+			proto_array[i] = &nf_conntrack_generic_protocol;
+
+		WRITE_LOCK(&nf_conntrack_lock);
+		if (nf_ct_protos[proto->l3proto]) {
+			/* bad timing, but no problem */
+			WRITE_UNLOCK(&nf_conntrack_lock);
+			kfree(proto_array);
+		} else {
+			nf_ct_protos[proto->l3proto] = proto_array;
+			WRITE_UNLOCK(&nf_conntrack_lock);
+		}
+
+		/*
+		 * Just once because array is never freed until unloading
+		 * nf_conntrack.ko
+		 */
+		goto retry;
+	}
+
+	nf_ct_protos[proto->l3proto][proto->proto] = proto;
+
+out_unlock:
+	WRITE_UNLOCK(&nf_conntrack_lock);
+out:
+	return ret;
+}
+
+void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto)
+{
+	WRITE_LOCK(&nf_conntrack_lock);
+	nf_ct_protos[proto->l3proto][proto->proto]
+		= &nf_conntrack_generic_protocol;
+	WRITE_UNLOCK(&nf_conntrack_lock);
+
+	/* Somebody could be still looking at the proto in bh. */
+	synchronize_net();
+
+	/* Remove all conntrack entries for this protocol */
+	nf_ct_iterate_cleanup(kill_proto, proto);
+}
+
+static int __init init(void)
+{
+	return init_or_cleanup(1);
+}
+
+static void __exit fini(void)
+{
+	init_or_cleanup(0);
+}
+
+module_init(init);
+module_exit(fini);
+
+/* Some modules need us, but don't depend directly on any symbol.
+   They should call this. 
*/ +void need_nf_conntrack(void) +{ +} + +EXPORT_SYMBOL(nf_conntrack_l3proto_register); +EXPORT_SYMBOL(nf_conntrack_l3proto_unregister); +EXPORT_SYMBOL(nf_conntrack_protocol_register); +EXPORT_SYMBOL(nf_conntrack_protocol_unregister); +EXPORT_SYMBOL(nf_ct_invert_tuplepr); +EXPORT_SYMBOL(nf_conntrack_alter_reply); +EXPORT_SYMBOL(nf_conntrack_destroyed); +EXPORT_SYMBOL(need_nf_conntrack); +EXPORT_SYMBOL(nf_conntrack_helper_register); +EXPORT_SYMBOL(nf_conntrack_helper_unregister); +EXPORT_SYMBOL(nf_ct_iterate_cleanup); +EXPORT_SYMBOL(nf_ct_refresh_acct); +EXPORT_SYMBOL(nf_ct_protos); +EXPORT_SYMBOL(nf_ct_find_proto); +EXPORT_SYMBOL(nf_ct_l3protos); +EXPORT_SYMBOL(nf_conntrack_expect_alloc); +EXPORT_SYMBOL(nf_conntrack_expect_free); +EXPORT_SYMBOL(nf_conntrack_expect_related); +EXPORT_SYMBOL(nf_conntrack_unexpect_related); +EXPORT_SYMBOL(nf_conntrack_tuple_taken); +EXPORT_SYMBOL(nf_conntrack_htable_size); +EXPORT_SYMBOL(nf_conntrack_lock); +EXPORT_SYMBOL(nf_conntrack_hash); +EXPORT_SYMBOL(nf_conntrack_untracked); +EXPORT_SYMBOL_GPL(nf_conntrack_find_get); +EXPORT_SYMBOL_GPL(nf_ct_put); +#ifdef CONFIG_IP_NF_NAT_NEEDED +EXPORT_SYMBOL(nf_conntrack_tcp_update); +#endif +EXPORT_SYMBOL(__nf_conntrack_confirm); +EXPORT_SYMBOL(nf_ct_get_tuple); +EXPORT_SYMBOL(nf_ct_invert_tuple); +EXPORT_SYMBOL(nf_conntrack_in); +EXPORT_SYMBOL(__nf_conntrack_attach);