diff -Nur linux-2.6.3.org/include/linux/netfilter.h linux-2.6.3/include/linux/netfilter.h
--- linux-2.6.3.org/include/linux/netfilter.h 2004-02-18 04:57:59.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter.h 2004-02-27 00:03:00.000228144 +0100
@@ -99,6 +99,24 @@
 
 extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
 
+typedef void nf_logfn(unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *prefix);
+
+/* Function to register/unregister log function. */
+int nf_log_register(int pf, nf_logfn *logfn);
+void nf_log_unregister(int pf, nf_logfn *logfn);
+
+/* Calls the registered backend logging function */
+void nf_log_packet(int pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *fmt, ...);
+
 /* Activate hook; either okfn or kfree_skb called, unless a hook
    returns NF_STOLEN (in which case, it's up to the hook to deal with
    the consequences).
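
A backend (such as a LOG-style target module) plugs into this interface by providing an nf_logfn and registering it for one protocol family; nf_log_packet() then forwards to whichever backend is registered. A minimal, hypothetical backend sketch (not part of the patch; names are illustrative):

	#include <linux/kernel.h>
	#include <linux/module.h>
	#include <linux/skbuff.h>
	#include <linux/netdevice.h>
	#include <linux/netfilter.h>

	static void demo_logfn(unsigned int hooknum,
	                       const struct sk_buff *skb,
	                       const struct net_device *in,
	                       const struct net_device *out,
	                       const char *prefix)
	{
		/* prefix is the string already formatted by nf_log_packet() */
		printk(KERN_INFO "%shook=%u in=%s out=%s len=%u\n",
		       prefix, hooknum,
		       in ? in->name : "?", out ? out->name : "?", skb->len);
	}

	static int __init demo_init(void)
	{
		/* returns -EBUSY if another backend already owns PF_INET */
		return nf_log_register(PF_INET, &demo_logfn);
	}

	static void __exit demo_exit(void)
	{
		nf_log_unregister(PF_INET, &demo_logfn);
	}

	module_init(demo_init);
	module_exit(demo_exit);
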
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.3/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ip_conntrack.h 2004-02-18 04:59:30.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ip_conntrack.h 2004-02-27 00:03:14.480026880 +0100
@@ -251,6 +251,9 @@
 /* Call me when a conntrack is destroyed. */
 extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
 
+/* Fake conntrack entry for untracked connections */
+extern struct ip_conntrack ip_conntrack_untracked;
+
 /* Returns new sk_buff, or NULL */
 struct sk_buff *
 ip_ct_gather_frags(struct sk_buff *skb);
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_connlimit.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_connlimit.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_connlimit.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_connlimit.h 2004-02-27 00:03:07.981014880 +0100
@@ -0,0 +1,12 @@
+#ifndef _IPT_CONNLIMIT_H
+#define _IPT_CONNLIMIT_H
+
+struct ipt_connlimit_data;
+
+struct ipt_connlimit_info {
+ int limit;
+ int inverse;
+ u_int32_t mask;
+ struct ipt_connlimit_data *data;
+};
+#endif /* _IPT_CONNLIMIT_H */
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_conntrack.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_conntrack.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_conntrack.h 2004-02-18 04:59:05.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_conntrack.h 2004-02-27 00:03:14.480026880 +0100
@@ -10,6 +10,7 @@
 
 #define IPT_CONNTRACK_STATE_SNAT (1 << (IP_CT_NUMBER + 1))
 #define IPT_CONNTRACK_STATE_DNAT (1 << (IP_CT_NUMBER + 2))
+#define IPT_CONNTRACK_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 3))
 
 /* flags, invflags: */
 #define IPT_CONNTRACK_STATE 0x01
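
A match honouring the new bit tests it alongside the existing SNAT/DNAT bits; a hedged sketch of the state-mask derivation (it assumes the fake conntrack set up in the ip_conntrack_core.c hunk further down is attached as skb->nfct):

	unsigned int statebit;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack *ct = ip_conntrack_get(skb, &ctinfo);

	if (ct == &ip_conntrack_untracked)
		statebit = IPT_CONNTRACK_STATE_UNTRACKED;
	else if (ct)
		statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
	else
		statebit = IPT_CONNTRACK_STATE_INVALID;
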
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_dstlimit.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_dstlimit.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_dstlimit.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_dstlimit.h 2004-02-27 00:03:08.651912888 +0100
@@ -0,0 +1,39 @@
+#ifndef _IPT_DSTLIMIT_H
+#define _IPT_DSTLIMIT_H
+
+/* timings are in milliseconds. */
+#define IPT_DSTLIMIT_SCALE 10000
+/* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429496
+ seconds, or about one every 119 hours. */
+
+/* details of this structure hidden by the implementation */
+struct ipt_dstlimit_htable;
+
+#define IPT_DSTLIMIT_HASH_DIP 0x0001
+#define IPT_DSTLIMIT_HASH_DPT 0x0002
+#define IPT_DSTLIMIT_HASH_SIP 0x0004
+
+struct dstlimit_cfg {
+ u_int32_t mode; /* bitmask of IPT_DSTLIMIT_HASH_* */
+ u_int32_t avg; /* Average secs between packets * scale */
+ u_int32_t burst; /* Period multiplier for upper limit. */
+
+ /* user specified */
+ u_int32_t size; /* how many buckets */
+ u_int32_t max; /* max number of entries */
+ u_int32_t gc_interval; /* gc interval */
+ u_int32_t expire; /* when do entries expire? */
+};
+
+struct ipt_dstlimit_info {
+ char name [IFNAMSIZ]; /* name */
+ struct dstlimit_cfg cfg;
+ struct ipt_dstlimit_htable *hinfo;
+
+ /* Used internally by the kernel */
+ union {
+ void *ptr;
+ struct ipt_dstlimit_info *master;
+ } u;
+};
+#endif /*_IPT_DSTLIMIT_H*/
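
The scale arithmetic: user space converts a desired rate into cfg.avg, the gap between matched packets in 1/IPT_DSTLIMIT_SCALE second units, with cfg.burst as a multiplier allowing short bursts. An illustrative sketch:

	/* allow 25 packets/sec per destination IP+port, bursts of 5 */
	struct dstlimit_cfg cfg;

	cfg.mode = IPT_DSTLIMIT_HASH_DIP | IPT_DSTLIMIT_HASH_DPT;
	cfg.avg = IPT_DSTLIMIT_SCALE / 25; /* 400 = 1/25 sec between packets */
	cfg.burst = 5;
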
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_fuzzy.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_fuzzy.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_fuzzy.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_fuzzy.h 2004-02-27 00:03:09.359805272 +0100
@@ -0,0 +1,21 @@
+#ifndef _IPT_FUZZY_H
+#define _IPT_FUZZY_H
+
+#include <linux/param.h>
+#include <linux/types.h>
+
+#define MAXFUZZYRATE 10000000
+#define MINFUZZYRATE 3
+
+struct ipt_fuzzy_info {
+ u_int32_t minimum_rate;
+ u_int32_t maximum_rate;
+ u_int32_t packets_total;
+ u_int32_t bytes_total;
+ u_int32_t previous_time;
+ u_int32_t present_time;
+ u_int32_t mean_rate;
+ u_int8_t acceptance_rate;
+};
+
+#endif /*_IPT_FUZZY_H*/
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_ipv4options.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_ipv4options.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_ipv4options.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_ipv4options.h 2004-02-27 00:03:10.065697960 +0100
@@ -0,0 +1,21 @@
+#ifndef __ipt_ipv4options_h_included__
+#define __ipt_ipv4options_h_included__
+
+#define IPT_IPV4OPTION_MATCH_SSRR 0x01 /* For strict source routing */
+#define IPT_IPV4OPTION_MATCH_LSRR 0x02 /* For loose source routing */
+#define IPT_IPV4OPTION_DONT_MATCH_SRR 0x04 /* any source routing */
+#define IPT_IPV4OPTION_MATCH_RR 0x08 /* For Record route */
+#define IPT_IPV4OPTION_DONT_MATCH_RR 0x10
+#define IPT_IPV4OPTION_MATCH_TIMESTAMP 0x20 /* For timestamp request */
+#define IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP 0x40
+#define IPT_IPV4OPTION_MATCH_ROUTER_ALERT 0x80 /* For router-alert */
+#define IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT 0x100
+#define IPT_IPV4OPTION_MATCH_ANY_OPT 0x200 /* match packet with any option */
+#define IPT_IPV4OPTION_DONT_MATCH_ANY_OPT 0x400 /* match packet with no option */
+
+struct ipt_ipv4options_info {
+ u_int16_t options;
+};
+
+
+#endif /* __ipt_ipv4options_h_included__ */
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_mport.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_mport.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_mport.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_mport.h 2004-02-27 00:03:10.772590496 +0100
@@ -0,0 +1,24 @@
+#ifndef _IPT_MPORT_H
+#define _IPT_MPORT_H
+#include <linux/netfilter_ipv4/ip_tables.h>
+
+#define IPT_MPORT_SOURCE (1<<0)
+#define IPT_MPORT_DESTINATION (1<<1)
+#define IPT_MPORT_EITHER (IPT_MPORT_SOURCE|IPT_MPORT_DESTINATION)
+
+#define IPT_MULTI_PORTS 15
+
+/* Must fit inside union ipt_matchinfo: 32 bytes */
+/* every entry in ports[] except for the last one has one bit in pflags
+ * associated with it. If this bit is set, the port is the first port of
+ * a portrange, with the next entry being the last.
+ * End of list is marked with pflags bit set and port=65535.
+ * If 14 ports are used (last one does not have a pflag), the last port
+ * is repeated to fill the last entry in ports[] */
+struct ipt_mport
+{
+ u_int8_t flags:2; /* Type of comparison */
+ u_int16_t pflags:14; /* Port flags */
+ u_int16_t ports[IPT_MULTI_PORTS]; /* Ports */
+};
+#endif /*_IPT_MPORT_H*/
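
The pflags encoding is easiest to see by example. Matching source ports 22, 80 and the range 6000-6010 would be filled in by user space roughly like this (illustrative values only):

	struct ipt_mport mp = { .flags = IPT_MPORT_SOURCE };

	mp.ports[0] = 22;    /* single port: pflags bit 0 stays clear */
	mp.ports[1] = 80;    /* single port: pflags bit 1 stays clear */
	mp.ports[2] = 6000;  /* range start: pflags bit 2 set... */
	mp.pflags |= 1 << 2;
	mp.ports[3] = 6010;  /* ...next entry is the range end */
	mp.ports[4] = 65535; /* end of list: pflags bit set, port=65535 */
	mp.pflags |= 1 << 4;
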
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_nth.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_nth.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_nth.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_nth.h 2004-02-27 00:03:12.719294552 +0100
@@ -0,0 +1,19 @@
+#ifndef _IPT_NTH_H
+#define _IPT_NTH_H
+
+#include <linux/param.h>
+#include <linux/types.h>
+
+#ifndef IPT_NTH_NUM_COUNTERS
+#define IPT_NTH_NUM_COUNTERS 16
+#endif
+
+struct ipt_nth_info {
+ u_int8_t every;
+ u_int8_t not;
+ u_int8_t startat;
+ u_int8_t counter;
+ u_int8_t packet;
+};
+
+#endif /*_IPT_NTH_H*/
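
The fields drive a shared-counter "match every Nth packet" rule; a hedged sketch of the decision logic (the module source is not part of this hunk, so details may differ):

	static u_int16_t counters[IPT_NTH_NUM_COUNTERS];

	static int nth_decide(const struct ipt_nth_info *info)
	{
		u_int16_t *ctr = &counters[info->counter];
		int hit = (*ctr == info->startat);

		if (++*ctr > info->every)
			*ctr = 0; /* period is every+1 packets */
		return info->not ? !hit : hit;
	}
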
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_quota.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_quota.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_quota.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_quota.h 2004-02-27 00:03:13.672149696 +0100
@@ -0,0 +1,11 @@
+#ifndef _IPT_QUOTA_H
+#define _IPT_QUOTA_H
+
+/* print debug info in both kernel/netfilter module & iptable library */
+//#define DEBUG_IPT_QUOTA
+
+struct ipt_quota_info {
+ u_int64_t quota;
+};
+
+#endif /*_IPT_QUOTA_H*/
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_realm.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_realm.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_realm.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_realm.h 2004-02-27 00:03:15.261908016 +0100
@@ -0,0 +1,9 @@
+#ifndef _IPT_REALM_H
+#define _IPT_REALM_H
+
+struct ipt_realm_info {
+ u_int32_t id;
+ u_int32_t mask;
+ u_int8_t invert;
+};
+#endif /*_IPT_REALM_H*/
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_sctp.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_sctp.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_sctp.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_sctp.h 2004-02-27 00:03:16.145773648 +0100
@@ -0,0 +1,96 @@
+#ifndef _IPT_SCTP_H_
+#define _IPT_SCTP_H_
+
+#define IPT_SCTP_SRC_PORTS 0x01
+#define IPT_SCTP_DEST_PORTS 0x02
+#define IPT_SCTP_CHUNK_TYPES 0x04
+
+#define IPT_SCTP_VALID_FLAGS 0x07
+
+#define ELEMCOUNT(x) (sizeof(x)/sizeof(x[0]))
+
+struct ipt_sctp_info {
+ u_int16_t dpts[2]; /* Min, Max */
+ u_int16_t spts[2]; /* Min, Max */
+
+ u_int32_t chunkmap[256 / sizeof (u_int32_t)]; /* Bit mask of chunks to be matched according to RFC 2960 */
+
+#define SCTP_CHUNK_MATCH_ANY 0x01 /* Match if any of the chunk types are present */
+#define SCTP_CHUNK_MATCH_ALL 0x02 /* Match if all of the chunk types are present */
+#define SCTP_CHUNK_MATCH_ONLY 0x04 /* Match if these are the only chunk types present */
+
+ u_int32_t chunk_match_type;
+
+ u_int32_t flags;
+ u_int32_t invflags;
+};
+
+#define bytes(type) (sizeof(type) * 8)
+
+#define SCTP_CHUNKMAP_SET(chunkmap, type) \
+ do { \
+ chunkmap[type / bytes(u_int32_t)] |= \
+ 1 << (type % bytes(u_int32_t)); \
+ } while (0)
+
+#define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \
+ do { \
+ chunkmap[type / bytes(u_int32_t)] &= \
+ ~(1 << (type % bytes(u_int32_t))); \
+ } while (0)
+
+#define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \
+({ \
+ (chunkmap[type / bytes (u_int32_t)] & \
+ (1 << (type % bytes (u_int32_t)))) ? 1: 0; \
+})
+
+#define SCTP_CHUNKMAP_RESET(chunkmap) \
+ do { \
+ int i; \
+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) \
+ chunkmap[i] = 0; \
+ } while (0)
+
+#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \
+ do { \
+ int i; \
+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) \
+ chunkmap[i] = ~0; \
+ } while (0)
+
+#define SCTP_CHUNKMAP_COPY(destmap, srcmap) \
+ do { \
+ int i; \
+ for (i = 0; i < ELEMCOUNT(destmap); i++) \
+ destmap[i] = srcmap[i]; \
+ } while (0)
+
+#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \
+({ \
+ int i; \
+ int flag = 1; \
+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \
+ if (chunkmap[i]) { \
+ flag = 0; \
+ break; \
+ } \
+ } \
+ flag; \
+})
+
+#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \
+({ \
+ int i; \
+ int flag = 1; \
+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \
+ if (chunkmap[i] != ~0) { \
+ flag = 0; \
+ break; \
+ } \
+ } \
+ flag; \
+})
+
+#endif /* _IPT_SCTP_H_ */
+
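
Chunkmap usage, with the on-the-wire SCTP chunk type as the bit index (e.g. 0 = DATA, 1 = INIT, per RFC 2960):

	u_int32_t map[256 / sizeof(u_int32_t)];

	SCTP_CHUNKMAP_RESET(map);
	SCTP_CHUNKMAP_SET(map, 1); /* match INIT chunks */
	if (SCTP_CHUNKMAP_IS_SET(map, 1))
		; /* ... */
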
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_state.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_state.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_state.h 2004-02-18 04:59:18.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_state.h 2004-02-27 00:03:14.480026880 +0100
@@ -4,6 +4,8 @@
 #define IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
 #define IPT_STATE_INVALID (1 << 0)
 
+#define IPT_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 1))
+
 struct ipt_state_info
 {
 unsigned int statemask;
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_TTL.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_TTL.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_TTL.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_TTL.h 2004-02-27 00:03:07.345111552 +0100
@@ -0,0 +1,21 @@
+/* TTL modification module for IP tables
+ * (C) 2000 by Harald Welte <laforge@gnumonks.org> */
+
+#ifndef _IPT_TTL_H
+#define _IPT_TTL_H
+
+enum {
+ IPT_TTL_SET = 0,
+ IPT_TTL_INC,
+ IPT_TTL_DEC
+};
+
+#define IPT_TTL_MAXMODE IPT_TTL_DEC
+
+struct ipt_TTL_info {
+ u_int8_t mode;
+ u_int8_t ttl;
+};
+
+
+#endif
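
How a target interprets the mode field, as a sketch (the ipt_TTL target source itself is not in this hunk):

	static unsigned int ttl_apply(const struct ipt_TTL_info *info,
	                              unsigned int old_ttl)
	{
		switch (info->mode) {
		case IPT_TTL_SET: return info->ttl;
		case IPT_TTL_INC: return old_ttl + info->ttl;
		case IPT_TTL_DEC: return old_ttl - info->ttl;
		}
		return old_ttl; /* unreachable: mode <= IPT_TTL_MAXMODE */
	}
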
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_ULOG.h linux-2.6.3/include/linux/netfilter_ipv4/ipt_ULOG.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4/ipt_ULOG.h 2004-02-18 04:57:31.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4/ipt_ULOG.h 2004-02-27 00:03:00.000228144 +0100
@@ -11,6 +11,9 @@
 #define NETLINK_NFLOG 5
 #endif
 
+#define ULOG_DEFAULT_NLGROUP 1
+#define ULOG_DEFAULT_QTHRESHOLD 1
+
 #define ULOG_MAC_LEN 80
 #define ULOG_PREFIX_LEN 32
 
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv4.h linux-2.6.3/include/linux/netfilter_ipv4.h
--- linux-2.6.3.org/include/linux/netfilter_ipv4.h 2004-02-18 04:59:16.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv4.h 2004-02-27 00:03:14.480026880 +0100
@@ -51,6 +51,8 @@
 
 enum nf_ip_hook_priorities {
 NF_IP_PRI_FIRST = INT_MIN,
+ NF_IP_PRI_CONNTRACK_DEFRAG = -400,
+ NF_IP_PRI_RAW = -300,
 NF_IP_PRI_SELINUX_FIRST = -225,
 NF_IP_PRI_CONNTRACK = -200,
 NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_fuzzy.h linux-2.6.3/include/linux/netfilter_ipv6/ip6t_fuzzy.h
--- linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_fuzzy.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv6/ip6t_fuzzy.h 2004-02-27 00:03:09.360805120 +0100
@@ -0,0 +1,21 @@
+#ifndef _IP6T_FUZZY_H
+#define _IP6T_FUZZY_H
+
+#include <linux/param.h>
+#include <linux/types.h>
+
+#define MAXFUZZYRATE 10000000
+#define MINFUZZYRATE 3
+
+struct ip6t_fuzzy_info {
+ u_int32_t minimum_rate;
+ u_int32_t maximum_rate;
+ u_int32_t packets_total;
+ u_int32_t bytes_total;
+ u_int32_t previous_time;
+ u_int32_t present_time;
+ u_int32_t mean_rate;
+ u_int8_t acceptance_rate;
+};
+
+#endif /*_IP6T_FUZZY_H*/
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_HL.h linux-2.6.3/include/linux/netfilter_ipv6/ip6t_HL.h
--- linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_HL.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv6/ip6t_HL.h 2004-02-27 00:03:05.118450056 +0100
@@ -0,0 +1,22 @@
+/* Hop Limit modification module for ip6tables
+ * Maciej Soltysiak <solt@dns.toxicfilms.tv>
+ * Based on HW's TTL module */
+
+#ifndef _IP6T_HL_H
+#define _IP6T_HL_H
+
+enum {
+ IP6T_HL_SET = 0,
+ IP6T_HL_INC,
+ IP6T_HL_DEC
+};
+
+#define IP6T_HL_MAXMODE IP6T_HL_DEC
+
+struct ip6t_HL_info {
+ u_int8_t mode;
+ u_int8_t hop_limit;
+};
+
+
+#endif
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_nth.h linux-2.6.3/include/linux/netfilter_ipv6/ip6t_nth.h
--- linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_nth.h 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv6/ip6t_nth.h 2004-02-27 00:03:12.719294552 +0100
@@ -0,0 +1,19 @@
+#ifndef _IP6T_NTH_H
+#define _IP6T_NTH_H
+
+#include <linux/param.h>
+#include <linux/types.h>
+
+#ifndef IP6T_NTH_NUM_COUNTERS
+#define IP6T_NTH_NUM_COUNTERS 16
+#endif
+
+struct ip6t_nth_info {
+ u_int8_t every;
+ u_int8_t not;
+ u_int8_t startat;
+ u_int8_t counter;
+ u_int8_t packet;
+};
+
+#endif /*_IP6T_NTH_H*/
diff -Nur linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_REJECT.h linux-2.6.3/include/linux/netfilter_ipv6/ip6t_REJECT.h
--- linux-2.6.3.org/include/linux/netfilter_ipv6/ip6t_REJECT.h 2004-02-18 04:57:12.000000000 +0100
+++ linux-2.6.3/include/linux/netfilter_ipv6/ip6t_REJECT.h 2004-02-27 00:03:06.649217344 +0100
@@ -2,15 +2,17 @@
 #define _IP6T_REJECT_H
 
 enum ip6t_reject_with {
- IP6T_ICMP_NET_UNREACHABLE,
- IP6T_ICMP_HOST_UNREACHABLE,
- IP6T_ICMP_PROT_UNREACHABLE,
- IP6T_ICMP_PORT_UNREACHABLE,
- IP6T_ICMP_ECHOREPLY
+ IP6T_ICMP6_NO_ROUTE,
+ IP6T_ICMP6_ADM_PROHIBITED,
+ IP6T_ICMP6_NOT_NEIGHBOUR,
+ IP6T_ICMP6_ADDR_UNREACH,
+ IP6T_ICMP6_PORT_UNREACH,
+ IP6T_ICMP6_ECHOREPLY,
+ IP6T_TCP_RESET
 };
 
 struct ip6t_reject_info {
 enum ip6t_reject_with with; /* reject type */
 };
 
-#endif /*_IPT_REJECT_H*/
+#endif /*_IP6T_REJECT_H*/
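
The new values track the ICMPv6 destination-unreachable codes from <linux/icmpv6.h>; a sketch of the mapping a REJECT target would apply (assumption only, the target source is not in this hunk):

	case IP6T_ICMP6_NO_ROUTE:       code = ICMPV6_NOROUTE; break;
	case IP6T_ICMP6_ADM_PROHIBITED: code = ICMPV6_ADM_PROHIBITED; break;
	case IP6T_ICMP6_NOT_NEIGHBOUR:  code = ICMPV6_NOT_NEIGHBOUR; break;
	case IP6T_ICMP6_ADDR_UNREACH:   code = ICMPV6_ADDR_UNREACH; break;
	case IP6T_ICMP6_PORT_UNREACH:   code = ICMPV6_PORT_UNREACH; break;
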
diff -Nur linux-2.6.3.org/net/core/netfilter.c linux-2.6.3/net/core/netfilter.c
--- linux-2.6.3.org/net/core/netfilter.c 2004-02-26 23:36:59.000000000 +0100
+++ linux-2.6.3/net/core/netfilter.c 2004-02-27 00:03:00.001227992 +0100
@@ -8,8 +8,10 @@
 *
 * February 2000: Modified by James Morris to have 1 queue per protocol.
 * 15-Mar-2000: Added NF_REPEAT --RR.
+ * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
 */
 #include <linux/config.h>
+#include <linux/kernel.h>
 #include <linux/netfilter.h>
 #include <net/protocol.h>
 #include <linux/init.h>
@@ -740,6 +742,72 @@
 EXPORT_SYMBOL(skb_ip_make_writable);
 #endif /*CONFIG_INET*/
 
+/* Internal logging interface, which relies on the real
+ LOG target modules */
+
+#define NF_LOG_PREFIXLEN 128
+
+static nf_logfn *nf_logging[NPROTO]; /* = NULL */
+static int reported = 0;
+static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED;
+
+int nf_log_register(int pf, nf_logfn *logfn)
+{
+ int ret = -EBUSY;
+
+ /* Any setup of logging members must be done before
+ * substituting pointer. */
+ smp_wmb();
+ spin_lock(&nf_log_lock);
+ if (!nf_logging[pf]) {
+ nf_logging[pf] = logfn;
+ ret = 0;
+ }
+ spin_unlock(&nf_log_lock);
+ return ret;
+}
+
+void nf_log_unregister(int pf, nf_logfn *logfn)
+{
+ spin_lock(&nf_log_lock);
+ if (nf_logging[pf] == logfn)
+ nf_logging[pf] = NULL;
+ spin_unlock(&nf_log_lock);
+
+ /* Give time to concurrent readers. */
+ synchronize_net();
+}
+
+void nf_log_packet(int pf,
+ unsigned int hooknum,
+ const struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *fmt, ...)
+{
+ va_list args;
+ char prefix[NF_LOG_PREFIXLEN];
+ nf_logfn *logfn;
+
+ rcu_read_lock();
+ logfn = nf_logging[pf];
+ if (logfn) {
+ va_start(args, fmt);
+ vsnprintf(prefix, sizeof(prefix), fmt, args);
+ va_end(args);
+ /* We must read logging before nf_logfn[pf] */
+ smp_read_barrier_depends();
+ logfn(hooknum, skb, in, out, prefix);
+ } else if (!reported) {
+ printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
+ "no backend logging module loaded in!\n");
+ reported++;
+ }
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(nf_log_register);
+EXPORT_SYMBOL(nf_log_unregister);
+EXPORT_SYMBOL(nf_log_packet);
 
 /* This does not belong here, but ipt_REJECT needs it if connection
 tracking in use: without this, connection may not be in hash table,
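
Caller side, for match/target modules, the interface is used roughly like this (hypothetical fragment; the trailing space in the format keeps the prefix separated from the backend's own output):

	nf_log_packet(PF_INET, hooknum, skb, in, out,
	              "my-match: dropping, mark=0x%lx ", skb->nfmark);
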
diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.3/net/ipv4/netfilter/ip_conntrack_core.c
--- linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_core.c 2004-02-26 23:36:59.000000000 +0100
+++ linux-2.6.3/net/ipv4/netfilter/ip_conntrack_core.c 2004-02-27 00:03:14.481026728 +0100
@@ -67,6 +67,7 @@
 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
 struct list_head *ip_conntrack_hash;
 static kmem_cache_t *ip_conntrack_cachep;
+struct ip_conntrack ip_conntrack_untracked;
 
 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
 
@@ -794,6 +795,15 @@
 int set_reply;
 int ret;
 
+ /* Should never happen */
+ if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
+ if (net_ratelimit()) {
+ printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
+ (*pskb)->nh.iph->protocol, hooknum);
+ }
+ return NF_DROP;
+ }
+
 /* FIXME: Do this right please. --RR */
 (*pskb)->nfcache |= NFC_UNKNOWN;
 
@@ -812,18 +822,10 @@
 }
 #endif
 
- /* Previously seen (loopback)? Ignore. Do this before
- fragment check. */
+ /* Previously seen (loopback or untracked)? Ignore. */
 if ((*pskb)->nfct)
 return NF_ACCEPT;
 
- /* Gather fragments. */
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
- *pskb = ip_ct_gather_frags(*pskb);
- if (!*pskb)
- return NF_STOLEN;
- }
-
 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
 
 /* It may be an icmp error... */
@@ -1422,6 +1424,18 @@
 
 /* For use by ipt_REJECT */
 ip_ct_attach = ip_conntrack_attach;
+
+ /* Set up fake conntrack:
+ - to never be deleted, not in any hashes */
+ atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
+ /* - and make it look like a confirmed connection */
+ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
+ /* - and prepare the ctinfo field for REJECT & NAT. */
+ ip_conntrack_untracked.infos[IP_CT_NEW].master =
+ ip_conntrack_untracked.infos[IP_CT_RELATED].master =
+ ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
+ &ip_conntrack_untracked.ct_general;
+
 return ret;
 
 err_free_hash:
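
The fake entry is consumed by the NOTRACK target from the same patch-o-matic series, which marks a packet as untracked roughly like this (sketch, not part of this hunk):

	(*pskb)->nfct = &ip_conntrack_untracked.infos[IP_CT_NEW];
	nf_conntrack_get((*pskb)->nfct);
	return IPT_CONTINUE;
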
diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_core.c.orig linux-2.6.3/net/ipv4/netfilter/ip_conntrack_core.c.orig
--- linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_core.c.orig 1970-01-01 01:00:00.000000000 +0100
+++ linux-2.6.3/net/ipv4/netfilter/ip_conntrack_core.c.orig 2004-02-27 00:02:49.320851656 +0100
@@ -0,0 +1,1433 @@
+/* Connection state tracking for netfilter. This is separated from,
+ but required by, the NAT layer; it can also be used by an iptables
+ extension. */
+
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
+ * - new API and handling of conntrack/nat helpers
+ * - now capable of multiple expectations for one master
+ * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
+ * - add usage/reference counts to ip_conntrack_expect
+ * - export ip_conntrack[_expect]_{find_get,put} functions
+ * */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/proc_fs.h>
+#include <linux/vmalloc.h>
+#include <net/checksum.h>
+#include <linux/stddef.h>
+#include <linux/sysctl.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/jhash.h>
+/* For ERR_PTR(). Yeah, I know... --RR */
+#include <linux/fs.h>
+
+/* This rwlock protects the main hash table, protocol/helper/expected
+ registrations, conntrack timers*/
+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
+#include <linux/netfilter_ipv4/listhelp.h>
+
+#define IP_CONNTRACK_VERSION "2.1"
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+DECLARE_RWLOCK(ip_conntrack_lock);
+DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
+
+void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
+LIST_HEAD(ip_conntrack_expect_list);
+LIST_HEAD(protocol_list);
+static LIST_HEAD(helpers);
+unsigned int ip_conntrack_htable_size = 0;
+int ip_conntrack_max;
+static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
+struct list_head *ip_conntrack_hash;
+static kmem_cache_t *ip_conntrack_cachep;
+
+extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
+
+static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
+ u_int8_t protocol)
+{
+ return protocol == curr->proto;
+}
+
+struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
+{
+ struct ip_conntrack_protocol *p;
+
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+ p = LIST_FIND(&protocol_list, proto_cmpfn,
+ struct ip_conntrack_protocol *, protocol);
+ if (!p)
+ p = &ip_conntrack_generic_protocol;
+
+ return p;
+}
+
+struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
+{
+ struct ip_conntrack_protocol *p;
+
+ READ_LOCK(&ip_conntrack_lock);
+ p = __ip_ct_find_proto(protocol);
+ READ_UNLOCK(&ip_conntrack_lock);
+ return p;
+}
+
+inline void
+ip_conntrack_put(struct ip_conntrack *ct)
+{
+ IP_NF_ASSERT(ct);
+ IP_NF_ASSERT(ct->infos[0].master);
+ /* nf_conntrack_put wants to go via an info struct, so feed it
+ one at random. */
+ nf_conntrack_put(&ct->infos[0]);
+}
+
+static int ip_conntrack_hash_rnd_initted;
+static unsigned int ip_conntrack_hash_rnd;
+
+static u_int32_t
+hash_conntrack(const struct ip_conntrack_tuple *tuple)
+{
+#if 0
+ dump_tuple(tuple);
+#endif
+ return (jhash_3words(tuple->src.ip,
+ (tuple->dst.ip ^ tuple->dst.protonum),
+ (tuple->src.u.all | (tuple->dst.u.all << 16)),
+ ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
+}
+
+int
+get_tuple(const struct iphdr *iph,
+ const struct sk_buff *skb,
+ unsigned int dataoff,
+ struct ip_conntrack_tuple *tuple,
+ const struct ip_conntrack_protocol *protocol)
+{
+ /* Should never happen */
+ if (iph->frag_off & htons(IP_OFFSET)) {
+ printk("ip_conntrack_core: Frag of proto %u.\n",
+ iph->protocol);
+ return 0;
+ }
+
+ tuple->src.ip = iph->saddr;
+ tuple->dst.ip = iph->daddr;
+ tuple->dst.protonum = iph->protocol;
+
+ return protocol->pkt_to_tuple(skb, dataoff, tuple);
+}
+
+static int
+invert_tuple(struct ip_conntrack_tuple *inverse,
+ const struct ip_conntrack_tuple *orig,
+ const struct ip_conntrack_protocol *protocol)
+{
+ inverse->src.ip = orig->dst.ip;
+ inverse->dst.ip = orig->src.ip;
+ inverse->dst.protonum = orig->dst.protonum;
+
+ return protocol->invert_tuple(inverse, orig);
+}
+
+
+/* ip_conntrack_expect helper functions */
+
+/* Compare tuple parts depending on mask. */
+static inline int expect_cmp(const struct ip_conntrack_expect *i,
+ const struct ip_conntrack_tuple *tuple)
+{
+ MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
+ return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
+}
+
+static void
+destroy_expect(struct ip_conntrack_expect *exp)
+{
+ DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
+ IP_NF_ASSERT(atomic_read(&exp->use));
+ IP_NF_ASSERT(!timer_pending(&exp->timeout));
+
+ kfree(exp);
+}
+
+
+inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
+{
+ IP_NF_ASSERT(exp);
+
+ if (atomic_dec_and_test(&exp->use)) {
+ /* usage count dropped to zero */
+ destroy_expect(exp);
+ }
+}
+
+static inline struct ip_conntrack_expect *
+__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
+{
+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
+ MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
+ return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
+ struct ip_conntrack_expect *, tuple);
+}
+
+/* Find an expectation corresponding to a tuple. */
+struct ip_conntrack_expect *
+ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
+{
+ struct ip_conntrack_expect *exp;
+
+ READ_LOCK(&ip_conntrack_lock);
+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
+ exp = __ip_ct_expect_find(tuple);
+ if (exp)
+ atomic_inc(&exp->use);
+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
+ READ_UNLOCK(&ip_conntrack_lock);
+
+ return exp;
+}
+
+/* remove one specific expectation from all lists and drop refcount,
+ * does _NOT_ delete the timer. */
+static void __unexpect_related(struct ip_conntrack_expect *expect)
+{
+ DEBUGP("unexpect_related(%p)\n", expect);
+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+
+ /* we're not allowed to unexpect a confirmed expectation! */
+ IP_NF_ASSERT(!expect->sibling);
+
+ /* delete from global and local lists */
+ list_del(&expect->list);
+ list_del(&expect->expected_list);
+
+ /* decrement expect-count of master conntrack */
+ if (expect->expectant)
+ expect->expectant->expecting--;
+
+ ip_conntrack_expect_put(expect);
+}
+
+/* remove one specific expectation from all lists, drop refcount
+ * and expire timer.
+ * This function can _NOT_ be called for confirmed expects! */
+static void unexpect_related(struct ip_conntrack_expect *expect)
+{
+ IP_NF_ASSERT(expect->expectant);
+ IP_NF_ASSERT(expect->expectant->helper);
+ /* if we are supposed to have a timer, but we can't delete
+ * it: race condition. __unexpect_related will
+ * be called by timeout function */
+ if (expect->expectant->helper->timeout
+ && !del_timer(&expect->timeout))
+ return;
+
+ __unexpect_related(expect);
+}
+
+/* delete all unconfirmed expectations for this conntrack */
+static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
+{
+ struct list_head *exp_entry, *next;
+ struct ip_conntrack_expect *exp;
+
+ DEBUGP("remove_expectations(%p)\n", ct);
+
+ list_for_each_safe(exp_entry, next, &ct->sibling_list) {
+ exp = list_entry(exp_entry, struct ip_conntrack_expect,
+ expected_list);
+
+ /* we skip established expectations, as we want to delete
+ * the un-established ones only */
+ if (exp->sibling) {
+ DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
+ if (drop_refcount) {
+ /* Indicate that this expectation's parent is dead */
+ ip_conntrack_put(exp->expectant);
+ exp->expectant = NULL;
+ }
+ continue;
+ }
+
+ IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
+ IP_NF_ASSERT(exp->expectant == ct);
+
+ /* delete expectation from global and private lists */
+ unexpect_related(exp);
+ }
+}
+
+static void
+clean_from_lists(struct ip_conntrack *ct)
+{
+ unsigned int ho, hr;
+
+ DEBUGP("clean_from_lists(%p)\n", ct);
+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
+
+ ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+ hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+ LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
+ LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
+
+ /* Destroy all un-established, pending expectations */
+ remove_expectations(ct, 1);
+}
+
+static void
+destroy_conntrack(struct nf_conntrack *nfct)
+{
+ struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
+ struct ip_conntrack_protocol *proto;
+
+ DEBUGP("destroy_conntrack(%p)\n", ct);
+ IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
+ IP_NF_ASSERT(!timer_pending(&ct->timeout));
+
+ /* To make sure we don't get any weird locking issues here:
+ * destroy_conntrack() MUST NOT be called with a write lock
+ * to ip_conntrack_lock!!! -HW */
+ proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+ if (proto && proto->destroy)
+ proto->destroy(ct);
+
+ if (ip_conntrack_destroyed)
+ ip_conntrack_destroyed(ct);
+
+ WRITE_LOCK(&ip_conntrack_lock);
+ /* Delete us from our own list to prevent corruption later */
+ list_del(&ct->sibling_list);
+
+ /* Delete our master expectation */
+ if (ct->master) {
+ if (ct->master->expectant) {
+ /* can't call __unexpect_related here,
+ * since it would screw up expect_list */
+ list_del(&ct->master->expected_list);
+ master = ct->master->expectant;
+ }
+ kfree(ct->master);
+ }
+ WRITE_UNLOCK(&ip_conntrack_lock);
+
+ if (master)
+ ip_conntrack_put(master);
+
+ DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
+ kmem_cache_free(ip_conntrack_cachep, ct);
+ atomic_dec(&ip_conntrack_count);
+}
1784+
1785+static void death_by_timeout(unsigned long ul_conntrack)
1786+{
1787+ struct ip_conntrack *ct = (void *)ul_conntrack;
1788+
1789+ WRITE_LOCK(&ip_conntrack_lock);
1790+ clean_from_lists(ct);
1791+ WRITE_UNLOCK(&ip_conntrack_lock);
1792+ ip_conntrack_put(ct);
1793+}
1794+
1795+static inline int
1796+conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
1797+ const struct ip_conntrack_tuple *tuple,
1798+ const struct ip_conntrack *ignored_conntrack)
1799+{
1800+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1801+ return i->ctrack != ignored_conntrack
1802+ && ip_ct_tuple_equal(tuple, &i->tuple);
1803+}
1804+
1805+static struct ip_conntrack_tuple_hash *
1806+__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
1807+ const struct ip_conntrack *ignored_conntrack)
1808+{
1809+ struct ip_conntrack_tuple_hash *h;
1810+ unsigned int hash = hash_conntrack(tuple);
1811+
1812+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1813+ h = LIST_FIND(&ip_conntrack_hash[hash],
1814+ conntrack_tuple_cmp,
1815+ struct ip_conntrack_tuple_hash *,
1816+ tuple, ignored_conntrack);
1817+ return h;
1818+}
1819+
1820+/* Find a connection corresponding to a tuple. */
1821+struct ip_conntrack_tuple_hash *
1822+ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
1823+ const struct ip_conntrack *ignored_conntrack)
1824+{
1825+ struct ip_conntrack_tuple_hash *h;
1826+
1827+ READ_LOCK(&ip_conntrack_lock);
1828+ h = __ip_conntrack_find(tuple, ignored_conntrack);
1829+ if (h)
1830+ atomic_inc(&h->ctrack->ct_general.use);
1831+ READ_UNLOCK(&ip_conntrack_lock);
1832+
1833+ return h;
1834+}
1835+
1836+static inline struct ip_conntrack *
1837+__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
1838+{
1839+ struct ip_conntrack *ct
1840+ = (struct ip_conntrack *)nfct->master;
1841+
1842+ /* ctinfo is the index of the nfct inside the conntrack */
1843+ *ctinfo = nfct - ct->infos;
1844+ IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
1845+ return ct;
1846+}
1847+
1848+/* Return conntrack and conntrack_info given skb->nfct->master */
1849+struct ip_conntrack *
1850+ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
1851+{
1852+ if (skb->nfct)
1853+ return __ip_conntrack_get(skb->nfct, ctinfo);
1854+ return NULL;
1855+}
1856+
1857+/* Confirm a connection given skb->nfct; places it in hash table */
1858+int
1859+__ip_conntrack_confirm(struct nf_ct_info *nfct)
1860+{
1861+ unsigned int hash, repl_hash;
1862+ struct ip_conntrack *ct;
1863+ enum ip_conntrack_info ctinfo;
1864+
1865+ ct = __ip_conntrack_get(nfct, &ctinfo);
1866+
1867+ /* ipt_REJECT uses ip_conntrack_attach to attach related
1868+ ICMP/TCP RST packets in the other direction. The actual packet
1869+ which created the connection will be IP_CT_NEW or, for an
1870+ expected connection, IP_CT_RELATED. */
1871+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
1872+ return NF_ACCEPT;
1873+
1874+ hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
1875+ repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
1876+
1877+ /* We're not in hash table, and we refuse to set up related
1878+ connections for unconfirmed conns. But packet copies and
1879+ REJECT will give spurious warnings here. */
1880+ /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
1881+
1882+ /* No external references means no one else could have
1883+ confirmed us. */
1884+ IP_NF_ASSERT(!is_confirmed(ct));
1885+ DEBUGP("Confirming conntrack %p\n", ct);
1886+
1887+ WRITE_LOCK(&ip_conntrack_lock);
1888+ /* See if there's one in the list already, including reverse:
1889+ NAT could have grabbed it without realizing, since we're
1890+ not in the hash. If there is, we lost the race. */
1891+ if (!LIST_FIND(&ip_conntrack_hash[hash],
1892+ conntrack_tuple_cmp,
1893+ struct ip_conntrack_tuple_hash *,
1894+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
1895+ && !LIST_FIND(&ip_conntrack_hash[repl_hash],
1896+ conntrack_tuple_cmp,
1897+ struct ip_conntrack_tuple_hash *,
1898+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
1899+ list_prepend(&ip_conntrack_hash[hash],
1900+ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
1901+ list_prepend(&ip_conntrack_hash[repl_hash],
1902+ &ct->tuplehash[IP_CT_DIR_REPLY]);
1903+ /* Timer relative to confirmation time, not original
1904+ setting time, otherwise we'd get timer wrap in
1905+ weird delay cases. */
1906+ ct->timeout.expires += jiffies;
1907+ add_timer(&ct->timeout);
1908+ atomic_inc(&ct->ct_general.use);
1909+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
1910+ WRITE_UNLOCK(&ip_conntrack_lock);
1911+ return NF_ACCEPT;
1912+ }
1913+
1914+ WRITE_UNLOCK(&ip_conntrack_lock);
1915+ return NF_DROP;
1916+}
1917+
1918+/* Returns true if a connection corresponds to the tuple (required
1919+ for NAT). */
1920+int
1921+ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
1922+ const struct ip_conntrack *ignored_conntrack)
1923+{
1924+ struct ip_conntrack_tuple_hash *h;
1925+
1926+ READ_LOCK(&ip_conntrack_lock);
1927+ h = __ip_conntrack_find(tuple, ignored_conntrack);
1928+ READ_UNLOCK(&ip_conntrack_lock);
1929+
1930+ return h != NULL;
1931+}
1932+
1933+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
1934+struct ip_conntrack *
1935+icmp_error_track(struct sk_buff *skb,
1936+ enum ip_conntrack_info *ctinfo,
1937+ unsigned int hooknum)
1938+{
1939+ struct ip_conntrack_tuple innertuple, origtuple;
1940+ struct {
1941+ struct icmphdr icmp;
1942+ struct iphdr ip;
1943+ } inside;
1944+ struct ip_conntrack_protocol *innerproto;
1945+ struct ip_conntrack_tuple_hash *h;
1946+ int dataoff;
1947+
1948+ IP_NF_ASSERT(skb->nfct == NULL);
1949+
1950+ /* Not enough header? */
1951+ if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
1952+ return NULL;
1953+
1954+ if (inside.icmp.type != ICMP_DEST_UNREACH
1955+ && inside.icmp.type != ICMP_SOURCE_QUENCH
1956+ && inside.icmp.type != ICMP_TIME_EXCEEDED
1957+ && inside.icmp.type != ICMP_PARAMETERPROB
1958+ && inside.icmp.type != ICMP_REDIRECT)
1959+ return NULL;
1960+
1961+ /* Ignore ICMPs containing fragments (shouldn't happen) */
1962+ if (inside.ip.frag_off & htons(IP_OFFSET)) {
1963+ DEBUGP("icmp_error_track: fragment of proto %u\n",
1964+ inside.ip.protocol);
1965+ return NULL;
1966+ }
1967+
1968+ innerproto = ip_ct_find_proto(inside.ip.protocol);
1969+ dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
1970+ /* Are they talking about one of our connections? */
1971+ if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
1972+ DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
1973+ return NULL;
1974+ }
1975+
1976+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
1977+ been preserved inside the ICMP. */
1978+ if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
1979+ DEBUGP("icmp_error_track: Can't invert tuple\n");
1980+ return NULL;
1981+ }
1982+
1983+ *ctinfo = IP_CT_RELATED;
1984+
1985+ h = ip_conntrack_find_get(&innertuple, NULL);
1986+ if (!h) {
1987+ /* Locally generated ICMPs will match inverted if they
1988+ haven't been SNAT'ed yet */
1989+ /* FIXME: NAT code has to handle half-done double NAT --RR */
1990+ if (hooknum == NF_IP_LOCAL_OUT)
1991+ h = ip_conntrack_find_get(&origtuple, NULL);
1992+
1993+ if (!h) {
1994+ DEBUGP("icmp_error_track: no match\n");
1995+ return NULL;
1996+ }
1997+ /* Reverse direction from that found */
1998+ if (DIRECTION(h) != IP_CT_DIR_REPLY)
1999+ *ctinfo += IP_CT_IS_REPLY;
2000+ } else {
2001+ if (DIRECTION(h) == IP_CT_DIR_REPLY)
2002+ *ctinfo += IP_CT_IS_REPLY;
2003+ }
2004+
2005+ /* Update skb to refer to this connection */
2006+ skb->nfct = &h->ctrack->infos[*ctinfo];
2007+ return h->ctrack;
2008+}
2009+
2010+/* There's a small race here where we may free a just-assured
2011+ connection. Too bad: we're in trouble anyway. */
2012+static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
2013+{
2014+ return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
2015+}
2016+
2017+static int early_drop(struct list_head *chain)
2018+{
2019+ /* Traverse backwards: gives us oldest, which is roughly LRU */
2020+ struct ip_conntrack_tuple_hash *h;
2021+ int dropped = 0;
2022+
2023+ READ_LOCK(&ip_conntrack_lock);
2024+ h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
2025+ if (h)
2026+ atomic_inc(&h->ctrack->ct_general.use);
2027+ READ_UNLOCK(&ip_conntrack_lock);
2028+
2029+ if (!h)
2030+ return dropped;
2031+
2032+ if (del_timer(&h->ctrack->timeout)) {
2033+ death_by_timeout((unsigned long)h->ctrack);
2034+ dropped = 1;
2035+ }
2036+ ip_conntrack_put(h->ctrack);
2037+ return dropped;
2038+}
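+/* A note on the eviction policy above (summarizing the code, for
+ * clarity): only connections that have never seen a reply - and so
+ * never reached ASSURED state - are candidates for early_drop.  If a
+ * scanned chain holds only assured connections, nothing is evicted and
+ * the caller (init_conntrack below) drops the new packet instead. */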
2039+
2040+static inline int helper_cmp(const struct ip_conntrack_helper *i,
2041+ const struct ip_conntrack_tuple *rtuple)
2042+{
2043+ return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
2044+}
2045+
2046+struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
2047+{
2048+ return LIST_FIND(&helpers, helper_cmp,
2049+ struct ip_conntrack_helper *,
2050+ tuple);
2051+}
2052+
2053+/* Allocate a new conntrack: we return -ENOMEM if classification
2054+ failed due to stress. Otherwise it really is unclassifiable. */
2055+static struct ip_conntrack_tuple_hash *
2056+init_conntrack(const struct ip_conntrack_tuple *tuple,
2057+ struct ip_conntrack_protocol *protocol,
2058+ struct sk_buff *skb)
2059+{
2060+ struct ip_conntrack *conntrack;
2061+ struct ip_conntrack_tuple repl_tuple;
2062+ size_t hash;
2063+ struct ip_conntrack_expect *expected;
2064+ int i;
2065+ static unsigned int drop_next;
2066+
2067+ if (!ip_conntrack_hash_rnd_initted) {
2068+ get_random_bytes(&ip_conntrack_hash_rnd, 4);
2069+ ip_conntrack_hash_rnd_initted = 1;
2070+ }
2071+
2072+ hash = hash_conntrack(tuple);
2073+
2074+ if (ip_conntrack_max &&
2075+ atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
2076+ /* Try dropping from a random chain, or else from the
2077+ chain we are about to insert into (in case they're
2078+ trying to bomb one hash chain). */
2079+ unsigned int next = (drop_next++)%ip_conntrack_htable_size;
2080+
2081+ if (!early_drop(&ip_conntrack_hash[next])
2082+ && !early_drop(&ip_conntrack_hash[hash])) {
2083+ if (net_ratelimit())
2084+ printk(KERN_WARNING
2085+ "ip_conntrack: table full, dropping"
2086+ " packet.\n");
2087+ return ERR_PTR(-ENOMEM);
2088+ }
2089+ }
2090+
2091+ if (!invert_tuple(&repl_tuple, tuple, protocol)) {
2092+ DEBUGP("Can't invert tuple.\n");
2093+ return NULL;
2094+ }
2095+
2096+ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
2097+ if (!conntrack) {
2098+ DEBUGP("Can't allocate conntrack.\n");
2099+ return ERR_PTR(-ENOMEM);
2100+ }
2101+
2102+ memset(conntrack, 0, sizeof(*conntrack));
2103+ atomic_set(&conntrack->ct_general.use, 1);
2104+ conntrack->ct_general.destroy = destroy_conntrack;
2105+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
2106+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
2107+ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
2108+ conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
2109+ for (i=0; i < IP_CT_NUMBER; i++)
2110+ conntrack->infos[i].master = &conntrack->ct_general;
2111+
2112+ if (!protocol->new(conntrack, skb)) {
2113+ kmem_cache_free(ip_conntrack_cachep, conntrack);
2114+ return NULL;
2115+ }
2116+ /* Don't set timer yet: wait for confirmation */
2117+ init_timer(&conntrack->timeout);
2118+ conntrack->timeout.data = (unsigned long)conntrack;
2119+ conntrack->timeout.function = death_by_timeout;
2120+
2121+ INIT_LIST_HEAD(&conntrack->sibling_list);
2122+
2123+ WRITE_LOCK(&ip_conntrack_lock);
2124+ /* Need to find and delete the expectation ONLY if we win the race */
2125+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
2126+ expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
2127+ struct ip_conntrack_expect *, tuple);
2128+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
2129+
2130+ /* If master is not in hash table yet (ie. packet hasn't left
2131+ this machine yet), how can other end know about expected?
2132+ Hence these are not the droids you are looking for (if
2133+ master ct never got confirmed, we'd hold a reference to it
2134+ and weird things would happen to future packets). */
2135+ if (expected && !is_confirmed(expected->expectant))
2136+ expected = NULL;
2137+
2138+ /* Look up the conntrack helper for master connections only */
2139+ if (!expected)
2140+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
2141+
2142+ /* If the expectation is dying, then this is a loser. */
2143+ if (expected
2144+ && expected->expectant->helper->timeout
2145+ && ! del_timer(&expected->timeout))
2146+ expected = NULL;
2147+
2148+ if (expected) {
2149+ DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
2150+ conntrack, expected);
2151+ /* Welcome, Mr. Bond. We've been expecting you... */
2152+ IP_NF_ASSERT(master_ct(conntrack));
2153+ __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
2154+ conntrack->master = expected;
2155+ expected->sibling = conntrack;
2156+ LIST_DELETE(&ip_conntrack_expect_list, expected);
2157+ expected->expectant->expecting--;
2158+ nf_conntrack_get(&master_ct(conntrack)->infos[0]);
2159+ }
2160+ atomic_inc(&ip_conntrack_count);
2161+ WRITE_UNLOCK(&ip_conntrack_lock);
2162+
2163+ if (expected && expected->expectfn)
2164+ expected->expectfn(conntrack);
2165+ return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
2166+}
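+/* Illustrative lifecycle (a sketch, not code from this patch): a helper
+ * such as the FTP one, on seeing a PORT command on a master connection,
+ * registers an expectation with ip_conntrack_expect_related().  When the
+ * first packet of the data connection arrives, init_conntrack() above
+ * finds that expectation, sets IPS_EXPECTED_BIT, links the new conntrack
+ * to its master and runs the helper's expectfn (e.g. to set up NAT). */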
2167+
2168+/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
2169+static inline struct ip_conntrack *
2170+resolve_normal_ct(struct sk_buff *skb,
2171+ struct ip_conntrack_protocol *proto,
2172+ int *set_reply,
2173+ unsigned int hooknum,
2174+ enum ip_conntrack_info *ctinfo)
2175+{
2176+ struct ip_conntrack_tuple tuple;
2177+ struct ip_conntrack_tuple_hash *h;
2178+
2179+ IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
2180+
2181+ if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
2182+ return NULL;
2183+
2184+ /* look for tuple match */
2185+ h = ip_conntrack_find_get(&tuple, NULL);
2186+ if (!h) {
2187+ h = init_conntrack(&tuple, proto, skb);
2188+ if (!h)
2189+ return NULL;
2190+ if (IS_ERR(h))
2191+ return (void *)h;
2192+ }
2193+
2194+ /* It exists; we have (non-exclusive) reference. */
2195+ if (DIRECTION(h) == IP_CT_DIR_REPLY) {
2196+ *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
2197+ /* Please set reply bit if this packet OK */
2198+ *set_reply = 1;
2199+ } else {
2200+ /* Once we've had two way comms, always ESTABLISHED. */
2201+ if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
2202+ DEBUGP("ip_conntrack_in: normal packet for %p\n",
2203+ h->ctrack);
2204+ *ctinfo = IP_CT_ESTABLISHED;
2205+ } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
2206+ DEBUGP("ip_conntrack_in: related packet for %p\n",
2207+ h->ctrack);
2208+ *ctinfo = IP_CT_RELATED;
2209+ } else {
2210+ DEBUGP("ip_conntrack_in: new packet for %p\n",
2211+ h->ctrack);
2212+ *ctinfo = IP_CT_NEW;
2213+ }
2214+ *set_reply = 0;
2215+ }
2216+ skb->nfct = &h->ctrack->infos[*ctinfo];
2217+ return h->ctrack;
2218+}
2219+
2220+/* Netfilter hook itself. */
2221+unsigned int ip_conntrack_in(unsigned int hooknum,
2222+ struct sk_buff **pskb,
2223+ const struct net_device *in,
2224+ const struct net_device *out,
2225+ int (*okfn)(struct sk_buff *))
2226+{
2227+ struct ip_conntrack *ct;
2228+ enum ip_conntrack_info ctinfo;
2229+ struct ip_conntrack_protocol *proto;
2230+ int set_reply;
2231+ int ret;
2232+
2233+ /* FIXME: Do this right please. --RR */
2234+ (*pskb)->nfcache |= NFC_UNKNOWN;
2235+
2236+/* Doesn't cover locally-generated broadcast, so not worth it. */
2237+#if 0
2238+ /* Ignore broadcast: no `connection'. */
2239+ if ((*pskb)->pkt_type == PACKET_BROADCAST) {
2240+ printk("Broadcast packet!\n");
2241+ return NF_ACCEPT;
2242+ } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
2243+ == htonl(0x000000FF)) {
2244+ printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
2245+ NIPQUAD((*pskb)->nh.iph->saddr),
2246+ NIPQUAD((*pskb)->nh.iph->daddr),
2247+ (*pskb)->sk, (*pskb)->pkt_type);
2248+ }
2249+#endif
2250+
2251+ /* Previously seen (loopback)? Ignore. Do this before
2252+ fragment check. */
2253+ if ((*pskb)->nfct)
2254+ return NF_ACCEPT;
2255+
2256+ /* Gather fragments. */
2257+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
2258+ *pskb = ip_ct_gather_frags(*pskb);
2259+ if (!*pskb)
2260+ return NF_STOLEN;
2261+ }
2262+
2263+ proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
2264+
2265+ /* It may be an icmp error... */
2266+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
2267+ && icmp_error_track(*pskb, &ctinfo, hooknum))
2268+ return NF_ACCEPT;
2269+
2270+ if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum, &ctinfo)))
2271+ /* Not valid part of a connection */
2272+ return NF_ACCEPT;
2273+
2274+ if (IS_ERR(ct))
2275+ /* Too stressed to deal. */
2276+ return NF_DROP;
2277+
2278+ IP_NF_ASSERT((*pskb)->nfct);
2279+
2280+ ret = proto->packet(ct, *pskb, ctinfo);
2281+ if (ret == -1) {
2282+ /* Invalid */
2283+ nf_conntrack_put((*pskb)->nfct);
2284+ (*pskb)->nfct = NULL;
2285+ return NF_ACCEPT;
2286+ }
2287+
2288+ if (ret != NF_DROP && ct->helper) {
2289+ ret = ct->helper->help(*pskb, ct, ctinfo);
2290+ if (ret == -1) {
2291+ /* Invalid */
2292+ nf_conntrack_put((*pskb)->nfct);
2293+ (*pskb)->nfct = NULL;
2294+ return NF_ACCEPT;
2295+ }
2296+ }
2297+ if (set_reply)
2298+ set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
2299+
2300+ return ret;
2301+}
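+/* Per-packet path through the hook above, in order: gather IP fragments,
+ * find or create the conntrack in resolve_normal_ct(), let the L4
+ * protocol update its state via proto->packet(), give any registered
+ * helper a look at the payload, and finally set IPS_SEEN_REPLY_BIT when
+ * the packet travels in the reply direction. */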
2302+
2303+int invert_tuplepr(struct ip_conntrack_tuple *inverse,
2304+ const struct ip_conntrack_tuple *orig)
2305+{
2306+ return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
2307+}
2308+
2309+static inline int resent_expect(const struct ip_conntrack_expect *i,
2310+ const struct ip_conntrack_tuple *tuple,
2311+ const struct ip_conntrack_tuple *mask)
2312+{
2313+ DEBUGP("resent_expect\n");
2314+ DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
2315+ DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
2316+ DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
2317+ return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
2318+ || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
2319+ && ip_ct_tuple_equal(&i->mask, mask));
2320+}
2321+
2322+/* Would two expected things clash? */
2323+static inline int expect_clash(const struct ip_conntrack_expect *i,
2324+ const struct ip_conntrack_tuple *tuple,
2325+ const struct ip_conntrack_tuple *mask)
2326+{
2327+ /* Part covered by intersection of masks must be unequal,
2328+ otherwise they clash */
2329+ struct ip_conntrack_tuple intersect_mask
2330+ = { { i->mask.src.ip & mask->src.ip,
2331+ { i->mask.src.u.all & mask->src.u.all } },
2332+ { i->mask.dst.ip & mask->dst.ip,
2333+ { i->mask.dst.u.all & mask->dst.u.all },
2334+ i->mask.dst.protonum & mask->dst.protonum } };
2335+
2336+ return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
2337+}
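+/* Example: an expectation whose mask covers only the destination IP
+ * clashes with one whose mask also covers the port, whenever both
+ * tuples carry the same destination IP - on the part covered by both
+ * masks they are equal, so both could match the same future packet. */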
2338+
2339+inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
2340+{
2341+ WRITE_LOCK(&ip_conntrack_lock);
2342+ unexpect_related(expect);
2343+ WRITE_UNLOCK(&ip_conntrack_lock);
2344+}
2345+
2346+static void expectation_timed_out(unsigned long ul_expect)
2347+{
2348+ struct ip_conntrack_expect *expect = (void *) ul_expect;
2349+
2350+ DEBUGP("expectation %p timed out\n", expect);
2351+ WRITE_LOCK(&ip_conntrack_lock);
2352+ __unexpect_related(expect);
2353+ WRITE_UNLOCK(&ip_conntrack_lock);
2354+}
2355+
2356+/* Add a related connection. */
2357+int ip_conntrack_expect_related(struct ip_conntrack *related_to,
2358+ struct ip_conntrack_expect *expect)
2359+{
2360+ struct ip_conntrack_expect *old, *new;
2361+ int ret = 0;
2362+
2363+ WRITE_LOCK(&ip_conntrack_lock);
2364+ /* Because of the write lock, no reader can walk the lists,
2365+ * so there is no need to use the tuple lock too */
2366+
2367+ DEBUGP("ip_conntrack_expect_related %p\n", related_to);
2368+ DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
2369+ DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
2370+
2371+ old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
2372+ struct ip_conntrack_expect *, &expect->tuple,
2373+ &expect->mask);
2374+ if (old) {
2375+ /* Helper private data may contain offsets but no pointers
2376+ pointing into the payload - otherwise we would have to copy
2377+ the data filled out by the helper over the old one */
2378+ DEBUGP("expect_related: resent packet\n");
2379+ if (related_to->helper->timeout) {
2380+ if (!del_timer(&old->timeout)) {
2381+ /* expectation is dying. Fall through */
2382+ old = NULL;
2383+ } else {
2384+ old->timeout.expires = jiffies +
2385+ related_to->helper->timeout * HZ;
2386+ add_timer(&old->timeout);
2387+ }
2388+ }
2389+
2390+ if (old) {
2391+ WRITE_UNLOCK(&ip_conntrack_lock);
2392+ return -EEXIST;
2393+ }
2394+ } else if (related_to->helper->max_expected &&
2395+ related_to->expecting >= related_to->helper->max_expected) {
2396+ struct list_head *cur_item;
2397+ /* old == NULL */
2398+ if (!(related_to->helper->flags &
2399+ IP_CT_HELPER_F_REUSE_EXPECT)) {
2400+ WRITE_UNLOCK(&ip_conntrack_lock);
2401+ if (net_ratelimit())
2402+ printk(KERN_WARNING
2403+ "ip_conntrack: max number of expected "
2404+ "connections %i of %s reached for "
2405+ "%u.%u.%u.%u->%u.%u.%u.%u\n",
2406+ related_to->helper->max_expected,
2407+ related_to->helper->name,
2408+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
2409+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
2410+ return -EPERM;
2411+ }
2412+ DEBUGP("ip_conntrack: max number of expected "
2413+ "connections %i of %s reached for "
2414+ "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
2415+ related_to->helper->max_expected,
2416+ related_to->helper->name,
2417+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
2418+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
2419+
2420+ /* choose the oldest expectation to evict */
2421+ list_for_each(cur_item, &related_to->sibling_list) {
2422+ struct ip_conntrack_expect *cur;
2423+
2424+ cur = list_entry(cur_item,
2425+ struct ip_conntrack_expect,
2426+ expected_list);
2427+ if (cur->sibling == NULL) {
2428+ old = cur;
2429+ break;
2430+ }
2431+ }
2432+
2433+ /* (!old) cannot happen, since related_to->expecting is the
2434+ * number of unconfirmed expects */
2435+ IP_NF_ASSERT(old);
2436+
2437+ /* newnat14 does not reuse the real allocated memory
2438+ * structures but rather unexpects the old one and
2439+ * allocates a new one. unexpect_related will decrement
2440+ * related_to->expecting.
2441+ */
2442+ unexpect_related(old);
2443+ ret = -EPERM;
2444+ } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
2445+ struct ip_conntrack_expect *, &expect->tuple,
2446+ &expect->mask)) {
2447+ WRITE_UNLOCK(&ip_conntrack_lock);
2448+ DEBUGP("expect_related: busy!\n");
2449+ return -EBUSY;
2450+ }
2451+
2452+ new = (struct ip_conntrack_expect *)
2453+ kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
2454+ if (!new) {
2455+ WRITE_UNLOCK(&ip_conntrack_lock);
2456+ DEBUGP("expect_relaed: OOM allocating expect\n");
2457+ return -ENOMEM;
2458+ }
2459+
2460+ DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
2461+ memcpy(new, expect, sizeof(*expect));
2462+ new->expectant = related_to;
2463+ new->sibling = NULL;
2464+ atomic_set(&new->use, 1);
2465+
2466+ /* add to expected list for this connection */
2467+ list_add(&new->expected_list, &related_to->sibling_list);
2468+ /* add to global list of expectations */
2469+ list_prepend(&ip_conntrack_expect_list, &new->list);
2470+ /* add and start timer if required */
2471+ if (related_to->helper->timeout) {
2472+ init_timer(&new->timeout);
2473+ new->timeout.data = (unsigned long)new;
2474+ new->timeout.function = expectation_timed_out;
2475+ new->timeout.expires = jiffies +
2476+ related_to->helper->timeout * HZ;
2477+ add_timer(&new->timeout);
2478+ }
2479+ related_to->expecting++;
2480+
2481+ WRITE_UNLOCK(&ip_conntrack_lock);
2482+
2483+ return ret;
2484+}
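+/* Sketch of a typical caller (hypothetical helper code, for illustration
+ * only - the field values are assumptions, not part of this patch):
+ *
+ *	struct ip_conntrack_expect exp;
+ *
+ *	memset(&exp, 0, sizeof(exp));
+ *	exp.tuple.dst.ip = peer_ip;
+ *	exp.tuple.dst.u.tcp.port = peer_port;
+ *	exp.tuple.dst.protonum = IPPROTO_TCP;
+ *	exp.mask.dst.ip = 0xFFFFFFFF;
+ *	exp.mask.dst.u.tcp.port = 0xFFFF;
+ *	exp.mask.dst.protonum = 0xFF;
+ *	exp.expectfn = NULL;
+ *	ip_conntrack_expect_related(master_conntrack, &exp);
+ *
+ * Passing a stack variable is fine: the function above copies the
+ * expectation into freshly allocated memory. */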
2485+
2486+/* Change tuple in an existing expectation */
2487+int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
2488+ struct ip_conntrack_tuple *newtuple)
2489+{
2490+ int ret;
2491+
2492+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
2493+ WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
2494+
2495+ DEBUGP("change_expect:\n");
2496+ DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
2497+ DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask);
2498+ DEBUGP("newtuple: "); DUMP_TUPLE(newtuple);
2499+ if (expect->ct_tuple.dst.protonum == 0) {
2500+ /* Never seen before */
2501+ DEBUGP("change expect: never seen before\n");
2502+ if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
2503+ && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
2504+ struct ip_conntrack_expect *, newtuple, &expect->mask)) {
2505+ /* Force NAT to find an unused tuple */
2506+ ret = -1;
2507+ } else {
2508+ memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
2509+ memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
2510+ ret = 0;
2511+ }
2512+ } else {
2513+ /* Resent packet */
2514+ DEBUGP("change expect: resent packet\n");
2515+ if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
2516+ ret = 0;
2517+ } else {
2518+ /* Force NAT to choose the same port again */
2519+ ret = -1;
2520+ }
2521+ }
2522+ WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
2523+
2524+ return ret;
2525+}
2526+
2527+/* Alter reply tuple (maybe alter helper). If it's already taken,
2528+ return 0 and don't do alteration. */
2529+int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
2530+ const struct ip_conntrack_tuple *newreply)
2531+{
2532+ WRITE_LOCK(&ip_conntrack_lock);
2533+ if (__ip_conntrack_find(newreply, conntrack)) {
2534+ WRITE_UNLOCK(&ip_conntrack_lock);
2535+ return 0;
2536+ }
2537+ /* Should be unconfirmed, so not in hash table yet */
2538+ IP_NF_ASSERT(!is_confirmed(conntrack));
2539+
2540+ DEBUGP("Altering reply tuple of %p to ", conntrack);
2541+ DUMP_TUPLE(newreply);
2542+
2543+ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
2544+ if (!conntrack->master)
2545+ conntrack->helper = LIST_FIND(&helpers, helper_cmp,
2546+ struct ip_conntrack_helper *,
2547+ newreply);
2548+ WRITE_UNLOCK(&ip_conntrack_lock);
2549+
2550+ return 1;
2551+}
2552+
2553+int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
2554+{
2555+ WRITE_LOCK(&ip_conntrack_lock);
2556+ list_prepend(&helpers, me);
2557+ WRITE_UNLOCK(&ip_conntrack_lock);
2558+
2559+ return 0;
2560+}
2561+
2562+static inline int unhelp(struct ip_conntrack_tuple_hash *i,
2563+ const struct ip_conntrack_helper *me)
2564+{
2565+ if (i->ctrack->helper == me) {
2566+ /* Get rid of any expected. */
2567+ remove_expectations(i->ctrack, 0);
2568+ /* And *then* set helper to NULL */
2569+ i->ctrack->helper = NULL;
2570+ }
2571+ return 0;
2572+}
2573+
2574+void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
2575+{
2576+ unsigned int i;
2577+
2578+ /* Need write lock here, to delete helper. */
2579+ WRITE_LOCK(&ip_conntrack_lock);
2580+ LIST_DELETE(&helpers, me);
2581+
2582+ /* Get rid of expecteds, set helpers to NULL. */
2583+ for (i = 0; i < ip_conntrack_htable_size; i++)
2584+ LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
2585+ struct ip_conntrack_tuple_hash *, me);
2586+ WRITE_UNLOCK(&ip_conntrack_lock);
2587+
2588+ /* Someone could still be looking at the helper in a bh. */
2589+ synchronize_net();
2590+}
2591+
2592+/* Refresh conntrack for this many jiffies. */
2593+void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
2594+{
2595+ IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
2596+
2597+ WRITE_LOCK(&ip_conntrack_lock);
2598+ /* If not in hash table, timer will not be active yet */
2599+ if (!is_confirmed(ct))
2600+ ct->timeout.expires = extra_jiffies;
2601+ else {
2602+ /* Need del_timer for race avoidance (may already be dying). */
2603+ if (del_timer(&ct->timeout)) {
2604+ ct->timeout.expires = jiffies + extra_jiffies;
2605+ add_timer(&ct->timeout);
2606+ }
2607+ }
2608+ WRITE_UNLOCK(&ip_conntrack_lock);
2609+}
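+/* For illustration: the per-protocol trackers call this on every valid
+ * packet, e.g. the UDP tracker effectively does
+ *	ip_ct_refresh(ct, ip_ct_udp_timeout);
+ * so a connection stays alive for as long as packets keep flowing. */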
2610+
2611+/* Returns new sk_buff, or NULL */
2612+struct sk_buff *
2613+ip_ct_gather_frags(struct sk_buff *skb)
2614+{
2615+ struct sock *sk = skb->sk;
2616+#ifdef CONFIG_NETFILTER_DEBUG
2617+ unsigned int olddebug = skb->nf_debug;
2618+#endif
2619+ if (sk) {
2620+ sock_hold(sk);
2621+ skb_orphan(skb);
2622+ }
2623+
2624+ local_bh_disable();
2625+ skb = ip_defrag(skb);
2626+ local_bh_enable();
2627+
2628+ if (!skb) {
2629+ if (sk)
2630+ sock_put(sk);
2631+ return skb;
2632+ }
2633+
2634+ if (sk) {
2635+ skb_set_owner_w(skb, sk);
2636+ sock_put(sk);
2637+ }
2638+
2639+ ip_send_check(skb->nh.iph);
2640+ skb->nfcache |= NFC_ALTERED;
2641+#ifdef CONFIG_NETFILTER_DEBUG
2642+ /* Packet path as if nothing had happened. */
2643+ skb->nf_debug = olddebug;
2644+#endif
2645+ return skb;
2646+}
2647+
2648+/* Used by ipt_REJECT. */
2649+static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
2650+{
2651+ struct ip_conntrack *ct;
2652+ enum ip_conntrack_info ctinfo;
2653+
2654+ ct = __ip_conntrack_get(nfct, &ctinfo);
2655+
2656+ /* This ICMP is in reverse direction to the packet which
2657+ caused it */
2658+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
2659+ ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
2660+ else
2661+ ctinfo = IP_CT_RELATED;
2662+
2663+ /* Attach new skbuff, and increment count */
2664+ nskb->nfct = &ct->infos[ctinfo];
2665+ atomic_inc(&ct->ct_general.use);
2666+}
2667+
2668+static inline int
2669+do_kill(const struct ip_conntrack_tuple_hash *i,
2670+ int (*kill)(const struct ip_conntrack *i, void *data),
2671+ void *data)
2672+{
2673+ return kill(i->ctrack, data);
2674+}
2675+
2676+/* Bring out ya dead! */
2677+static struct ip_conntrack_tuple_hash *
2678+get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
2679+ void *data, unsigned int *bucket)
2680+{
2681+ struct ip_conntrack_tuple_hash *h = NULL;
2682+
2683+ READ_LOCK(&ip_conntrack_lock);
2684+ for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
2685+ h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
2686+ struct ip_conntrack_tuple_hash *, kill, data);
2687+ }
2688+ if (h)
2689+ atomic_inc(&h->ctrack->ct_general.use);
2690+ READ_UNLOCK(&ip_conntrack_lock);
2691+
2692+ return h;
2693+}
2694+
2695+void
2696+ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
2697+ void *data)
2698+{
2699+ struct ip_conntrack_tuple_hash *h;
2700+ unsigned int bucket = 0;
2701+
2702+ while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
2703+ /* Time to push up daisies... */
2704+ if (del_timer(&h->ctrack->timeout))
2705+ death_by_timeout((unsigned long)h->ctrack);
2706+ /* ... else the timer will get him soon. */
2707+
2708+ ip_conntrack_put(h->ctrack);
2709+ }
2710+}
2711+
2712+/* Fast function for those who don't want to parse /proc (and I don't
2713+ blame them). */
2714+/* Reversing the socket's dst/src point of view gives us the reply
2715+ mapping. */
2716+static int
2717+getorigdst(struct sock *sk, int optval, void *user, int *len)
2718+{
2719+ struct inet_opt *inet = inet_sk(sk);
2720+ struct ip_conntrack_tuple_hash *h;
2721+ struct ip_conntrack_tuple tuple;
2722+
2723+ IP_CT_TUPLE_U_BLANK(&tuple);
2724+ tuple.src.ip = inet->rcv_saddr;
2725+ tuple.src.u.tcp.port = inet->sport;
2726+ tuple.dst.ip = inet->daddr;
2727+ tuple.dst.u.tcp.port = inet->dport;
2728+ tuple.dst.protonum = IPPROTO_TCP;
2729+
2730+ /* We only do TCP at the moment: is there a better way? */
2731+ if (strcmp(sk->sk_prot->name, "TCP")) {
2732+ DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
2733+ return -ENOPROTOOPT;
2734+ }
2735+
2736+ if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
2737+ DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
2738+ *len, sizeof(struct sockaddr_in));
2739+ return -EINVAL;
2740+ }
2741+
2742+ h = ip_conntrack_find_get(&tuple, NULL);
2743+ if (h) {
2744+ struct sockaddr_in sin;
2745+
2746+ sin.sin_family = AF_INET;
2747+ sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
2748+ .tuple.dst.u.tcp.port;
2749+ sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
2750+ .tuple.dst.ip;
2751+
2752+ DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
2753+ NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
2754+ ip_conntrack_put(h->ctrack);
2755+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
2756+ return -EFAULT;
2757+ else
2758+ return 0;
2759+ }
2760+ DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
2761+ NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
2762+ NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
2763+ return -ENOENT;
2764+}
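+/* Illustrative userspace counterpart (assumed usage, not part of this
+ * patch): a transparent proxy recovers the pre-NAT destination of an
+ * accepted TCP connection like this:
+ *
+ *	struct sockaddr_in dst;
+ *	socklen_t len = sizeof(dst);
+ *
+ *	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &len) == 0)
+ *		printf("original dst %s:%u\n", inet_ntoa(dst.sin_addr),
+ *		       ntohs(dst.sin_port));
+ */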
2765+
2766+static struct nf_sockopt_ops so_getorigdst = {
2767+ .pf = PF_INET,
2768+ .get_optmin = SO_ORIGINAL_DST,
2769+ .get_optmax = SO_ORIGINAL_DST+1,
2770+ .get = &getorigdst,
2771+};
2772+
2773+static int kill_all(const struct ip_conntrack *i, void *data)
2774+{
2775+ return 1;
2776+}
2777+
2778+/* Mishearing the voices in his head, our hero wonders how he's
2779+ supposed to kill the mall. */
2780+void ip_conntrack_cleanup(void)
2781+{
2782+ ip_ct_attach = NULL;
2783+ /* This makes sure all current packets have passed through
2784+ netfilter framework. Roll on, two-stage module
2785+ delete... */
2786+ synchronize_net();
2787+
2788+ i_see_dead_people:
2789+ ip_ct_selective_cleanup(kill_all, NULL);
2790+ if (atomic_read(&ip_conntrack_count) != 0) {
2791+ schedule();
2792+ goto i_see_dead_people;
2793+ }
2794+
2795+ kmem_cache_destroy(ip_conntrack_cachep);
2796+ vfree(ip_conntrack_hash);
2797+ nf_unregister_sockopt(&so_getorigdst);
2798+}
2799+
2800+static int hashsize;
2801+MODULE_PARM(hashsize, "i");
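+/* e.g. "modprobe ip_conntrack hashsize=8192" overrides the memory-based
+ * sizing done in ip_conntrack_init() below. */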
2802+
2803+int __init ip_conntrack_init(void)
2804+{
2805+ unsigned int i;
2806+ int ret;
2807+
2808+ /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
2809+ * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
2810+ if (hashsize) {
2811+ ip_conntrack_htable_size = hashsize;
2812+ } else {
2813+ ip_conntrack_htable_size
2814+ = (((num_physpages << PAGE_SHIFT) / 16384)
2815+ / sizeof(struct list_head));
2816+ if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
2817+ ip_conntrack_htable_size = 8192;
2818+ if (ip_conntrack_htable_size < 16)
2819+ ip_conntrack_htable_size = 16;
2820+ }
2821+ ip_conntrack_max = 8 * ip_conntrack_htable_size;
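+ /* Worked example: on a 32MB i386 box this gives
+ (32MB / 16384) / sizeof(struct list_head) = 2048 / 8 = 256
+ buckets, and hence ip_conntrack_max = 8 * 256 = 2048. */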
2822+
2823+ printk("ip_conntrack version %s (%u buckets, %d max)"
2824+ " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
2825+ ip_conntrack_htable_size, ip_conntrack_max,
2826+ sizeof(struct ip_conntrack));
2827+
2828+ ret = nf_register_sockopt(&so_getorigdst);
2829+ if (ret != 0) {
2830+ printk(KERN_ERR "Unable to register netfilter socket option\n");
2831+ return ret;
2832+ }
2833+
2834+ ip_conntrack_hash = vmalloc(sizeof(struct list_head)
2835+ * ip_conntrack_htable_size);
2836+ if (!ip_conntrack_hash) {
2837+ printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
2838+ goto err_unreg_sockopt;
2839+ }
2840+
2841+ ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
2842+ sizeof(struct ip_conntrack), 0,
2843+ SLAB_HWCACHE_ALIGN, NULL, NULL);
2844+ if (!ip_conntrack_cachep) {
2845+ printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
2846+ goto err_free_hash;
2847+ }
2848+ /* Don't NEED lock here, but good form anyway. */
2849+ WRITE_LOCK(&ip_conntrack_lock);
2850+ /* Sew in builtin protocols. */
2851+ list_append(&protocol_list, &ip_conntrack_protocol_tcp);
2852+ list_append(&protocol_list, &ip_conntrack_protocol_udp);
2853+ list_append(&protocol_list, &ip_conntrack_protocol_icmp);
2854+ WRITE_UNLOCK(&ip_conntrack_lock);
2855+
2856+ for (i = 0; i < ip_conntrack_htable_size; i++)
2857+ INIT_LIST_HEAD(&ip_conntrack_hash[i]);
2858+
2859+ /* For use by ipt_REJECT */
2860+ ip_ct_attach = ip_conntrack_attach;
2861+ return ret;
2862+
2863+err_free_hash:
2864+ vfree(ip_conntrack_hash);
2865+err_unreg_sockopt:
2866+ nf_unregister_sockopt(&so_getorigdst);
2867+
2868+ return -ENOMEM;
2869+}
2870diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.3/net/ipv4/netfilter/ip_conntrack_standalone.c
2871--- linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-02-26 23:36:59.000000000 +0100
2872+++ linux-2.6.3/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-02-27 00:03:14.482026576 +0100
2873@@ -194,6 +194,26 @@
2874 return ip_conntrack_confirm(*pskb);
2875 }
2876
2877+static unsigned int ip_conntrack_defrag(unsigned int hooknum,
2878+ struct sk_buff **pskb,
2879+ const struct net_device *in,
2880+ const struct net_device *out,
2881+ int (*okfn)(struct sk_buff *))
2882+{
2883+ /* Previously seen (loopback)? Ignore. Do this before
2884+ fragment check. */
2885+ if ((*pskb)->nfct)
2886+ return NF_ACCEPT;
2887+
2888+ /* Gather fragments. */
2889+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
2890+ *pskb = ip_ct_gather_frags(*pskb);
2891+ if (!*pskb)
2892+ return NF_STOLEN;
2893+ }
2894+ return NF_ACCEPT;
2895+}
2896+
2897 static unsigned int ip_refrag(unsigned int hooknum,
2898 struct sk_buff **pskb,
2899 const struct net_device *in,
2900@@ -236,6 +256,14 @@
2901
2902 /* Connection tracking may drop packets, but never alters them, so
2903 make it the first hook. */
2904+static struct nf_hook_ops ip_conntrack_defrag_ops = {
2905+ .hook = ip_conntrack_defrag,
2906+ .owner = THIS_MODULE,
2907+ .pf = PF_INET,
2908+ .hooknum = NF_IP_PRE_ROUTING,
2909+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
2910+};
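+/* Defragmentation now runs as a hook of its own, at a priority just
+ * before conntrack proper; presumably this leaves room for other hooks
+ * (such as one marking packets as untracked) to run between the two. */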
2911+
2912 static struct nf_hook_ops ip_conntrack_in_ops = {
2913 .hook = ip_conntrack_in,
2914 .owner = THIS_MODULE,
2915@@ -244,6 +272,14 @@
2916 .priority = NF_IP_PRI_CONNTRACK,
2917 };
2918
2919+static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = {
2920+ .hook = ip_conntrack_defrag,
2921+ .owner = THIS_MODULE,
2922+ .pf = PF_INET,
2923+ .hooknum = NF_IP_LOCAL_OUT,
2924+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
2925+};
2926+
2927 static struct nf_hook_ops ip_conntrack_local_out_ops = {
2928 .hook = ip_conntrack_local,
2929 .owner = THIS_MODULE,
2930@@ -470,10 +506,20 @@
2931 if (!proc) goto cleanup_init;
2932 proc->owner = THIS_MODULE;
2933
2934+ ret = nf_register_hook(&ip_conntrack_defrag_ops);
2935+ if (ret < 0) {
2936+ printk("ip_conntrack: can't register pre-routing defrag hook.\n");
2937+ goto cleanup_proc;
2938+ }
2939+ ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
2940+ if (ret < 0) {
2941+ printk("ip_conntrack: can't register local_out defrag hook.\n");
2942+ goto cleanup_defragops;
2943+ }
2944 ret = nf_register_hook(&ip_conntrack_in_ops);
2945 if (ret < 0) {
2946 printk("ip_conntrack: can't register pre-routing hook.\n");
2947- goto cleanup_proc;
2948+ goto cleanup_defraglocalops;
2949 }
2950 ret = nf_register_hook(&ip_conntrack_local_out_ops);
2951 if (ret < 0) {
2952@@ -511,6 +557,10 @@
2953 nf_unregister_hook(&ip_conntrack_local_out_ops);
2954 cleanup_inops:
2955 nf_unregister_hook(&ip_conntrack_in_ops);
2956+ cleanup_defraglocalops:
2957+ nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
2958+ cleanup_defragops:
2959+ nf_unregister_hook(&ip_conntrack_defrag_ops);
2960 cleanup_proc:
2961 proc_net_remove("ip_conntrack");
2962 cleanup_init:
2963@@ -602,5 +652,6 @@
2964 EXPORT_SYMBOL(ip_conntrack_expect_list);
2965 EXPORT_SYMBOL(ip_conntrack_lock);
2966 EXPORT_SYMBOL(ip_conntrack_hash);
2967+EXPORT_SYMBOL(ip_conntrack_untracked);
2968 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
2969 EXPORT_SYMBOL_GPL(ip_conntrack_put);
2970diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_standalone.c.orig linux-2.6.3/net/ipv4/netfilter/ip_conntrack_standalone.c.orig
2971--- linux-2.6.3.org/net/ipv4/netfilter/ip_conntrack_standalone.c.orig 1970-01-01 01:00:00.000000000 +0100
2972+++ linux-2.6.3/net/ipv4/netfilter/ip_conntrack_standalone.c.orig 2004-02-27 00:02:49.321851504 +0100
2973@@ -0,0 +1,606 @@
2974+/* This file contains all the functions required for the standalone
2975+ ip_conntrack module.
2976+
2977+ These are not required by the compatibility layer.
2978+*/
2979+
2980+/* (C) 1999-2001 Paul `Rusty' Russell
2981+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
2982+ *
2983+ * This program is free software; you can redistribute it and/or modify
2984+ * it under the terms of the GNU General Public License version 2 as
2985+ * published by the Free Software Foundation.
2986+ */
2987+
2988+#include <linux/config.h>
2989+#include <linux/types.h>
2990+#include <linux/ip.h>
2991+#include <linux/netfilter.h>
2992+#include <linux/netfilter_ipv4.h>
2993+#include <linux/module.h>
2994+#include <linux/skbuff.h>
2995+#include <linux/proc_fs.h>
2996+#ifdef CONFIG_SYSCTL
2997+#include <linux/sysctl.h>
2998+#endif
2999+#include <net/checksum.h>
3000+
3001+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
3002+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
3003+
3004+#include <linux/netfilter_ipv4/ip_conntrack.h>
3005+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
3006+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
3007+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
3008+#include <linux/netfilter_ipv4/listhelp.h>
3009+
3010+#if 0
3011+#define DEBUGP printk
3012+#else
3013+#define DEBUGP(format, args...)
3014+#endif
3015+
3016+MODULE_LICENSE("GPL");
3017+
3018+static int kill_proto(const struct ip_conntrack *i, void *data)
3019+{
3020+ return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
3021+ *((u_int8_t *) data));
3022+}
3023+
3024+static unsigned int
3025+print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple,
3026+ struct ip_conntrack_protocol *proto)
3027+{
3028+ int len;
3029+
3030+ len = sprintf(buffer, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
3031+ NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
3032+
3033+ len += proto->print_tuple(buffer + len, tuple);
3034+
3035+ return len;
3036+}
3037+
3038+/* FIXME: Don't print source proto part. --RR */
3039+static unsigned int
3040+print_expect(char *buffer, const struct ip_conntrack_expect *expect)
3041+{
3042+ unsigned int len;
3043+
3044+ if (expect->expectant->helper->timeout)
3045+ len = sprintf(buffer, "EXPECTING: %lu ",
3046+ timer_pending(&expect->timeout)
3047+ ? (expect->timeout.expires - jiffies)/HZ : 0);
3048+ else
3049+ len = sprintf(buffer, "EXPECTING: - ");
3050+ len += sprintf(buffer + len, "use=%u proto=%u ",
3051+ atomic_read(&expect->use), expect->tuple.dst.protonum);
3052+ len += print_tuple(buffer + len, &expect->tuple,
3053+ __ip_ct_find_proto(expect->tuple.dst.protonum));
3054+ len += sprintf(buffer + len, "\n");
3055+ return len;
3056+}
3057+
3058+static unsigned int
3059+print_conntrack(char *buffer, struct ip_conntrack *conntrack)
3060+{
3061+ unsigned int len;
3062+ struct ip_conntrack_protocol *proto
3063+ = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
3064+ .tuple.dst.protonum);
3065+
3066+ len = sprintf(buffer, "%-8s %u %lu ",
3067+ proto->name,
3068+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
3069+ .tuple.dst.protonum,
3070+ timer_pending(&conntrack->timeout)
3071+ ? (conntrack->timeout.expires - jiffies)/HZ : 0);
3072+
3073+ len += proto->print_conntrack(buffer + len, conntrack);
3074+ len += print_tuple(buffer + len,
3075+ &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
3076+ proto);
3077+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
3078+ len += sprintf(buffer + len, "[UNREPLIED] ");
3079+ len += print_tuple(buffer + len,
3080+ &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
3081+ proto);
3082+ if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
3083+ len += sprintf(buffer + len, "[ASSURED] ");
3084+ len += sprintf(buffer + len, "use=%u ",
3085+ atomic_read(&conntrack->ct_general.use));
3086+ len += sprintf(buffer + len, "\n");
3087+
3088+ return len;
3089+}
3090+
3091+/* Returns true when finished. */
3092+static inline int
3093+conntrack_iterate(const struct ip_conntrack_tuple_hash *hash,
3094+ char *buffer, off_t offset, off_t *upto,
3095+ unsigned int *len, unsigned int maxlen)
3096+{
3097+ unsigned int newlen;
3098+ IP_NF_ASSERT(hash->ctrack);
3099+
3100+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
3101+
3102+ /* Only count originals */
3103+ if (DIRECTION(hash))
3104+ return 0;
3105+
3106+ if ((*upto)++ < offset)
3107+ return 0;
3108+
3109+ newlen = print_conntrack(buffer + *len, hash->ctrack);
3110+ if (*len + newlen > maxlen)
3111+ return 1;
3112+ else *len += newlen;
3113+
3114+ return 0;
3115+}
3116+
3117+static int
3118+list_conntracks(char *buffer, char **start, off_t offset, int length)
3119+{
3120+ unsigned int i;
3121+ unsigned int len = 0;
3122+ off_t upto = 0;
3123+ struct list_head *e;
3124+
3125+ READ_LOCK(&ip_conntrack_lock);
3126+ /* Traverse hash; print originals then reply. */
3127+ for (i = 0; i < ip_conntrack_htable_size; i++) {
3128+ if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate,
3129+ struct ip_conntrack_tuple_hash *,
3130+ buffer, offset, &upto, &len, length))
3131+ goto finished;
3132+ }
3133+
3134+ /* Now iterate through expecteds. */
3135+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
3136+ list_for_each(e, &ip_conntrack_expect_list) {
3137+ unsigned int last_len;
3138+ struct ip_conntrack_expect *expect
3139+ = (struct ip_conntrack_expect *)e;
3140+ if (upto++ < offset) continue;
3141+
3142+ last_len = len;
3143+ len += print_expect(buffer + len, expect);
3144+ if (len > length) {
3145+ len = last_len;
3146+ goto finished_expects;
3147+ }
3148+ }
3149+
3150+ finished_expects:
3151+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
3152+ finished:
3153+ READ_UNLOCK(&ip_conntrack_lock);
3154+
3155+ /* `start' hack - see fs/proc/generic.c line ~165 */
3156+ *start = (char *)((unsigned int)upto - offset);
3157+ return len;
3158+}
3159+
3160+static unsigned int ip_confirm(unsigned int hooknum,
3161+ struct sk_buff **pskb,
3162+ const struct net_device *in,
3163+ const struct net_device *out,
3164+ int (*okfn)(struct sk_buff *))
3165+{
3166+ /* We've seen it coming out the other side: confirm it */
3167+ return ip_conntrack_confirm(*pskb);
3168+}
3169+
3170+static unsigned int ip_refrag(unsigned int hooknum,
3171+ struct sk_buff **pskb,
3172+ const struct net_device *in,
3173+ const struct net_device *out,
3174+ int (*okfn)(struct sk_buff *))
3175+{
3176+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
3177+
3178+ /* We've seen it coming out the other side: confirm */
3179+ if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
3180+ return NF_DROP;
3181+
3182+ /* Local packets are never produced too large for their
3183+ interface. We defragment them at LOCAL_OUT, however,
3184+ so we have to refragment them here. */
3185+ if ((*pskb)->len > dst_pmtu(&rt->u.dst) &&
3186+ !skb_shinfo(*pskb)->tso_size) {
3187+ /* No hook can be after us, so this should be OK. */
3188+ ip_fragment(*pskb, okfn);
3189+ return NF_STOLEN;
3190+ }
3191+ return NF_ACCEPT;
3192+}
3193+
3194+static unsigned int ip_conntrack_local(unsigned int hooknum,
3195+ struct sk_buff **pskb,
3196+ const struct net_device *in,
3197+ const struct net_device *out,
3198+ int (*okfn)(struct sk_buff *))
3199+{
3200+ /* root is playing with raw sockets. */
3201+ if ((*pskb)->len < sizeof(struct iphdr)
3202+ || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
3203+ if (net_ratelimit())
3204+ printk("ipt_hook: happy cracking.\n");
3205+ return NF_ACCEPT;
3206+ }
3207+ return ip_conntrack_in(hooknum, pskb, in, out, okfn);
3208+}
3209+
3210+/* Connection tracking may drop packets, but never alters them, so
3211+ make it the first hook. */
3212+static struct nf_hook_ops ip_conntrack_in_ops = {
3213+ .hook = ip_conntrack_in,
3214+ .owner = THIS_MODULE,
3215+ .pf = PF_INET,
3216+ .hooknum = NF_IP_PRE_ROUTING,
3217+ .priority = NF_IP_PRI_CONNTRACK,
3218+};
3219+
3220+static struct nf_hook_ops ip_conntrack_local_out_ops = {
3221+ .hook = ip_conntrack_local,
3222+ .owner = THIS_MODULE,
3223+ .pf = PF_INET,
3224+ .hooknum = NF_IP_LOCAL_OUT,
3225+ .priority = NF_IP_PRI_CONNTRACK,
3226+};
3227+
3228+/* Refragmenter; last chance. */
3229+static struct nf_hook_ops ip_conntrack_out_ops = {
3230+ .hook = ip_refrag,
3231+ .owner = THIS_MODULE,
3232+ .pf = PF_INET,
3233+ .hooknum = NF_IP_POST_ROUTING,
3234+ .priority = NF_IP_PRI_LAST,
3235+};
3236+
3237+static struct nf_hook_ops ip_conntrack_local_in_ops = {
3238+ .hook = ip_confirm,
3239+ .owner = THIS_MODULE,
3240+ .pf = PF_INET,
3241+ .hooknum = NF_IP_LOCAL_IN,
3242+ .priority = NF_IP_PRI_LAST-1,
3243+};
3244+
3245+/* Sysctl support */
3246+
3247+#ifdef CONFIG_SYSCTL
3248+
3249+/* From ip_conntrack_core.c */
3250+extern int ip_conntrack_max;
3251+extern unsigned int ip_conntrack_htable_size;
3252+
3253+/* From ip_conntrack_proto_tcp.c */
3254+extern unsigned long ip_ct_tcp_timeout_syn_sent;
3255+extern unsigned long ip_ct_tcp_timeout_syn_recv;
3256+extern unsigned long ip_ct_tcp_timeout_established;
3257+extern unsigned long ip_ct_tcp_timeout_fin_wait;
3258+extern unsigned long ip_ct_tcp_timeout_close_wait;
3259+extern unsigned long ip_ct_tcp_timeout_last_ack;
3260+extern unsigned long ip_ct_tcp_timeout_time_wait;
3261+extern unsigned long ip_ct_tcp_timeout_close;
3262+
3263+/* From ip_conntrack_proto_udp.c */
3264+extern unsigned long ip_ct_udp_timeout;
3265+extern unsigned long ip_ct_udp_timeout_stream;
3266+
3267+/* From ip_conntrack_proto_icmp.c */
3268+extern unsigned long ip_ct_icmp_timeout;
3269+
3270+/* From ip_conntrack_proto_generic.c */
3271+extern unsigned long ip_ct_generic_timeout;
3272+
3273+static struct ctl_table_header *ip_ct_sysctl_header;
3274+
3275+static ctl_table ip_ct_sysctl_table[] = {
3276+ {
3277+ .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
3278+ .procname = "ip_conntrack_max",
3279+ .data = &ip_conntrack_max,
3280+ .maxlen = sizeof(int),
3281+ .mode = 0644,
3282+ .proc_handler = &proc_dointvec,
3283+ },
3284+ {
3285+ .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
3286+ .procname = "ip_conntrack_buckets",
3287+ .data = &ip_conntrack_htable_size,
3288+ .maxlen = sizeof(unsigned int),
3289+ .mode = 0444,
3290+ .proc_handler = &proc_dointvec,
3291+ },
3292+ {
3293+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
3294+ .procname = "ip_conntrack_tcp_timeout_syn_sent",
3295+ .data = &ip_ct_tcp_timeout_syn_sent,
3296+ .maxlen = sizeof(unsigned int),
3297+ .mode = 0644,
3298+ .proc_handler = &proc_dointvec_jiffies,
3299+ },
3300+ {
3301+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
3302+ .procname = "ip_conntrack_tcp_timeout_syn_recv",
3303+ .data = &ip_ct_tcp_timeout_syn_recv,
3304+ .maxlen = sizeof(unsigned int),
3305+ .mode = 0644,
3306+ .proc_handler = &proc_dointvec_jiffies,
3307+ },
3308+ {
3309+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
3310+ .procname = "ip_conntrack_tcp_timeout_established",
3311+ .data = &ip_ct_tcp_timeout_established,
3312+ .maxlen = sizeof(unsigned int),
3313+ .mode = 0644,
3314+ .proc_handler = &proc_dointvec_jiffies,
3315+ },
3316+ {
3317+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
3318+ .procname = "ip_conntrack_tcp_timeout_fin_wait",
3319+ .data = &ip_ct_tcp_timeout_fin_wait,
3320+ .maxlen = sizeof(unsigned int),
3321+ .mode = 0644,
3322+ .proc_handler = &proc_dointvec_jiffies,
3323+ },
3324+ {
3325+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
3326+ .procname = "ip_conntrack_tcp_timeout_close_wait",
3327+ .data = &ip_ct_tcp_timeout_close_wait,
3328+ .maxlen = sizeof(unsigned int),
3329+ .mode = 0644,
3330+ .proc_handler = &proc_dointvec_jiffies,
3331+ },
3332+ {
3333+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
3334+ .procname = "ip_conntrack_tcp_timeout_last_ack",
3335+ .data = &ip_ct_tcp_timeout_last_ack,
3336+ .maxlen = sizeof(unsigned int),
3337+ .mode = 0644,
3338+ .proc_handler = &proc_dointvec_jiffies,
3339+ },
3340+ {
3341+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
3342+ .procname = "ip_conntrack_tcp_timeout_time_wait",
3343+ .data = &ip_ct_tcp_timeout_time_wait,
3344+ .maxlen = sizeof(unsigned int),
3345+ .mode = 0644,
3346+ .proc_handler = &proc_dointvec_jiffies,
3347+ },
3348+ {
3349+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
3350+ .procname = "ip_conntrack_tcp_timeout_close",
3351+ .data = &ip_ct_tcp_timeout_close,
3352+ .maxlen = sizeof(unsigned int),
3353+ .mode = 0644,
3354+ .proc_handler = &proc_dointvec_jiffies,
3355+ },
3356+ {
3357+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
3358+ .procname = "ip_conntrack_udp_timeout",
3359+ .data = &ip_ct_udp_timeout,
3360+ .maxlen = sizeof(unsigned int),
3361+ .mode = 0644,
3362+ .proc_handler = &proc_dointvec_jiffies,
3363+ },
3364+ {
3365+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
3366+ .procname = "ip_conntrack_udp_timeout_stream",
3367+ .data = &ip_ct_udp_timeout_stream,
3368+ .maxlen = sizeof(unsigned int),
3369+ .mode = 0644,
3370+ .proc_handler = &proc_dointvec_jiffies,
3371+ },
3372+ {
3373+ .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
3374+ .procname = "ip_conntrack_icmp_timeout",
3375+ .data = &ip_ct_icmp_timeout,
3376+ .maxlen = sizeof(unsigned int),
3377+ .mode = 0644,
3378+ .proc_handler = &proc_dointvec_jiffies,
3379+ },
3380+ {
3381+ .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
3382+ .procname = "ip_conntrack_generic_timeout",
3383+ .data = &ip_ct_generic_timeout,
3384+ .maxlen = sizeof(unsigned int),
3385+ .mode = 0644,
3386+ .proc_handler = &proc_dointvec_jiffies,
3387+ },
3388+ { .ctl_name = 0 }
3389+};
3390+
3391+#define NET_IP_CONNTRACK_MAX 2089
3392+
3393+static ctl_table ip_ct_netfilter_table[] = {
3394+ {
3395+ .ctl_name = NET_IPV4_NETFILTER,
3396+ .procname = "netfilter",
3397+ .mode = 0555,
3398+ .child = ip_ct_sysctl_table,
3399+ },
3400+ {
3401+ .ctl_name = NET_IP_CONNTRACK_MAX,
3402+ .procname = "ip_conntrack_max",
3403+ .data = &ip_conntrack_max,
3404+ .maxlen = sizeof(int),
3405+ .mode = 0644,
3406+ .proc_handler = &proc_dointvec
3407+ },
3408+ { .ctl_name = 0 }
3409+};
3410+
3411+static ctl_table ip_ct_ipv4_table[] = {
3412+ {
3413+ .ctl_name = NET_IPV4,
3414+ .procname = "ipv4",
3415+ .mode = 0555,
3416+ .child = ip_ct_netfilter_table,
3417+ },
3418+ { .ctl_name = 0 }
3419+};
3420+
3421+static ctl_table ip_ct_net_table[] = {
3422+ {
3423+ .ctl_name = CTL_NET,
3424+ .procname = "net",
3425+ .mode = 0555,
3426+ .child = ip_ct_ipv4_table,
3427+ },
3428+ { .ctl_name = 0 }
3429+};
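+/* These knobs appear under /proc/sys/net/ipv4/netfilter/ (plus the
+ * compatibility entry net.ipv4.ip_conntrack_max), e.g.:
+ *	sysctl -w net.ipv4.netfilter.ip_conntrack_tcp_timeout_established=432000
+ */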
3430+#endif
3431+static int init_or_cleanup(int init)
3432+{
3433+ struct proc_dir_entry *proc;
3434+ int ret = 0;
3435+
3436+ if (!init) goto cleanup;
3437+
3438+ ret = ip_conntrack_init();
3439+ if (ret < 0)
3440+ goto cleanup_nothing;
3441+
3442+ proc = proc_net_create("ip_conntrack",0,list_conntracks);
3443+ if (!proc) goto cleanup_init;
3444+ proc->owner = THIS_MODULE;
3445+
3446+ ret = nf_register_hook(&ip_conntrack_in_ops);
3447+ if (ret < 0) {
3448+ printk("ip_conntrack: can't register pre-routing hook.\n");
3449+ goto cleanup_proc;
3450+ }
3451+ ret = nf_register_hook(&ip_conntrack_local_out_ops);
3452+ if (ret < 0) {
3453+ printk("ip_conntrack: can't register local out hook.\n");
3454+ goto cleanup_inops;
3455+ }
3456+ ret = nf_register_hook(&ip_conntrack_out_ops);
3457+ if (ret < 0) {
3458+ printk("ip_conntrack: can't register post-routing hook.\n");
3459+ goto cleanup_inandlocalops;
3460+ }
3461+ ret = nf_register_hook(&ip_conntrack_local_in_ops);
3462+ if (ret < 0) {
3463+ printk("ip_conntrack: can't register local in hook.\n");
3464+ goto cleanup_inoutandlocalops;
3465+ }
3466+#ifdef CONFIG_SYSCTL
3467+ ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
3468+ if (ip_ct_sysctl_header == NULL) {
3469+ printk("ip_conntrack: can't register to sysctl.\n");
3470+ goto cleanup;
3471+ }
3472+#endif
3473+
3474+ return ret;
3475+
3476+ cleanup:
3477+#ifdef CONFIG_SYSCTL
3478+ unregister_sysctl_table(ip_ct_sysctl_header);
3479+#endif
3480+ nf_unregister_hook(&ip_conntrack_local_in_ops);
3481+ cleanup_inoutandlocalops:
3482+ nf_unregister_hook(&ip_conntrack_out_ops);
3483+ cleanup_inandlocalops:
3484+ nf_unregister_hook(&ip_conntrack_local_out_ops);
3485+ cleanup_inops:
3486+ nf_unregister_hook(&ip_conntrack_in_ops);
3487+ cleanup_proc:
3488+ proc_net_remove("ip_conntrack");
3489+ cleanup_init:
3490+ ip_conntrack_cleanup();
3491+ cleanup_nothing:
3492+ return ret;
3493+}
3494+
3495+/* FIXME: Allow NULL functions and sub in pointers to generic for
3496+ them. --RR */
3497+int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
3498+{
3499+ int ret = 0;
3500+ struct list_head *i;
3501+
3502+ WRITE_LOCK(&ip_conntrack_lock);
3503+ list_for_each(i, &protocol_list) {
3504+ if (((struct ip_conntrack_protocol *)i)->proto
3505+ == proto->proto) {
3506+ ret = -EBUSY;
3507+ goto out;
3508+ }
3509+ }
3510+
3511+ list_prepend(&protocol_list, proto);
3512+
3513+ out:
3514+ WRITE_UNLOCK(&ip_conntrack_lock);
3515+ return ret;
3516+}
3517+
3518+void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
3519+{
3520+ WRITE_LOCK(&ip_conntrack_lock);
3521+
3522+ /* ip_ct_find_proto() returns proto_generic in case there is no protocol
3523+ * helper. So this should be enough - HW */
3524+ LIST_DELETE(&protocol_list, proto);
3525+ WRITE_UNLOCK(&ip_conntrack_lock);
3526+
3527+	/* Somebody could still be looking at the proto in bh. */
3528+ synchronize_net();
3529+
3530+	/* Remove all conntrack entries for this protocol */
3531+ ip_ct_selective_cleanup(kill_proto, &proto->proto);
3532+}
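Registration above follows a reject-duplicates-then-prepend pattern under the conntrack write lock. A compact userspace sketch of the same logic; the plain singly-linked list stands in for the kernel's protocol_list, list_prepend() and rwlock:

#include <errno.h>
#include <stdio.h>

struct proto {
	struct proto *next;
	int protonum;
};

static struct proto *protocol_list;

static int proto_register(struct proto *p)
{
	for (struct proto *i = protocol_list; i; i = i->next)
		if (i->protonum == p->protonum)
			return -EBUSY;		/* already registered */
	p->next = protocol_list;		/* prepend, like list_prepend() */
	protocol_list = p;
	return 0;
}

int main(void)
{
	struct proto tcp = { .protonum = 6 }, dup = { .protonum = 6 };

	printf("%d %d\n", proto_register(&tcp), proto_register(&dup));	/* 0 -16 */
	return 0;
}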
3533+
3534+static int __init init(void)
3535+{
3536+ return init_or_cleanup(1);
3537+}
3538+
3539+static void __exit fini(void)
3540+{
3541+ init_or_cleanup(0);
3542+}
3543+
3544+module_init(init);
3545+module_exit(fini);
3546+
3547+/* Some modules need us, but don't depend directly on any symbol.
3548+ They should call this. */
3549+void need_ip_conntrack(void)
3550+{
3551+}
3552+
3553+EXPORT_SYMBOL(ip_conntrack_protocol_register);
3554+EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
3555+EXPORT_SYMBOL(invert_tuplepr);
3556+EXPORT_SYMBOL(ip_conntrack_alter_reply);
3557+EXPORT_SYMBOL(ip_conntrack_destroyed);
3558+EXPORT_SYMBOL(ip_conntrack_get);
3559+EXPORT_SYMBOL(need_ip_conntrack);
3560+EXPORT_SYMBOL(ip_conntrack_helper_register);
3561+EXPORT_SYMBOL(ip_conntrack_helper_unregister);
3562+EXPORT_SYMBOL(ip_ct_selective_cleanup);
3563+EXPORT_SYMBOL(ip_ct_refresh);
3564+EXPORT_SYMBOL(ip_ct_find_proto);
3565+EXPORT_SYMBOL(__ip_ct_find_proto);
3566+EXPORT_SYMBOL(ip_ct_find_helper);
3567+EXPORT_SYMBOL(ip_conntrack_expect_related);
3568+EXPORT_SYMBOL(ip_conntrack_change_expect);
3569+EXPORT_SYMBOL(ip_conntrack_unexpect_related);
3570+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
3571+EXPORT_SYMBOL_GPL(ip_conntrack_expect_put);
3572+EXPORT_SYMBOL(ip_conntrack_tuple_taken);
3573+EXPORT_SYMBOL(ip_ct_gather_frags);
3574+EXPORT_SYMBOL(ip_conntrack_htable_size);
3575+EXPORT_SYMBOL(ip_conntrack_expect_list);
3576+EXPORT_SYMBOL(ip_conntrack_lock);
3577+EXPORT_SYMBOL(ip_conntrack_hash);
3578+EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
3579+EXPORT_SYMBOL_GPL(ip_conntrack_put);
3580diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ip_nat_core.c linux-2.6.3/net/ipv4/netfilter/ip_nat_core.c
3581--- linux-2.6.3.org/net/ipv4/netfilter/ip_nat_core.c 2004-02-18 04:57:16.000000000 +0100
3582+++ linux-2.6.3/net/ipv4/netfilter/ip_nat_core.c 2004-02-27 00:03:14.483026424 +0100
3583@@ -1016,6 +1016,10 @@
3584 /* FIXME: Man, this is a hack. <SIGH> */
3585 IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
3586 ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
3587+
3588+ /* Initialize fake conntrack so that NAT will skip it */
3589+ ip_conntrack_untracked.nat.info.initialized |=
3590+ (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
3591
3592 return 0;
3593 }
3594diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ip_nat_core.c.orig linux-2.6.3/net/ipv4/netfilter/ip_nat_core.c.orig
3595--- linux-2.6.3.org/net/ipv4/netfilter/ip_nat_core.c.orig 1970-01-01 01:00:00.000000000 +0100
3596+++ linux-2.6.3/net/ipv4/netfilter/ip_nat_core.c.orig 2004-02-18 04:57:16.000000000 +0100
3597@@ -0,0 +1,1036 @@
3598+/* NAT for netfilter; shared with compatibility layer. */
3599+
3600+/* (C) 1999-2001 Paul `Rusty' Russell
3601+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3602+ *
3603+ * This program is free software; you can redistribute it and/or modify
3604+ * it under the terms of the GNU General Public License version 2 as
3605+ * published by the Free Software Foundation.
3606+ */
3607+
3608+#include <linux/module.h>
3609+#include <linux/types.h>
3610+#include <linux/timer.h>
3611+#include <linux/skbuff.h>
3612+#include <linux/netfilter_ipv4.h>
3613+#include <linux/vmalloc.h>
3614+#include <net/checksum.h>
3615+#include <net/icmp.h>
3616+#include <net/ip.h>
3617+#include <net/tcp.h> /* For tcp_prot in getorigdst */
3618+#include <linux/icmp.h>
3619+#include <linux/udp.h>
3620+
3621+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
3622+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
3623+
3624+#include <linux/netfilter_ipv4/ip_conntrack.h>
3625+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
3626+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
3627+#include <linux/netfilter_ipv4/ip_nat.h>
3628+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
3629+#include <linux/netfilter_ipv4/ip_nat_core.h>
3630+#include <linux/netfilter_ipv4/ip_nat_helper.h>
3631+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
3632+#include <linux/netfilter_ipv4/listhelp.h>
3633+
3634+#if 0
3635+#define DEBUGP printk
3636+#else
3637+#define DEBUGP(format, args...)
3638+#endif
3639+
3640+DECLARE_RWLOCK(ip_nat_lock);
3641+DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
3642+
3643+/* Calculated at init based on memory size */
3644+static unsigned int ip_nat_htable_size;
3645+
3646+static struct list_head *bysource;
3647+static struct list_head *byipsproto;
3648+LIST_HEAD(protos);
3649+LIST_HEAD(helpers);
3650+
3651+extern struct ip_nat_protocol unknown_nat_protocol;
3652+
3653+/* We keep extra hashes for each conntrack, for fast searching. */
3654+static inline size_t
3655+hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto)
3656+{
3657+ /* Modified src and dst, to ensure we don't create two
3658+ identical streams. */
3659+ return (src + dst + proto) % ip_nat_htable_size;
3660+}
3661+
3662+static inline size_t
3663+hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
3664+{
3665+ /* Original src, to ensure we map it consistently if poss. */
3666+ return (manip->ip + manip->u.all + proto) % ip_nat_htable_size;
3667+}
3668+
3669+/* No one is using the conntrack by the time this is called. */
3670+static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
3671+{
3672+ struct ip_nat_info *info = &conn->nat.info;
3673+ unsigned int hs, hp;
3674+
3675+ if (!info->initialized)
3676+ return;
3677+
3678+ IP_NF_ASSERT(info->bysource.conntrack);
3679+ IP_NF_ASSERT(info->byipsproto.conntrack);
3680+
3681+ hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src,
3682+ conn->tuplehash[IP_CT_DIR_ORIGINAL]
3683+ .tuple.dst.protonum);
3684+
3685+ hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
3686+ conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
3687+ conn->tuplehash[IP_CT_DIR_REPLY]
3688+ .tuple.dst.protonum);
3689+
3690+ WRITE_LOCK(&ip_nat_lock);
3691+ LIST_DELETE(&bysource[hs], &info->bysource);
3692+ LIST_DELETE(&byipsproto[hp], &info->byipsproto);
3693+ WRITE_UNLOCK(&ip_nat_lock);
3694+}
3695+
3696+/* We do checksum mangling, so if they were wrong before they're still
3697+ * wrong. Also works for incomplete packets (eg. ICMP dest
3698+ * unreachables.) */
3699+u_int16_t
3700+ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
3701+{
3702+ u_int32_t diffs[] = { oldvalinv, newval };
3703+ return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
3704+ oldcheck^0xFFFF));
3705+}
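ip_nat_cheat_check() above is the RFC 1624 incremental checksum update: folding the one's-complement sum of ~oldval and newval into the inverted old checksum yields the checksum the packet would have had if it had been built with the new value all along. A self-contained userspace sketch demonstrating the equivalence; sum16/fold16 are local stand-ins for the kernel's csum_partial()/csum_fold(), and the packet contents are arbitrary:

#include <stdint.h>
#include <stdio.h>

/* One's-complement sum over an even-length buffer (stand-in for csum_partial()). */
static uint32_t sum16(const void *buf, size_t len, uint32_t sum)
{
	const uint16_t *p = buf;

	for (; len > 1; len -= 2)
		sum += *p++;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

/* Fold to 16 bits and invert (stand-in for csum_fold()). */
static uint16_t fold16(uint32_t sum)
{
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

/* Same computation as ip_nat_cheat_check(). */
static uint16_t cheat_check(uint32_t oldvalinv, uint32_t newval, uint16_t oldcheck)
{
	uint32_t diffs[2] = { oldvalinv, newval };

	return fold16(sum16(diffs, sizeof(diffs), oldcheck ^ 0xffff));
}

int main(void)
{
	uint32_t pkt[4] = { 0x0a000001, 0xc0a80001, 0x11223344, 0 };
	uint16_t check = fold16(sum16(pkt, sizeof(pkt), 0));
	uint32_t oldip = pkt[0], newip = 0x0a000002;
	uint16_t inc = cheat_check(~oldip, newip, check);	/* incremental */

	pkt[0] = newip;
	uint16_t full = fold16(sum16(pkt, sizeof(pkt), 0));	/* recompute */
	printf("incremental=%04x full=%04x\n", inc, full);
	return inc != full;
}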
3706+
3707+static inline int cmp_proto(const struct ip_nat_protocol *i, int proto)
3708+{
3709+ return i->protonum == proto;
3710+}
3711+
3712+struct ip_nat_protocol *
3713+find_nat_proto(u_int16_t protonum)
3714+{
3715+ struct ip_nat_protocol *i;
3716+
3717+ MUST_BE_READ_LOCKED(&ip_nat_lock);
3718+ i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum);
3719+ if (!i)
3720+ i = &unknown_nat_protocol;
3721+ return i;
3722+}
3723+
3724+/* Is this tuple already taken? (not by us) */
3725+int
3726+ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
3727+ const struct ip_conntrack *ignored_conntrack)
3728+{
3729+	/* Conntrack doesn't keep track of outgoing tuples; only
3730+ incoming ones. NAT means they don't have a fixed mapping,
3731+ so we invert the tuple and look for the incoming reply.
3732+
3733+ We could keep a separate hash if this proves too slow. */
3734+ struct ip_conntrack_tuple reply;
3735+
3736+ invert_tuplepr(&reply, tuple);
3737+ return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
3738+}
3739+
3740+/* Does tuple + the source manip come within the range mr? */
3741+static int
3742+in_range(const struct ip_conntrack_tuple *tuple,
3743+ const struct ip_conntrack_manip *manip,
3744+ const struct ip_nat_multi_range *mr)
3745+{
3746+ struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum);
3747+ unsigned int i;
3748+ struct ip_conntrack_tuple newtuple = { *manip, tuple->dst };
3749+
3750+ for (i = 0; i < mr->rangesize; i++) {
3751+ /* If we are allowed to map IPs, then we must be in the
3752+ range specified, otherwise we must be unchanged. */
3753+ if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
3754+ if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip)
3755+ || (ntohl(newtuple.src.ip)
3756+ > ntohl(mr->range[i].max_ip)))
3757+ continue;
3758+ } else {
3759+ if (newtuple.src.ip != tuple->src.ip)
3760+ continue;
3761+ }
3762+
3763+ if (!(mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED)
3764+ || proto->in_range(&newtuple, IP_NAT_MANIP_SRC,
3765+ &mr->range[i].min, &mr->range[i].max))
3766+ return 1;
3767+ }
3768+ return 0;
3769+}
3770+
3771+static inline int
3772+src_cmp(const struct ip_nat_hash *i,
3773+ const struct ip_conntrack_tuple *tuple,
3774+ const struct ip_nat_multi_range *mr)
3775+{
3776+ return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
3777+ == tuple->dst.protonum
3778+ && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
3779+ == tuple->src.ip
3780+ && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
3781+ == tuple->src.u.all
3782+ && in_range(tuple,
3783+ &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
3784+ .tuple.src,
3785+ mr));
3786+}
3787+
3788+/* Only called for SRC manip */
3789+static struct ip_conntrack_manip *
3790+find_appropriate_src(const struct ip_conntrack_tuple *tuple,
3791+ const struct ip_nat_multi_range *mr)
3792+{
3793+ unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum);
3794+ struct ip_nat_hash *i;
3795+
3796+ MUST_BE_READ_LOCKED(&ip_nat_lock);
3797+ i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr);
3798+ if (i)
3799+ return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
3800+ else
3801+ return NULL;
3802+}
3803+
3804+#ifdef CONFIG_IP_NF_NAT_LOCAL
3805+/* If it's really a local destination manip, it may need to do a
3806+ source manip too. */
3807+static int
3808+do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
3809+{
3810+ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = var_ip } } };
3811+ struct rtable *rt;
3812+
3813+ /* FIXME: IPTOS_TOS(iph->tos) --RR */
3814+ if (ip_route_output_key(&rt, &fl) != 0) {
3815+ DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
3816+ NIPQUAD(var_ip));
3817+ return 0;
3818+ }
3819+
3820+ *other_ipp = rt->rt_src;
3821+ ip_rt_put(rt);
3822+ return 1;
3823+}
3824+#endif
3825+
3826+/* Simple way to iterate through all. */
3827+static inline int fake_cmp(const struct ip_nat_hash *i,
3828+ u_int32_t src, u_int32_t dst, u_int16_t protonum,
3829+ unsigned int *score,
3830+ const struct ip_conntrack *conntrack)
3831+{
3832+ /* Compare backwards: we're dealing with OUTGOING tuples, and
3833+ inside the conntrack is the REPLY tuple. Don't count this
3834+ conntrack. */
3835+ if (i->conntrack != conntrack
3836+ && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst
3837+ && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src
3838+ && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum
3839+ == protonum))
3840+ (*score)++;
3841+ return 0;
3842+}
3843+
3844+static inline unsigned int
3845+count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum,
3846+ const struct ip_conntrack *conntrack)
3847+{
3848+ unsigned int score = 0;
3849+ unsigned int h;
3850+
3851+ MUST_BE_READ_LOCKED(&ip_nat_lock);
3852+ h = hash_by_ipsproto(src, dst, protonum);
3853+ LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *,
3854+ src, dst, protonum, &score, conntrack);
3855+
3856+ return score;
3857+}
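count_maps() exploits LIST_FIND() as a plain iterator: fake_cmp() never reports a match, so the search walks every node in the chain while accumulating hits through the score pointer. A self-contained sketch of the trick; find_first() is a hypothetical stand-in for LIST_FIND():

#include <stdio.h>

struct item { int proto; };

typedef int (*cmp_fn)(const struct item *, int, unsigned int *);

static struct item *find_first(struct item *arr, int n, cmp_fn cmp,
			       int key, unsigned int *score)
{
	for (int i = 0; i < n; i++)
		if (cmp(&arr[i], key, score))
			return &arr[i];
	return NULL;			/* always NULL with fake_cmp */
}

static int fake_cmp(const struct item *i, int proto, unsigned int *score)
{
	if (i->proto == proto)
		(*score)++;		/* count, but never "match" */
	return 0;
}

int main(void)
{
	struct item items[] = { {6}, {17}, {6}, {6} };
	unsigned int score = 0;

	find_first(items, 4, fake_cmp, 6, &score);
	printf("%u items map onto proto 6\n", score);	/* 3 */
	return 0;
}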
3858+
3859+/* For [FUTURE] fragmentation handling, we want the least-used
3860+ src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
3861+ if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
3862+ 1-65535, we don't do pro-rata allocation based on ports; we choose
3863+ the ip with the lowest src-ip/dst-ip/proto usage.
3864+
3865+ If an allocation then fails (eg. all 6 ports used in the 1.2.3.4
3866+ range), we eliminate that and try again. This is not the most
3867+ efficient approach, but if you're worried about that, don't hand us
3868+ ranges you don't really have. */
3869+static struct ip_nat_range *
3870+find_best_ips_proto(struct ip_conntrack_tuple *tuple,
3871+ const struct ip_nat_multi_range *mr,
3872+ const struct ip_conntrack *conntrack,
3873+ unsigned int hooknum)
3874+{
3875+ unsigned int i;
3876+ struct {
3877+ const struct ip_nat_range *range;
3878+ unsigned int score;
3879+ struct ip_conntrack_tuple tuple;
3880+ } best = { NULL, 0xFFFFFFFF };
3881+ u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip;
3882+ static unsigned int randomness;
3883+
3884+ if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
3885+ var_ipp = &tuple->src.ip;
3886+ saved_ip = tuple->dst.ip;
3887+ other_ipp = &tuple->dst.ip;
3888+ } else {
3889+ var_ipp = &tuple->dst.ip;
3890+ saved_ip = tuple->src.ip;
3891+ other_ipp = &tuple->src.ip;
3892+ }
3893+ /* Don't do do_extra_mangle unless necessary (overrides
3894+ explicit socket bindings, for example) */
3895+ orig_dstip = tuple->dst.ip;
3896+
3897+ IP_NF_ASSERT(mr->rangesize >= 1);
3898+ for (i = 0; i < mr->rangesize; i++) {
3899+ /* Host order */
3900+ u_int32_t minip, maxip, j;
3901+
3902+ /* Don't do ranges which are already eliminated. */
3903+ if (mr->range[i].flags & IP_NAT_RANGE_FULL) {
3904+ continue;
3905+ }
3906+
3907+ if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
3908+ minip = ntohl(mr->range[i].min_ip);
3909+ maxip = ntohl(mr->range[i].max_ip);
3910+ } else
3911+ minip = maxip = ntohl(*var_ipp);
3912+
3913+ randomness++;
3914+ for (j = 0; j < maxip - minip + 1; j++) {
3915+ unsigned int score;
3916+
3917+ *var_ipp = htonl(minip + (randomness + j)
3918+ % (maxip - minip + 1));
3919+
3920+ /* Reset the other ip in case it was mangled by
3921+ * do_extra_mangle last time. */
3922+ *other_ipp = saved_ip;
3923+
3924+#ifdef CONFIG_IP_NF_NAT_LOCAL
3925+ if (hooknum == NF_IP_LOCAL_OUT
3926+ && *var_ipp != orig_dstip
3927+ && !do_extra_mangle(*var_ipp, other_ipp)) {
3928+ DEBUGP("Range %u %u.%u.%u.%u rt failed!\n",
3929+ i, NIPQUAD(*var_ipp));
3930+ /* Can't route? This whole range part is
3931+ * probably screwed, but keep trying
3932+ * anyway. */
3933+ continue;
3934+ }
3935+#endif
3936+
3937+ /* Count how many others map onto this. */
3938+ score = count_maps(tuple->src.ip, tuple->dst.ip,
3939+ tuple->dst.protonum, conntrack);
3940+ if (score < best.score) {
3941+ /* Optimization: doesn't get any better than
3942+ this. */
3943+ if (score == 0)
3944+ return (struct ip_nat_range *)
3945+ &mr->range[i];
3946+
3947+ best.score = score;
3948+ best.tuple = *tuple;
3949+ best.range = &mr->range[i];
3950+ }
3951+ }
3952+ }
3953+ *tuple = best.tuple;
3954+
3955+ /* Discard const. */
3956+ return (struct ip_nat_range *)best.range;
3957+}
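The selection loop above walks each candidate range from a rotating offset (the static randomness counter) and keeps the address whose src/dst/proto triple is least used, bailing out early on a perfect score of zero. A self-contained sketch of just that scoring walk; usage_count() is a hypothetical stand-in for count_maps():

#include <stdint.h>
#include <stdio.h>

static unsigned int usage_count(uint32_t ip) { return ip % 3; }	/* stub scorer */

static uint32_t pick_least_used(uint32_t minip, uint32_t maxip)
{
	static unsigned int randomness;
	uint32_t span = maxip - minip + 1, best_ip = minip;
	unsigned int best_score = ~0u;

	randomness++;
	for (uint32_t j = 0; j < span; j++) {
		uint32_t ip = minip + (randomness + j) % span;
		unsigned int score = usage_count(ip);

		if (score == 0)
			return ip;	/* can't do better than unused */
		if (score < best_score) {
			best_score = score;
			best_ip = ip;
		}
	}
	return best_ip;
}

int main(void)
{
	printf("chose host-order ip %u\n", pick_least_used(10, 20));
	return 0;
}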
3958+
3959+/* Fast version doesn't iterate through hash chains, but only handles
3960+ common case of single IP address (null NAT, masquerade) */
3961+static struct ip_nat_range *
3962+find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple,
3963+ const struct ip_nat_multi_range *mr,
3964+ const struct ip_conntrack *conntrack,
3965+ unsigned int hooknum)
3966+{
3967+ if (mr->rangesize != 1
3968+ || (mr->range[0].flags & IP_NAT_RANGE_FULL)
3969+ || ((mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
3970+ && mr->range[0].min_ip != mr->range[0].max_ip))
3971+ return find_best_ips_proto(tuple, mr, conntrack, hooknum);
3972+
3973+ if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
3974+ if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
3975+ tuple->src.ip = mr->range[0].min_ip;
3976+ else {
3977+ /* Only do extra mangle when required (breaks
3978+ socket binding) */
3979+#ifdef CONFIG_IP_NF_NAT_LOCAL
3980+ if (tuple->dst.ip != mr->range[0].min_ip
3981+ && hooknum == NF_IP_LOCAL_OUT
3982+ && !do_extra_mangle(mr->range[0].min_ip,
3983+ &tuple->src.ip))
3984+ return NULL;
3985+#endif
3986+ tuple->dst.ip = mr->range[0].min_ip;
3987+ }
3988+ }
3989+
3990+ /* Discard const. */
3991+ return (struct ip_nat_range *)&mr->range[0];
3992+}
3993+
3994+static int
3995+get_unique_tuple(struct ip_conntrack_tuple *tuple,
3996+ const struct ip_conntrack_tuple *orig_tuple,
3997+ const struct ip_nat_multi_range *mrr,
3998+ struct ip_conntrack *conntrack,
3999+ unsigned int hooknum)
4000+{
4001+ struct ip_nat_protocol *proto
4002+ = find_nat_proto(orig_tuple->dst.protonum);
4003+ struct ip_nat_range *rptr;
4004+ unsigned int i;
4005+ int ret;
4006+
4007+ /* We temporarily use flags for marking full parts, but we
4008+ always clean up afterwards */
4009+ struct ip_nat_multi_range *mr = (void *)mrr;
4010+
4011+ /* 1) If this srcip/proto/src-proto-part is currently mapped,
4012+ and that same mapping gives a unique tuple within the given
4013+ range, use that.
4014+
4015+ This is only required for source (ie. NAT/masq) mappings.
4016+ So far, we don't do local source mappings, so multiple
4017+	   manips are not an issue. */
4018+ if (hooknum == NF_IP_POST_ROUTING) {
4019+ struct ip_conntrack_manip *manip;
4020+
4021+ manip = find_appropriate_src(orig_tuple, mr);
4022+ if (manip) {
4023+ /* Apply same source manipulation. */
4024+ *tuple = ((struct ip_conntrack_tuple)
4025+ { *manip, orig_tuple->dst });
4026+ DEBUGP("get_unique_tuple: Found current src map\n");
4027+ if (!ip_nat_used_tuple(tuple, conntrack))
4028+ return 1;
4029+ }
4030+ }
4031+
4032+ /* 2) Select the least-used IP/proto combination in the given
4033+ range.
4034+ */
4035+ *tuple = *orig_tuple;
4036+ while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum))
4037+ != NULL) {
4038+ DEBUGP("Found best for "); DUMP_TUPLE(tuple);
4039+ /* 3) The per-protocol part of the manip is made to
4040+ map into the range to make a unique tuple. */
4041+
4042+ /* Only bother mapping if it's not already in range
4043+ and unique */
4044+ if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
4045+ || proto->in_range(tuple, HOOK2MANIP(hooknum),
4046+ &rptr->min, &rptr->max))
4047+ && !ip_nat_used_tuple(tuple, conntrack)) {
4048+ ret = 1;
4049+ goto clear_fulls;
4050+ } else {
4051+ if (proto->unique_tuple(tuple, rptr,
4052+ HOOK2MANIP(hooknum),
4053+ conntrack)) {
4054+ /* Must be unique. */
4055+ IP_NF_ASSERT(!ip_nat_used_tuple(tuple,
4056+ conntrack));
4057+ ret = 1;
4058+ goto clear_fulls;
4059+ } else if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
4060+ /* Try implicit source NAT; protocol
4061+ may be able to play with ports to
4062+ make it unique. */
4063+ struct ip_nat_range r
4064+ = { IP_NAT_RANGE_MAP_IPS,
4065+ tuple->src.ip, tuple->src.ip,
4066+ { 0 }, { 0 } };
4067+ DEBUGP("Trying implicit mapping\n");
4068+ if (proto->unique_tuple(tuple, &r,
4069+ IP_NAT_MANIP_SRC,
4070+ conntrack)) {
4071+ /* Must be unique. */
4072+ IP_NF_ASSERT(!ip_nat_used_tuple
4073+ (tuple, conntrack));
4074+ ret = 1;
4075+ goto clear_fulls;
4076+ }
4077+ }
4078+ DEBUGP("Protocol can't get unique tuple %u.\n",
4079+ hooknum);
4080+ }
4081+
4082+ /* Eliminate that from range, and try again. */
4083+ rptr->flags |= IP_NAT_RANGE_FULL;
4084+ *tuple = *orig_tuple;
4085+ }
4086+
4087+ ret = 0;
4088+
4089+ clear_fulls:
4090+ /* Clear full flags. */
4091+ IP_NF_ASSERT(mr->rangesize >= 1);
4092+ for (i = 0; i < mr->rangesize; i++)
4093+ mr->range[i].flags &= ~IP_NAT_RANGE_FULL;
4094+
4095+ return ret;
4096+}
4097+
4098+static inline int
4099+helper_cmp(const struct ip_nat_helper *helper,
4100+ const struct ip_conntrack_tuple *tuple)
4101+{
4102+ return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
4103+}
4104+
4105+/* Where to manip the reply packets (will be reverse manip). */
4106+static unsigned int opposite_hook[NF_IP_NUMHOOKS]
4107+= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
4108+ [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING,
4109+#ifdef CONFIG_IP_NF_NAT_LOCAL
4110+ [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN,
4111+ [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT,
4112+#endif
4113+};
4114+
4115+unsigned int
4116+ip_nat_setup_info(struct ip_conntrack *conntrack,
4117+ const struct ip_nat_multi_range *mr,
4118+ unsigned int hooknum)
4119+{
4120+ struct ip_conntrack_tuple new_tuple, inv_tuple, reply;
4121+ struct ip_conntrack_tuple orig_tp;
4122+ struct ip_nat_info *info = &conntrack->nat.info;
4123+ int in_hashes = info->initialized;
4124+
4125+ MUST_BE_WRITE_LOCKED(&ip_nat_lock);
4126+ IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
4127+ || hooknum == NF_IP_POST_ROUTING
4128+ || hooknum == NF_IP_LOCAL_OUT);
4129+ IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
4130+ IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
4131+
4132+ /* What we've got will look like inverse of reply. Normally
4133+ this is what is in the conntrack, except for prior
4134+ manipulations (future optimization: if num_manips == 0,
4135+ orig_tp =
4136+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
4137+ invert_tuplepr(&orig_tp,
4138+ &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
4139+
4140+#if 0
4141+ {
4142+ unsigned int i;
4143+
4144+ DEBUGP("Hook %u (%s), ", hooknum,
4145+ HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST");
4146+ DUMP_TUPLE(&orig_tp);
4147+ DEBUGP("Range %p: ", mr);
4148+ for (i = 0; i < mr->rangesize; i++) {
4149+ DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n",
4150+ i,
4151+ (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS)
4152+ ? " MAP_IPS" : "",
4153+ (mr->range[i].flags
4154+ & IP_NAT_RANGE_PROTO_SPECIFIED)
4155+ ? " PROTO_SPECIFIED" : "",
4156+ (mr->range[i].flags & IP_NAT_RANGE_FULL)
4157+ ? " FULL" : "",
4158+ NIPQUAD(mr->range[i].min_ip),
4159+ NIPQUAD(mr->range[i].max_ip),
4160+ mr->range[i].min.all,
4161+ mr->range[i].max.all);
4162+ }
4163+ }
4164+#endif
4165+
4166+ do {
4167+ if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack,
4168+ hooknum)) {
4169+ DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n",
4170+ conntrack);
4171+ return NF_DROP;
4172+ }
4173+
4174+#if 0
4175+ DEBUGP("Hook %u (%s) %p\n", hooknum,
4176+ HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST",
4177+ conntrack);
4178+ DEBUGP("Original: ");
4179+ DUMP_TUPLE(&orig_tp);
4180+ DEBUGP("New: ");
4181+ DUMP_TUPLE(&new_tuple);
4182+#endif
4183+
4184+ /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT):
4185+ the original (A/B/C/D') and the mangled one (E/F/G/H').
4186+
4187+ We're only allowed to work with the SRC per-proto
4188+ part, so we create inverses of both to start, then
4189+ derive the other fields we need. */
4190+
4191+ /* Reply connection: simply invert the new tuple
4192+ (G/H/E/F') */
4193+ invert_tuplepr(&reply, &new_tuple);
4194+
4195+ /* Alter conntrack table so it recognizes replies.
4196+ If fail this race (reply tuple now used), repeat. */
4197+ } while (!ip_conntrack_alter_reply(conntrack, &reply));
4198+
4199+	/* FIXME: We can simply use the existing conntrack reply tuple
4200+ here --RR */
4201+ /* Create inverse of original: C/D/A/B' */
4202+ invert_tuplepr(&inv_tuple, &orig_tp);
4203+
4204+	/* Has source changed? */
4205+ if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {
4206+ /* In this direction, a source manip. */
4207+ info->manips[info->num_manips++] =
4208+ ((struct ip_nat_info_manip)
4209+ { IP_CT_DIR_ORIGINAL, hooknum,
4210+ IP_NAT_MANIP_SRC, new_tuple.src });
4211+
4212+ IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
4213+
4214+ /* In the reverse direction, a destination manip. */
4215+ info->manips[info->num_manips++] =
4216+ ((struct ip_nat_info_manip)
4217+ { IP_CT_DIR_REPLY, opposite_hook[hooknum],
4218+ IP_NAT_MANIP_DST, orig_tp.src });
4219+ IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
4220+ }
4221+
4222+ /* Has destination changed? */
4223+ if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {
4224+ /* In this direction, a destination manip */
4225+ info->manips[info->num_manips++] =
4226+ ((struct ip_nat_info_manip)
4227+ { IP_CT_DIR_ORIGINAL, hooknum,
4228+ IP_NAT_MANIP_DST, reply.src });
4229+
4230+ IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
4231+
4232+ /* In the reverse direction, a source manip. */
4233+ info->manips[info->num_manips++] =
4234+ ((struct ip_nat_info_manip)
4235+ { IP_CT_DIR_REPLY, opposite_hook[hooknum],
4236+ IP_NAT_MANIP_SRC, inv_tuple.src });
4237+ IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
4238+ }
4239+
4240+ /* If there's a helper, assign it; based on new tuple. */
4241+ if (!conntrack->master)
4242+ info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
4243+ &reply);
4244+
4245+ /* It's done. */
4246+ info->initialized |= (1 << HOOK2MANIP(hooknum));
4247+
4248+ if (in_hashes) {
4249+ IP_NF_ASSERT(info->bysource.conntrack);
4250+ replace_in_hashes(conntrack, info);
4251+ } else {
4252+ place_in_hashes(conntrack, info);
4253+ }
4254+
4255+ return NF_ACCEPT;
4256+}
4257+
4258+void replace_in_hashes(struct ip_conntrack *conntrack,
4259+ struct ip_nat_info *info)
4260+{
4261+ /* Source has changed, so replace in hashes. */
4262+ unsigned int srchash
4263+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4264+ .tuple.src,
4265+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4266+ .tuple.dst.protonum);
4267+	/* We place packet as seen OUTGOING in byips_proto hash
4268+	   (ie. reverse dst and src of reply packet). */
4269+ unsigned int ipsprotohash
4270+ = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY]
4271+ .tuple.dst.ip,
4272+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4273+ .tuple.src.ip,
4274+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4275+ .tuple.dst.protonum);
4276+
4277+ IP_NF_ASSERT(info->bysource.conntrack == conntrack);
4278+ MUST_BE_WRITE_LOCKED(&ip_nat_lock);
4279+
4280+ list_del(&info->bysource.list);
4281+ list_del(&info->byipsproto.list);
4282+
4283+ list_prepend(&bysource[srchash], &info->bysource);
4284+ list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
4285+}
4286+
4287+void place_in_hashes(struct ip_conntrack *conntrack,
4288+ struct ip_nat_info *info)
4289+{
4290+ unsigned int srchash
4291+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4292+ .tuple.src,
4293+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4294+ .tuple.dst.protonum);
4295+	/* We place packet as seen OUTGOING in byips_proto hash
4296+	   (ie. reverse dst and src of reply packet). */
4297+ unsigned int ipsprotohash
4298+ = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY]
4299+ .tuple.dst.ip,
4300+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4301+ .tuple.src.ip,
4302+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4303+ .tuple.dst.protonum);
4304+
4305+ IP_NF_ASSERT(!info->bysource.conntrack);
4306+
4307+ MUST_BE_WRITE_LOCKED(&ip_nat_lock);
4308+ info->byipsproto.conntrack = conntrack;
4309+ info->bysource.conntrack = conntrack;
4310+
4311+ list_prepend(&bysource[srchash], &info->bysource);
4312+ list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
4313+}
4314+
4315+/* Returns true if succeeded. */
4316+static int
4317+manip_pkt(u_int16_t proto,
4318+ struct sk_buff **pskb,
4319+ unsigned int iphdroff,
4320+ const struct ip_conntrack_manip *manip,
4321+ enum ip_nat_manip_type maniptype)
4322+{
4323+ struct iphdr *iph;
4324+
4325+ (*pskb)->nfcache |= NFC_ALTERED;
4326+ if (!skb_ip_make_writable(pskb, iphdroff+sizeof(iph)))
4327+ return 0;
4328+
4329+ iph = (void *)(*pskb)->data + iphdroff;
4330+
4331+	/* Manipulate protocol part. */
4332+ if (!find_nat_proto(proto)->manip_pkt(pskb,
4333+ iphdroff + iph->ihl*4,
4334+ manip, maniptype))
4335+ return 0;
4336+
4337+ iph = (void *)(*pskb)->data + iphdroff;
4338+
4339+ if (maniptype == IP_NAT_MANIP_SRC) {
4340+ iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip,
4341+ iph->check);
4342+ iph->saddr = manip->ip;
4343+ } else {
4344+ iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip,
4345+ iph->check);
4346+ iph->daddr = manip->ip;
4347+ }
4348+ return 1;
4349+}
4350+
4351+static inline int exp_for_packet(struct ip_conntrack_expect *exp,
4352+ struct sk_buff *skb)
4353+{
4354+ struct ip_conntrack_protocol *proto;
4355+ int ret = 1;
4356+
4357+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
4358+ proto = __ip_ct_find_proto(skb->nh.iph->protocol);
4359+ if (proto->exp_matches_pkt)
4360+ ret = proto->exp_matches_pkt(exp, skb);
4361+
4362+ return ret;
4363+}
4364+
4365+/* Do packet manipulations according to binding. */
4366+unsigned int
4367+do_bindings(struct ip_conntrack *ct,
4368+ enum ip_conntrack_info ctinfo,
4369+ struct ip_nat_info *info,
4370+ unsigned int hooknum,
4371+ struct sk_buff **pskb)
4372+{
4373+ unsigned int i;
4374+ struct ip_nat_helper *helper;
4375+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
4376+ int proto = (*pskb)->nh.iph->protocol;
4377+
4378+	/* Need nat lock to protect against modification, but neither
4379+	   the conntrack (referenced) nor the helper (deleted with
4380+	   synchronize_bh()) can vanish. */
4381+ READ_LOCK(&ip_nat_lock);
4382+ for (i = 0; i < info->num_manips; i++) {
4383+ if (info->manips[i].direction == dir
4384+ && info->manips[i].hooknum == hooknum) {
4385+ DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
4386+ *pskb,
4387+ info->manips[i].maniptype == IP_NAT_MANIP_SRC
4388+ ? "SRC" : "DST",
4389+ NIPQUAD(info->manips[i].manip.ip),
4390+ htons(info->manips[i].manip.u.all));
4391+ if (!manip_pkt(proto, pskb, 0,
4392+ &info->manips[i].manip,
4393+ info->manips[i].maniptype)) {
4394+ READ_UNLOCK(&ip_nat_lock);
4395+ return NF_DROP;
4396+ }
4397+ }
4398+ }
4399+ helper = info->helper;
4400+ READ_UNLOCK(&ip_nat_lock);
4401+
4402+ if (helper) {
4403+ struct ip_conntrack_expect *exp = NULL;
4404+ struct list_head *cur_item;
4405+ int ret = NF_ACCEPT;
4406+ int helper_called = 0;
4407+
4408+		DEBUGP("do_bindings: helper exists for (%p)\n", ct);
4409+
4410+ /* Always defragged for helpers */
4411+ IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
4412+ & htons(IP_MF|IP_OFFSET)));
4413+
4414+ /* Have to grab read lock before sibling_list traversal */
4415+ READ_LOCK(&ip_conntrack_lock);
4416+ list_for_each(cur_item, &ct->sibling_list) {
4417+ exp = list_entry(cur_item, struct ip_conntrack_expect,
4418+ expected_list);
4419+
4420+ /* if this expectation is already established, skip */
4421+ if (exp->sibling)
4422+ continue;
4423+
4424+ if (exp_for_packet(exp, *pskb)) {
4425+ /* FIXME: May be true multiple times in the
4426+ * case of UDP!! */
4427+ DEBUGP("calling nat helper (exp=%p) for packet\n", exp);
4428+ ret = helper->help(ct, exp, info, ctinfo,
4429+ hooknum, pskb);
4430+ if (ret != NF_ACCEPT) {
4431+ READ_UNLOCK(&ip_conntrack_lock);
4432+ return ret;
4433+ }
4434+ helper_called = 1;
4435+ }
4436+ }
4437+ /* Helper might want to manip the packet even when there is no
4438+ * matching expectation for this packet */
4439+ if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) {
4440+ DEBUGP("calling nat helper for packet without expectation\n");
4441+ ret = helper->help(ct, NULL, info, ctinfo,
4442+ hooknum, pskb);
4443+ if (ret != NF_ACCEPT) {
4444+ READ_UNLOCK(&ip_conntrack_lock);
4445+ return ret;
4446+ }
4447+ }
4448+ READ_UNLOCK(&ip_conntrack_lock);
4449+
4450+ /* Adjust sequence number only once per packet
4451+ * (helper is called at all hooks) */
4452+ if (proto == IPPROTO_TCP
4453+ && (hooknum == NF_IP_POST_ROUTING
4454+ || hooknum == NF_IP_LOCAL_IN)) {
4455+ DEBUGP("ip_nat_core: adjusting sequence number\n");
4456+ /* future: put this in a l4-proto specific function,
4457+ * and call this function here. */
4458+ if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
4459+ ret = NF_DROP;
4460+ }
4461+
4462+ return ret;
4463+
4464+ } else
4465+ return NF_ACCEPT;
4466+
4467+ /* not reached */
4468+}
4469+
4470+int
4471+icmp_reply_translation(struct sk_buff **pskb,
4472+ struct ip_conntrack *conntrack,
4473+ unsigned int hooknum,
4474+ int dir)
4475+{
4476+ struct {
4477+ struct icmphdr icmp;
4478+ struct iphdr ip;
4479+ } *inside;
4480+ unsigned int i;
4481+ struct ip_nat_info *info = &conntrack->nat.info;
4482+ int hdrlen;
4483+
4484+ if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside)))
4485+ return 0;
4486+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
4487+
4488+ /* We're actually going to mangle it beyond trivial checksum
4489+ adjustment, so make sure the current checksum is correct. */
4490+ if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
4491+ hdrlen = (*pskb)->nh.iph->ihl * 4;
4492+ if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
4493+ (*pskb)->len - hdrlen, 0)))
4494+ return 0;
4495+ }
4496+
4497+ /* Must be RELATED */
4498+ IP_NF_ASSERT((*pskb)->nfct
4499+ - (struct ip_conntrack *)(*pskb)->nfct->master
4500+ == IP_CT_RELATED
4501+ || (*pskb)->nfct
4502+ - (struct ip_conntrack *)(*pskb)->nfct->master
4503+ == IP_CT_RELATED+IP_CT_IS_REPLY);
4504+
4505+ /* Redirects on non-null nats must be dropped, else they'll
4506+ start talking to each other without our translation, and be
4507+ confused... --RR */
4508+ if (inside->icmp.type == ICMP_REDIRECT) {
4509+ /* Don't care about races here. */
4510+ if (info->initialized
4511+ != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST))
4512+ || info->num_manips != 0)
4513+ return 0;
4514+ }
4515+
4516+ DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n",
4517+ *pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
4518+ /* Note: May not be from a NAT'd host, but probably safest to
4519+ do translation always as if it came from the host itself
4520+ (even though a "host unreachable" coming from the host
4521+ itself is a bit weird).
4522+
4523+ More explanation: some people use NAT for anonymizing.
4524+ Also, CERT recommends dropping all packets from private IP
4525+ addresses (although ICMP errors from internal links with
4526+ such addresses are not too uncommon, as Alan Cox points
4527+ out) */
4528+
4529+ READ_LOCK(&ip_nat_lock);
4530+ for (i = 0; i < info->num_manips; i++) {
4531+ DEBUGP("icmp_reply: manip %u dir %s hook %u\n",
4532+ i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ?
4533+ "ORIG" : "REPLY", info->manips[i].hooknum);
4534+
4535+ if (info->manips[i].direction != dir)
4536+ continue;
4537+
4538+ /* Mapping the inner packet is just like a normal
4539+ packet, except it was never src/dst reversed, so
4540+ where we would normally apply a dst manip, we apply
4541+ a src, and vice versa. */
4542+ if (info->manips[i].hooknum == hooknum) {
4543+ DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n",
4544+ info->manips[i].maniptype == IP_NAT_MANIP_SRC
4545+ ? "DST" : "SRC",
4546+ NIPQUAD(info->manips[i].manip.ip),
4547+ ntohs(info->manips[i].manip.u.udp.port));
4548+ if (!manip_pkt(inside->ip.protocol, pskb,
4549+ (*pskb)->nh.iph->ihl*4
4550+ + sizeof(inside->icmp),
4551+ &info->manips[i].manip,
4552+ !info->manips[i].maniptype))
4553+ goto unlock_fail;
4554+
4555+ /* Outer packet needs to have IP header NATed like
4556+ it's a reply. */
4557+
4558+			/* Use mapping to map outer packet: 0 gives no
4559+			   per-proto mapping */
4560+ DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n",
4561+ info->manips[i].maniptype == IP_NAT_MANIP_SRC
4562+ ? "SRC" : "DST",
4563+ NIPQUAD(info->manips[i].manip.ip));
4564+ if (!manip_pkt(0, pskb, 0,
4565+ &info->manips[i].manip,
4566+ info->manips[i].maniptype))
4567+ goto unlock_fail;
4568+ }
4569+ }
4570+ READ_UNLOCK(&ip_nat_lock);
4571+
4572+ hdrlen = (*pskb)->nh.iph->ihl * 4;
4573+
4574+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
4575+
4576+ inside->icmp.checksum = 0;
4577+ inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
4578+ (*pskb)->len - hdrlen,
4579+ 0));
4580+ return 1;
4581+
4582+ unlock_fail:
4583+ READ_UNLOCK(&ip_nat_lock);
4584+ return 0;
4585+}
4586+
4587+int __init ip_nat_init(void)
4588+{
4589+ size_t i;
4590+
4591+ /* Leave them the same for the moment. */
4592+ ip_nat_htable_size = ip_conntrack_htable_size;
4593+
4594+ /* One vmalloc for both hash tables */
4595+ bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
4596+ if (!bysource) {
4597+ return -ENOMEM;
4598+ }
4599+ byipsproto = bysource + ip_nat_htable_size;
4600+
4601+ /* Sew in builtin protocols. */
4602+ WRITE_LOCK(&ip_nat_lock);
4603+ list_append(&protos, &ip_nat_protocol_tcp);
4604+ list_append(&protos, &ip_nat_protocol_udp);
4605+ list_append(&protos, &ip_nat_protocol_icmp);
4606+ WRITE_UNLOCK(&ip_nat_lock);
4607+
4608+ for (i = 0; i < ip_nat_htable_size; i++) {
4609+ INIT_LIST_HEAD(&bysource[i]);
4610+ INIT_LIST_HEAD(&byipsproto[i]);
4611+ }
4612+
4613+ /* FIXME: Man, this is a hack. <SIGH> */
4614+ IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
4615+ ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
4616+
4617+ return 0;
4618+}
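Note the allocation trick in ip_nat_init() above: both hash tables come from one vmalloc(), with byipsproto simply pointing ip_nat_htable_size entries past bysource, so the single vfree(bysource) in ip_nat_cleanup() releases both. A userspace sketch of the same layout:

#include <stdio.h>
#include <stdlib.h>

struct bucket { struct bucket *next; };

int main(void)
{
	size_t n = 1024;
	struct bucket *bysource = calloc(2 * n, sizeof(*bysource));
	struct bucket *byipsproto;

	if (!bysource)
		return 1;
	byipsproto = bysource + n;	/* second table, same allocation */

	printf("bysource=%p byipsproto=%p\n",
	       (void *)bysource, (void *)byipsproto);
	free(bysource);			/* frees both tables at once */
	return 0;
}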
4619+
4620+/* Clear NAT section of all conntracks, in case we're loaded again. */
4621+static int clean_nat(const struct ip_conntrack *i, void *data)
4622+{
4623+ memset((void *)&i->nat, 0, sizeof(i->nat));
4624+ return 0;
4625+}
4626+
4627+/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
4628+void ip_nat_cleanup(void)
4629+{
4630+ ip_ct_selective_cleanup(&clean_nat, NULL);
4631+ ip_conntrack_destroyed = NULL;
4632+ vfree(bysource);
4633+}
4634diff -Nur linux-2.6.3.org/net/ipv4/netfilter/iptable_raw.c linux-2.6.3/net/ipv4/netfilter/iptable_raw.c
4635--- linux-2.6.3.org/net/ipv4/netfilter/iptable_raw.c 1970-01-01 01:00:00.000000000 +0100
4636+++ linux-2.6.3/net/ipv4/netfilter/iptable_raw.c 2004-02-27 00:03:14.470028400 +0100
4637@@ -0,0 +1,149 @@
4638+/*
4639+ * 'raw' table, the very first table hooked in at PRE_ROUTING and LOCAL_OUT.
4640+ *
4641+ * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
4642+ */
4643+#include <linux/module.h>
4644+#include <linux/netfilter_ipv4/ip_tables.h>
4645+
4646+#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))
4647+
4648+/* Standard entry. */
4649+struct ipt_standard
4650+{
4651+ struct ipt_entry entry;
4652+ struct ipt_standard_target target;
4653+};
4654+
4655+struct ipt_error_target
4656+{
4657+ struct ipt_entry_target target;
4658+ char errorname[IPT_FUNCTION_MAXNAMELEN];
4659+};
4660+
4661+struct ipt_error
4662+{
4663+ struct ipt_entry entry;
4664+ struct ipt_error_target target;
4665+};
4666+
4667+static struct
4668+{
4669+ struct ipt_replace repl;
4670+ struct ipt_standard entries[2];
4671+ struct ipt_error term;
4672+} initial_table __initdata
4673+= { { "raw", RAW_VALID_HOOKS, 3,
4674+ sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
4675+ { [NF_IP_PRE_ROUTING] 0,
4676+ [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) },
4677+ { [NF_IP_PRE_ROUTING] 0,
4678+ [NF_IP_LOCAL_OUT] sizeof(struct ipt_standard) },
4679+ 0, NULL, { } },
4680+ {
4681+ /* PRE_ROUTING */
4682+ { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
4683+ 0,
4684+ sizeof(struct ipt_entry),
4685+ sizeof(struct ipt_standard),
4686+ 0, { 0, 0 }, { } },
4687+ { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
4688+ -NF_ACCEPT - 1 } },
4689+ /* LOCAL_OUT */
4690+ { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
4691+ 0,
4692+ sizeof(struct ipt_entry),
4693+ sizeof(struct ipt_standard),
4694+ 0, { 0, 0 }, { } },
4695+ { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
4696+ -NF_ACCEPT - 1 } }
4697+ },
4698+ /* ERROR */
4699+ { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
4700+ 0,
4701+ sizeof(struct ipt_entry),
4702+ sizeof(struct ipt_error),
4703+ 0, { 0, 0 }, { } },
4704+ { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
4705+ { } },
4706+ "ERROR"
4707+ }
4708+ }
4709+};
4710+
4711+static struct ipt_table packet_raw = {
4712+ .name = "raw",
4713+ .table = &initial_table.repl,
4714+ .valid_hooks = RAW_VALID_HOOKS,
4715+ .lock = RW_LOCK_UNLOCKED,
4716+ .me = THIS_MODULE
4717+};
4718+
4719+/* The work comes in here from netfilter.c. */
4720+static unsigned int
4721+ipt_hook(unsigned int hook,
4722+ struct sk_buff **pskb,
4723+ const struct net_device *in,
4724+ const struct net_device *out,
4725+ int (*okfn)(struct sk_buff *))
4726+{
4727+ return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL);
4728+}
4729+
4730+/* 'raw' is the very first table. */
4731+static struct nf_hook_ops ipt_ops[] = {
4732+ {
4733+ .hook = ipt_hook,
4734+ .pf = PF_INET,
4735+ .hooknum = NF_IP_PRE_ROUTING,
4736+ .priority = NF_IP_PRI_RAW
4737+ },
4738+ {
4739+ .hook = ipt_hook,
4740+ .pf = PF_INET,
4741+ .hooknum = NF_IP_LOCAL_OUT,
4742+ .priority = NF_IP_PRI_RAW
4743+ },
4744+};
4745+
4746+static int __init init(void)
4747+{
4748+ int ret;
4749+
4750+ /* Register table */
4751+ ret = ipt_register_table(&packet_raw);
4752+ if (ret < 0)
4753+ return ret;
4754+
4755+ /* Register hooks */
4756+ ret = nf_register_hook(&ipt_ops[0]);
4757+ if (ret < 0)
4758+ goto cleanup_table;
4759+
4760+ ret = nf_register_hook(&ipt_ops[1]);
4761+ if (ret < 0)
4762+ goto cleanup_hook0;
4763+
4764+ return ret;
4765+
4766+ cleanup_hook0:
4767+ nf_unregister_hook(&ipt_ops[0]);
4768+ cleanup_table:
4769+ ipt_unregister_table(&packet_raw);
4770+
4771+ return ret;
4772+}
4773+
4774+static void __exit fini(void)
4775+{
4776+ unsigned int i;
4777+
4778+ for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
4779+ nf_unregister_hook(&ipt_ops[i]);
4780+
4781+ ipt_unregister_table(&packet_raw);
4782+}
4783+
4784+module_init(init);
4785+module_exit(fini);
4786+MODULE_LICENSE("GPL");
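The raw table registers at priority NF_IP_PRI_RAW, ahead of connection tracking, which is what makes it the natural place for rules that exempt traffic from tracking. Assuming the companion NOTRACK target from this patchset is also built, a typical rule would look like the following (the iptables command is illustrative, not part of the patch):

	iptables -t raw -A PREROUTING -p udp --dport 53 -j NOTRACK

Packets matched this way receive the fake ip_conntrack_untracked entry set up elsewhere in this patch, and can later be selected via the conntrack match's new UNTRACKED state bit.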
4787diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ipt_connlimit.c linux-2.6.3/net/ipv4/netfilter/ipt_connlimit.c
4788--- linux-2.6.3.org/net/ipv4/netfilter/ipt_connlimit.c 1970-01-01 01:00:00.000000000 +0100
4789+++ linux-2.6.3/net/ipv4/netfilter/ipt_connlimit.c 2004-02-27 00:03:07.981014880 +0100
4790@@ -0,0 +1,230 @@
4791+/*
4792+ * netfilter module to limit the number of parallel tcp
4793+ * connections per IP address.
4794+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
4795+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
4796+ * only ignore TIME_WAIT or gone connections
4797+ *
4798+ * based on ...
4799+ *
4800+ * Kernel module to match connection tracking information.
4801+ * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
4802+ */
4803+#include <linux/module.h>
4804+#include <linux/skbuff.h>
4805+#include <linux/list.h>
4806+#include <linux/netfilter_ipv4/ip_conntrack.h>
4807+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
4808+#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
4809+#include <linux/netfilter_ipv4/ip_tables.h>
4810+#include <linux/netfilter_ipv4/ipt_connlimit.h>
4811+
4812+#define DEBUG 0
4813+
4814+MODULE_LICENSE("GPL");
4815+
4816+/* we'll save the tuples of all connections we care about */
4817+struct ipt_connlimit_conn
4818+{
4819+ struct list_head list;
4820+ struct ip_conntrack_tuple tuple;
4821+};
4822+
4823+struct ipt_connlimit_data {
4824+ spinlock_t lock;
4825+ struct list_head iphash[256];
4826+};
4827+
4828+static int ipt_iphash(u_int32_t addr)
4829+{
4830+ int hash;
4831+
4832+ hash = addr & 0xff;
4833+ hash ^= (addr >> 8) & 0xff;
4834+ hash ^= (addr >> 16) & 0xff;
4835+ hash ^= (addr >> 24) & 0xff;
4836+ return hash;
4837+}
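ipt_iphash() XOR-folds the four address bytes so that every byte influences the result while the value always fits the 256-entry iphash[] table. A standalone demo of the same fold:

#include <stdint.h>
#include <stdio.h>

static int ipt_iphash(uint32_t addr)
{
	int hash;

	hash  = addr & 0xff;
	hash ^= (addr >> 8) & 0xff;
	hash ^= (addr >> 16) & 0xff;
	hash ^= (addr >> 24) & 0xff;
	return hash;
}

int main(void)
{
	uint32_t ip = (192u << 24) | (168u << 16) | (1u << 8) | 42u;	/* 192.168.1.42 */

	printf("bucket %d of 256\n", ipt_iphash(ip));	/* 192^168^1^42 = 67 */
	return 0;
}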
4838+
4839+static int count_them(struct ipt_connlimit_data *data,
4840+ u_int32_t addr, u_int32_t mask,
4841+ struct ip_conntrack *ct)
4842+{
4843+#if DEBUG
4844+ const static char *tcp[] = { "none", "established", "syn_sent", "syn_recv",
4845+ "fin_wait", "time_wait", "close", "close_wait",
4846+ "last_ack", "listen" };
4847+#endif
4848+ int addit = 1, matches = 0;
4849+ struct ip_conntrack_tuple tuple;
4850+ struct ip_conntrack_tuple_hash *found;
4851+ struct ipt_connlimit_conn *conn;
4852+ struct list_head *hash,*lh;
4853+
4854+ spin_lock(&data->lock);
4855+ tuple = ct->tuplehash[0].tuple;
4856+ hash = &data->iphash[ipt_iphash(addr & mask)];
4857+
4858+ /* check the saved connections */
4859+ for (lh = hash->next; lh != hash; lh = lh->next) {
4860+ conn = list_entry(lh,struct ipt_connlimit_conn,list);
4861+ found = ip_conntrack_find_get(&conn->tuple,ct);
4862+ if (0 == memcmp(&conn->tuple,&tuple,sizeof(tuple)) &&
4863+ found != NULL &&
4864+ found->ctrack->proto.tcp.state != TCP_CONNTRACK_TIME_WAIT) {
4865+ /* Just to be sure we have it only once in the list.
4866+			   We shouldn't see tuples twice unless someone hooks this
4867+ into a table without "-p tcp --syn" */
4868+ addit = 0;
4869+ }
4870+#if DEBUG
4871+ printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d %s\n",
4872+ ipt_iphash(addr & mask),
4873+ NIPQUAD(conn->tuple.src.ip), ntohs(conn->tuple.src.u.tcp.port),
4874+ NIPQUAD(conn->tuple.dst.ip), ntohs(conn->tuple.dst.u.tcp.port),
4875+ (NULL != found) ? tcp[found->ctrack->proto.tcp.state] : "gone");
4876+#endif
4877+ if (NULL == found) {
4878+ /* this one is gone */
4879+ lh = lh->prev;
4880+ list_del(lh->next);
4881+ kfree(conn);
4882+ continue;
4883+ }
4884+ if (found->ctrack->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT) {
4885+ /* we don't care about connections which are
4886+ closed already -> ditch it */
4887+ lh = lh->prev;
4888+ list_del(lh->next);
4889+ kfree(conn);
4890+ nf_conntrack_put(&found->ctrack->infos[0]);
4891+ continue;
4892+ }
4893+ if ((addr & mask) == (conn->tuple.src.ip & mask)) {
4894+ /* same source IP address -> be counted! */
4895+ matches++;
4896+ }
4897+ nf_conntrack_put(&found->ctrack->infos[0]);
4898+ }
4899+ if (addit) {
4900+ /* save the new connection in our list */
4901+#if DEBUG
4902+ printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d new\n",
4903+ ipt_iphash(addr & mask),
4904+ NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
4905+ NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
4906+#endif
4907+		conn = kmalloc(sizeof(*conn),GFP_ATOMIC);
4908+		if (NULL == conn) {
4908+			spin_unlock(&data->lock);
4909+			return -1;
4909+		}
4910+ memset(conn,0,sizeof(*conn));
4911+ INIT_LIST_HEAD(&conn->list);
4912+ conn->tuple = tuple;
4913+ list_add(&conn->list,hash);
4914+ matches++;
4915+ }
4916+ spin_unlock(&data->lock);
4917+ return matches;
4918+}
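count_them() above prunes dead entries while walking the bucket: it steps the cursor back to the previous node before unlinking, so the for-loop's lh = lh->next advance stays valid. A self-contained sketch of that delete-while-walking idiom, with a tiny circular doubly-linked list standing in for the kernel's struct list_head:

#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next, *prev; int dead; };

static void node_del(struct node *n)
{
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

int main(void)
{
	struct node head = { &head, &head, 0 }, *lh;
	int alive = 0;

	for (int i = 0; i < 5; i++) {	/* five nodes, odd ones "dead" */
		struct node *n = malloc(sizeof(*n));

		n->dead = i & 1;
		n->next = head.next;
		n->prev = &head;
		head.next->prev = n;
		head.next = n;
	}

	for (lh = head.next; lh != &head; lh = lh->next) {
		struct node *cur = lh;

		if (cur->dead) {
			lh = lh->prev;	/* step back first... */
			node_del(cur);	/* ...then unlink and free */
			free(cur);
		} else {
			alive++;
		}
	}
	printf("%d nodes survive\n", alive);	/* 3 */
	return 0;
}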
4919+
4920+static int
4921+match(const struct sk_buff *skb,
4922+ const struct net_device *in,
4923+ const struct net_device *out,
4924+ const void *matchinfo,
4925+ int offset,
4926+ int *hotdrop)
4927+{
4928+ const struct ipt_connlimit_info *info = matchinfo;
4929+ int connections, match;
4930+ struct ip_conntrack *ct;
4931+ enum ip_conntrack_info ctinfo;
4932+
4933+ ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
4934+ if (NULL == ct) {
4935+ printk("ipt_connlimit: Oops: invalid ct state ?\n");
4936+ *hotdrop = 1;
4937+ return 0;
4938+ }
4939+ connections = count_them(info->data,skb->nh.iph->saddr,info->mask,ct);
4940+ if (-1 == connections) {
4941+ printk("ipt_connlimit: Hmm, kmalloc failed :-(\n");
4942+ *hotdrop = 1; /* let's free some memory :-) */
4943+ return 0;
4944+ }
4945+ match = (info->inverse) ? (connections <= info->limit) : (connections > info->limit);
4946+#if DEBUG
4947+ printk("ipt_connlimit: src=%u.%u.%u.%u mask=%u.%u.%u.%u "
4948+ "connections=%d limit=%d match=%s\n",
4949+ NIPQUAD(skb->nh.iph->saddr), NIPQUAD(info->mask),
4950+ connections, info->limit, match ? "yes" : "no");
4951+#endif
4952+
4953+ return match;
4954+}
4955+
4956+static int check(const char *tablename,
4957+ const struct ipt_ip *ip,
4958+ void *matchinfo,
4959+ unsigned int matchsize,
4960+ unsigned int hook_mask)
4961+{
4962+ struct ipt_connlimit_info *info = matchinfo;
4963+ int i;
4964+
4965+ /* verify size */
4966+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_connlimit_info)))
4967+ return 0;
4968+
4969+ /* refuse anything but tcp */
4970+ if (ip->proto != IPPROTO_TCP)
4971+ return 0;
4972+
4973+ /* init private data */
4974+	info->data = kmalloc(sizeof(struct ipt_connlimit_data),GFP_KERNEL);
4974+	if (NULL == info->data)
4974+		return 0;
4975+	spin_lock_init(&(info->data->lock));
4976+ for (i = 0; i < 256; i++)
4977+ INIT_LIST_HEAD(&(info->data->iphash[i]));
4978+
4979+ return 1;
4980+}
4981+
4982+static void destroy(void *matchinfo, unsigned int matchinfosize)
4983+{
4984+ struct ipt_connlimit_info *info = matchinfo;
4985+ struct ipt_connlimit_conn *conn;
4986+ struct list_head *hash;
4987+ int i;
4988+
4989+ /* cleanup */
4990+ for (i = 0; i < 256; i++) {
4991+ hash = &(info->data->iphash[i]);
4992+ while (hash != hash->next) {
4993+ conn = list_entry(hash->next,struct ipt_connlimit_conn,list);
4994+ list_del(hash->next);
4995+ kfree(conn);
4996+ }
4997+ }
4998+ kfree(info->data);
4999+}
5000+
5001+static struct ipt_match connlimit_match = {
5002+ .name = "connlimit",
5003+ .match = &match,
5004+ .checkentry = &check,
5005+ .destroy = &destroy,
5006+ .me = THIS_MODULE
5007+};
5008+
5009+static int __init init(void)
5010+{
5011+ return ipt_register_match(&connlimit_match);
5012+}
5013+
5014+static void __exit fini(void)
5015+{
5016+ ipt_unregister_match(&connlimit_match);
5017+}
5018+
5019+module_init(init);
5020+module_exit(fini);
5021diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ipt_conntrack.c linux-2.6.3/net/ipv4/netfilter/ipt_conntrack.c
5022--- linux-2.6.3.org/net/ipv4/netfilter/ipt_conntrack.c 2004-02-18 04:59:26.000000000 +0100
5023+++ linux-2.6.3/net/ipv4/netfilter/ipt_conntrack.c 2004-02-27 00:03:14.483026424 +0100
5024@@ -35,11 +35,13 @@
5025
5026 #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
5027
5028- if (ct)
5029- statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
5030- else
5031- statebit = IPT_CONNTRACK_STATE_INVALID;
5032-
5033+ if (skb->nfct == &ip_conntrack_untracked.infos[IP_CT_NEW])
5034+ statebit = IPT_CONNTRACK_STATE_UNTRACKED;
5035+ else if (ct)
5036+ statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
5037+ else
5038+ statebit = IPT_CONNTRACK_STATE_INVALID;
5039+
5040 if(sinfo->flags & IPT_CONNTRACK_STATE) {
5041 if (ct) {
5042 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
5043diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ipt_conntrack.c.orig linux-2.6.3/net/ipv4/netfilter/ipt_conntrack.c.orig
5044--- linux-2.6.3.org/net/ipv4/netfilter/ipt_conntrack.c.orig 1970-01-01 01:00:00.000000000 +0100
5045+++ linux-2.6.3/net/ipv4/netfilter/ipt_conntrack.c.orig 2004-02-18 04:59:26.000000000 +0100
5046@@ -0,0 +1,134 @@
5047+/* Kernel module to match connection tracking information.
5048+ * Superset of Rusty's minimalistic state match.
5049+ *
5050+ * (C) 2001 Marc Boucher (marc@mbsi.ca).
5051+ *
5052+ * This program is free software; you can redistribute it and/or modify
5053+ * it under the terms of the GNU General Public License version 2 as
5054+ * published by the Free Software Foundation.
5055+ */
5056+
5057+#include <linux/module.h>
5058+#include <linux/skbuff.h>
5059+#include <linux/netfilter_ipv4/ip_conntrack.h>
5060+#include <linux/netfilter_ipv4/ip_tables.h>
5061+#include <linux/netfilter_ipv4/ipt_conntrack.h>
5062+
5063+MODULE_LICENSE("GPL");
5064+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
5065+MODULE_DESCRIPTION("iptables connection tracking match module");
5066+
5067+static int
5068+match(const struct sk_buff *skb,
5069+ const struct net_device *in,
5070+ const struct net_device *out,
5071+ const void *matchinfo,
5072+ int offset,
5073+ int *hotdrop)
5074+{
5075+ const struct ipt_conntrack_info *sinfo = matchinfo;
5076+ struct ip_conntrack *ct;
5077+ enum ip_conntrack_info ctinfo;
5078+ unsigned int statebit;
5079+
5080+ ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
5081+
5082+#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
5083+
5084+ if (ct)
5085+ statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
5086+ else
5087+ statebit = IPT_CONNTRACK_STATE_INVALID;
5088+
5089+ if(sinfo->flags & IPT_CONNTRACK_STATE) {
5090+ if (ct) {
5091+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
5092+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
5093+ statebit |= IPT_CONNTRACK_STATE_SNAT;
5094+
5095+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
5096+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
5097+ statebit |= IPT_CONNTRACK_STATE_DNAT;
5098+ }
5099+
5100+ if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
5101+ return 0;
5102+ }
5103+
5104+ if(sinfo->flags & IPT_CONNTRACK_PROTO) {
5105+ if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
5106+ return 0;
5107+ }
5108+
5109+ if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
5110+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
5111+ return 0;
5112+ }
5113+
5114+ if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
5115+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
5116+ return 0;
5117+ }
5118+
5119+ if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
5120+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
5121+ return 0;
5122+ }
5123+
5124+ if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
5125+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
5126+ return 0;
5127+ }
5128+
5129+ if(sinfo->flags & IPT_CONNTRACK_STATUS) {
5130+ if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
5131+ return 0;
5132+ }
5133+
5134+ if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
5135+ unsigned long expires;
5136+
5137+ if(!ct)
5138+ return 0;
5139+
5140+ expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
5141+
5142+ if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
5143+ return 0;
5144+ }
5145+
5146+ return 1;
5147+}
5148+
5149+static int check(const char *tablename,
5150+ const struct ipt_ip *ip,
5151+ void *matchinfo,
5152+ unsigned int matchsize,
5153+ unsigned int hook_mask)
5154+{
5155+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
5156+ return 0;
5157+
5158+ return 1;
5159+}
5160+
5161+static struct ipt_match conntrack_match = {
5162+ .name = "conntrack",
5163+ .match = &match,
5164+ .checkentry = &check,
5165+ .me = THIS_MODULE,
5166+};
5167+
5168+static int __init init(void)
5169+{
5170+ need_ip_conntrack();
5171+ return ipt_register_match(&conntrack_match);
5172+}
5173+
5174+static void __exit fini(void)
5175+{
5176+ ipt_unregister_match(&conntrack_match);
5177+}
5178+
5179+module_init(init);
5180+module_exit(fini);
5181diff -Nur linux-2.6.3.org/net/ipv4/netfilter/ipt_dstlimit.c linux-2.6.3/net/ipv4/netfilter/ipt_dstlimit.c
5182--- linux-2.6.3.org/net/ipv4/netfilter/ipt_dstlimit.c 1970-01-01 01:00:00.000000000 +0100
5183+++ linux-2.6.3/net/ipv4/netfilter/ipt_dstlimit.c 2004-02-27 00:03:08.652912736 +0100
5184@@ -0,0 +1,690 @@
5185+/* iptables match extension to limit the number of packets per second
5186+ * separately for each destination.
5187+ *
5188+ * (C) 2003 by Harald Welte <laforge@netfilter.org>
5189+ *
5190+ * ipt_dstlimit.c,v 1.3 2004/02/23 00:15:45 laforge Exp
5191+ *
5192+ * Development of this code was funded by Astaro AG, http://www.astaro.com/
5193+ *
5194+ * based on ipt_limit.c by:
5195