]> git.pld-linux.org Git - packages/kernel.git/blame - 2.6.x-patch-o-matic-ng-base-20040308.patch
- fix unresolved symbols in ipv6 netfilter
[packages/kernel.git] / 2.6.x-patch-o-matic-ng-base-20040308.patch
CommitLineData
735d9e84 1diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter.h linux-2.6.4-rc2/include/linux/netfilter.h
2--- linux-2.6.4-rc2.org/include/linux/netfilter.h 2004-03-04 06:16:47.000000000 +0000
3+++ linux-2.6.4-rc2/include/linux/netfilter.h 2004-03-08 08:48:37.000000000 +0000
4@@ -99,6 +99,24 @@
5
6 extern struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
7
8+typedef void nf_logfn(unsigned int hooknum,
9+ const struct sk_buff *skb,
10+ const struct net_device *in,
11+ const struct net_device *out,
12+ const char *prefix);
13+
14+/* Function to register/unregister log function. */
15+int nf_log_register(int pf, nf_logfn *logfn);
16+void nf_log_unregister(int pf, nf_logfn *logfn);
17+
18+/* Calls the registered backend logging function */
19+void nf_log_packet(int pf,
20+ unsigned int hooknum,
21+ const struct sk_buff *skb,
22+ const struct net_device *in,
23+ const struct net_device *out,
24+ const char *fmt, ...);
25+
26 /* Activate hook; either okfn or kfree_skb called, unless a hook
27 returns NF_STOLEN (in which case, it's up to the hook to deal with
28 the consequences).
29diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ip_conntrack.h
30--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ip_conntrack.h 2004-03-04 06:17:04.000000000 +0000
31+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ip_conntrack.h 2004-03-08 08:48:52.000000000 +0000
32@@ -251,6 +251,9 @@
33 /* Call me when a conntrack is destroyed. */
34 extern void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack);
35
36+/* Fake conntrack entry for untracked connections */
37+extern struct ip_conntrack ip_conntrack_untracked;
38+
39 /* Returns new sk_buff, or NULL */
40 struct sk_buff *
41 ip_ct_gather_frags(struct sk_buff *skb);
42diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_TTL.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_TTL.h
43--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_TTL.h 1970-01-01 00:00:00.000000000 +0000
44+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_TTL.h 2004-03-08 08:48:44.000000000 +0000
45@@ -0,0 +1,21 @@
46+/* TTL modification module for IP tables
47+ * (C) 2000 by Harald Welte <laforge@gnumonks.org> */
48+
49+#ifndef _IPT_TTL_H
50+#define _IPT_TTL_H
51+
52+enum {
53+ IPT_TTL_SET = 0,
54+ IPT_TTL_INC,
55+ IPT_TTL_DEC
56+};
57+
58+#define IPT_TTL_MAXMODE IPT_TTL_DEC
59+
60+struct ipt_TTL_info {
61+ u_int8_t mode;
62+ u_int8_t ttl;
63+};
64+
65+
66+#endif
67diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_ULOG.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_ULOG.h
68--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_ULOG.h 2004-03-04 06:16:43.000000000 +0000
69+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_ULOG.h 2004-03-08 08:48:37.000000000 +0000
70@@ -11,6 +11,9 @@
71 #define NETLINK_NFLOG 5
72 #endif
73
74+#define ULOG_DEFAULT_NLGROUP 1
75+#define ULOG_DEFAULT_QTHRESHOLD 1
76+
77 #define ULOG_MAC_LEN 80
78 #define ULOG_PREFIX_LEN 32
79
80diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_connlimit.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_connlimit.h
81--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_connlimit.h 1970-01-01 00:00:00.000000000 +0000
82+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_connlimit.h 2004-03-08 08:48:45.000000000 +0000
83@@ -0,0 +1,12 @@
84+#ifndef _IPT_CONNLIMIT_H
85+#define _IPT_CONNLIMIT_H
86+
87+struct ipt_connlimit_data;
88+
89+struct ipt_connlimit_info {
90+ int limit;
91+ int inverse;
92+ u_int32_t mask;
93+ struct ipt_connlimit_data *data;
94+};
95+#endif /* _IPT_CONNLIMIT_H */
96diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_conntrack.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_conntrack.h
97--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_conntrack.h 2004-03-04 06:16:55.000000000 +0000
98+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_conntrack.h 2004-03-08 08:48:52.000000000 +0000
99@@ -10,6 +10,7 @@
100
101 #define IPT_CONNTRACK_STATE_SNAT (1 << (IP_CT_NUMBER + 1))
102 #define IPT_CONNTRACK_STATE_DNAT (1 << (IP_CT_NUMBER + 2))
103+#define IPT_CONNTRACK_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 3))
104
105 /* flags, invflags: */
106 #define IPT_CONNTRACK_STATE 0x01
107diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_dstlimit.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_dstlimit.h
108--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_dstlimit.h 1970-01-01 00:00:00.000000000 +0000
109+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_dstlimit.h 2004-03-08 08:48:46.000000000 +0000
110@@ -0,0 +1,39 @@
111+#ifndef _IPT_DSTLIMIT_H
112+#define _IPT_DSTLIMIT_H
113+
114+/* timings are in milliseconds. */
115+#define IPT_DSTLIMIT_SCALE 10000
116+/* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490
117+ seconds, or one every 59 hours. */
118+
119+/* details of this structure hidden by the implementation */
120+struct ipt_dstlimit_htable;
121+
122+#define IPT_DSTLIMIT_HASH_DIP 0x0001
123+#define IPT_DSTLIMIT_HASH_DPT 0x0002
124+#define IPT_DSTLIMIT_HASH_SIP 0x0004
125+
126+struct dstlimit_cfg {
127+ u_int32_t mode; /* bitmask of IPT_DSTLIMIT_HASH_* */
128+ u_int32_t avg; /* Average secs between packets * scale */
129+ u_int32_t burst; /* Period multiplier for upper limit. */
130+
131+ /* user specified */
132+ u_int32_t size; /* how many buckets */
133+ u_int32_t max; /* max number of entries */
134+ u_int32_t gc_interval; /* gc interval */
135+ u_int32_t expire; /* when do entries expire? */
136+};
137+
138+struct ipt_dstlimit_info {
139+ char name [IFNAMSIZ]; /* name */
140+ struct dstlimit_cfg cfg;
141+ struct ipt_dstlimit_htable *hinfo;
142+
143+ /* Used internally by the kernel */
144+ union {
145+ void *ptr;
146+ struct ipt_dstlimit_info *master;
147+ } u;
148+};
149+#endif /*_IPT_DSTLIMIT_H*/
150diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_fuzzy.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_fuzzy.h
151--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_fuzzy.h 1970-01-01 00:00:00.000000000 +0000
152+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_fuzzy.h 2004-03-08 08:48:47.000000000 +0000
153@@ -0,0 +1,21 @@
154+#ifndef _IPT_FUZZY_H
155+#define _IPT_FUZZY_H
156+
157+#include <linux/param.h>
158+#include <linux/types.h>
159+
160+#define MAXFUZZYRATE 10000000
161+#define MINFUZZYRATE 3
162+
163+struct ipt_fuzzy_info {
164+ u_int32_t minimum_rate;
165+ u_int32_t maximum_rate;
166+ u_int32_t packets_total;
167+ u_int32_t bytes_total;
168+ u_int32_t previous_time;
169+ u_int32_t present_time;
170+ u_int32_t mean_rate;
171+ u_int8_t acceptance_rate;
172+};
173+
174+#endif /*_IPT_FUZZY_H*/
175diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_ipv4options.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_ipv4options.h
176--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_ipv4options.h 1970-01-01 00:00:00.000000000 +0000
177+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_ipv4options.h 2004-03-08 08:48:48.000000000 +0000
178@@ -0,0 +1,21 @@
179+#ifndef __ipt_ipv4options_h_included__
180+#define __ipt_ipv4options_h_included__
181+
182+#define IPT_IPV4OPTION_MATCH_SSRR 0x01 /* For strict source routing */
183+#define IPT_IPV4OPTION_MATCH_LSRR 0x02 /* For loose source routing */
184+#define IPT_IPV4OPTION_DONT_MATCH_SRR 0x04 /* any source routing */
185+#define IPT_IPV4OPTION_MATCH_RR 0x08 /* For Record route */
186+#define IPT_IPV4OPTION_DONT_MATCH_RR 0x10
187+#define IPT_IPV4OPTION_MATCH_TIMESTAMP 0x20 /* For timestamp request */
188+#define IPT_IPV4OPTION_DONT_MATCH_TIMESTAMP 0x40
189+#define IPT_IPV4OPTION_MATCH_ROUTER_ALERT 0x80 /* For router-alert */
190+#define IPT_IPV4OPTION_DONT_MATCH_ROUTER_ALERT 0x100
191+#define IPT_IPV4OPTION_MATCH_ANY_OPT 0x200 /* match packet with any option */
192+#define IPT_IPV4OPTION_DONT_MATCH_ANY_OPT 0x400 /* match packet with no option */
193+
194+struct ipt_ipv4options_info {
195+ u_int16_t options;
196+};
197+
198+
199+#endif /* __ipt_ipv4options_h_included__ */
200diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_mport.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_mport.h
201--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_mport.h 1970-01-01 00:00:00.000000000 +0000
202+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_mport.h 2004-03-08 08:48:49.000000000 +0000
203@@ -0,0 +1,24 @@
204+#ifndef _IPT_MPORT_H
205+#define _IPT_MPORT_H
206+#include <linux/netfilter_ipv4/ip_tables.h>
207+
208+#define IPT_MPORT_SOURCE (1<<0)
209+#define IPT_MPORT_DESTINATION (1<<1)
210+#define IPT_MPORT_EITHER (IPT_MPORT_SOURCE|IPT_MPORT_DESTINATION)
211+
212+#define IPT_MULTI_PORTS 15
213+
214+/* Must fit inside union ipt_matchinfo: 32 bytes */
215+/* every entry in ports[] except for the last one has one bit in pflags
216+ * associated with it. If this bit is set, the port is the first port of
217+ * a portrange, with the next entry being the last.
218+ * End of list is marked with pflags bit set and port=65535.
219+ * If 14 ports are used (last one does not have a pflag), the last port
220+ * is repeated to fill the last entry in ports[] */
221+struct ipt_mport
222+{
223+ u_int8_t flags:2; /* Type of comparison */
224+ u_int16_t pflags:14; /* Port flags */
225+ u_int16_t ports[IPT_MULTI_PORTS]; /* Ports */
226+};
227+#endif /*_IPT_MPORT_H*/
228diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_nth.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_nth.h
229--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_nth.h 1970-01-01 00:00:00.000000000 +0000
230+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_nth.h 2004-03-08 08:48:50.000000000 +0000
231@@ -0,0 +1,19 @@
232+#ifndef _IPT_NTH_H
233+#define _IPT_NTH_H
234+
235+#include <linux/param.h>
236+#include <linux/types.h>
237+
238+#ifndef IPT_NTH_NUM_COUNTERS
239+#define IPT_NTH_NUM_COUNTERS 16
240+#endif
241+
242+struct ipt_nth_info {
243+ u_int8_t every;
244+ u_int8_t not;
245+ u_int8_t startat;
246+ u_int8_t counter;
247+ u_int8_t packet;
248+};
249+
250+#endif /*_IPT_NTH_H*/
251diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_quota.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_quota.h
252--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_quota.h 1970-01-01 00:00:00.000000000 +0000
253+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_quota.h 2004-03-08 08:48:51.000000000 +0000
254@@ -0,0 +1,11 @@
255+#ifndef _IPT_QUOTA_H
256+#define _IPT_QUOTA_H
257+
258+/* print debug info in both kernel/netfilter module & iptable library */
259+//#define DEBUG_IPT_QUOTA
260+
261+struct ipt_quota_info {
262+ u_int64_t quota;
263+};
264+
265+#endif /*_IPT_QUOTA_H*/
266diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_realm.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_realm.h
267--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_realm.h 1970-01-01 00:00:00.000000000 +0000
268+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_realm.h 2004-03-08 08:48:54.000000000 +0000
269@@ -0,0 +1,9 @@
270+#ifndef _IPT_REALM_H
271+#define _IPT_REALM_H
272+
273+struct ipt_realm_info {
274+ u_int32_t id;
275+ u_int32_t mask;
276+ u_int8_t invert;
277+};
278+#endif /*_IPT_REALM_H*/
279diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_sctp.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_sctp.h
280--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_sctp.h 1970-01-01 00:00:00.000000000 +0000
281+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_sctp.h 2004-03-08 08:48:57.000000000 +0000
282@@ -0,0 +1,107 @@
283+#ifndef _IPT_SCTP_H_
284+#define _IPT_SCTP_H_
285+
286+#define IPT_SCTP_SRC_PORTS 0x01
287+#define IPT_SCTP_DEST_PORTS 0x02
288+#define IPT_SCTP_CHUNK_TYPES 0x04
289+
290+#define IPT_SCTP_VALID_FLAGS 0x07
291+
292+#define ELEMCOUNT(x) (sizeof(x)/sizeof(x[0]))
293+
294+
295+struct ipt_sctp_flag_info {
296+ u_int8_t chunktype;
297+ u_int8_t flag;
298+ u_int8_t flag_mask;
299+};
300+
301+#define IPT_NUM_SCTP_FLAGS 4
302+
303+struct ipt_sctp_info {
304+ u_int16_t dpts[2]; /* Min, Max */
305+ u_int16_t spts[2]; /* Min, Max */
306+
307+ u_int32_t chunkmap[256 / sizeof (u_int32_t)]; /* Bit mask of chunks to be matched according to RFC 2960 */
308+
309+#define SCTP_CHUNK_MATCH_ANY 0x01 /* Match if any of the chunk types are present */
310+#define SCTP_CHUNK_MATCH_ALL 0x02 /* Match if all of the chunk types are present */
311+#define SCTP_CHUNK_MATCH_ONLY 0x04 /* Match if these are the only chunk types present */
312+
313+ u_int32_t chunk_match_type;
314+ struct ipt_sctp_flag_info flag_info[IPT_NUM_SCTP_FLAGS];
315+ int flag_count;
316+
317+ u_int32_t flags;
318+ u_int32_t invflags;
319+};
320+
321+#define bytes(type) (sizeof(type) * 8)
322+
323+#define SCTP_CHUNKMAP_SET(chunkmap, type) \
324+ do { \
325+ chunkmap[type / bytes(u_int32_t)] |= \
326+ 1 << (type % bytes(u_int32_t)); \
327+ } while (0)
328+
329+#define SCTP_CHUNKMAP_CLEAR(chunkmap, type) \
330+ do { \
331+ chunkmap[type / bytes(u_int32_t)] &= \
332+ ~(1 << (type % bytes(u_int32_t))); \
333+ } while (0)
334+
335+#define SCTP_CHUNKMAP_IS_SET(chunkmap, type) \
336+({ \
337+ (chunkmap[type / bytes (u_int32_t)] & \
338+ (1 << (type % bytes (u_int32_t)))) ? 1: 0; \
339+})
340+
341+#define SCTP_CHUNKMAP_RESET(chunkmap) \
342+ do { \
343+ int i; \
344+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) \
345+ chunkmap[i] = 0; \
346+ } while (0)
347+
348+#define SCTP_CHUNKMAP_SET_ALL(chunkmap) \
349+ do { \
350+ int i; \
351+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) \
352+ chunkmap[i] = ~0; \
353+ } while (0)
354+
355+#define SCTP_CHUNKMAP_COPY(destmap, srcmap) \
356+ do { \
357+ int i; \
358+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) \
359+ destmap[i] = srcmap[i]; \
360+ } while (0)
361+
362+#define SCTP_CHUNKMAP_IS_CLEAR(chunkmap) \
363+({ \
364+ int i; \
365+ int flag = 1; \
366+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \
367+ if (chunkmap[i]) { \
368+ flag = 0; \
369+ break; \
370+ } \
371+ } \
372+ flag; \
373+})
374+
375+#define SCTP_CHUNKMAP_IS_ALL_SET(chunkmap) \
376+({ \
377+ int i; \
378+ int flag = 1; \
379+ for (i = 0; i < ELEMCOUNT(chunkmap); i++) { \
380+ if (chunkmap[i] != ~0) { \
381+ flag = 0; \
382+ break; \
383+ } \
384+ } \
385+ flag; \
386+})
387+
388+#endif /* _IPT_SCTP_H_ */
389+
390diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_state.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_state.h
391--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_state.h 2004-03-04 06:17:00.000000000 +0000
392+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_state.h 2004-03-08 08:48:52.000000000 +0000
393@@ -4,6 +4,8 @@
394 #define IPT_STATE_BIT(ctinfo) (1 << ((ctinfo)%IP_CT_IS_REPLY+1))
395 #define IPT_STATE_INVALID (1 << 0)
396
397+#define IPT_STATE_UNTRACKED (1 << (IP_CT_NUMBER + 1))
398+
399 struct ipt_state_info
400 {
401 unsigned int statemask;
402diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_u32.h linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_u32.h
403--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4/ipt_u32.h 1970-01-01 00:00:00.000000000 +0000
404+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4/ipt_u32.h 2004-03-08 08:48:59.000000000 +0000
405@@ -0,0 +1,40 @@
406+#ifndef _IPT_U32_H
407+#define _IPT_U32_H
408+#include <linux/netfilter_ipv4/ip_tables.h>
409+
410+enum ipt_u32_ops
411+{
412+ IPT_U32_AND,
413+ IPT_U32_LEFTSH,
414+ IPT_U32_RIGHTSH,
415+ IPT_U32_AT
416+};
417+
418+struct ipt_u32_location_element
419+{
420+ u_int32_t number;
421+ u_int8_t nextop;
422+};
423+struct ipt_u32_value_element
424+{
425+ u_int32_t min;
426+ u_int32_t max;
427+};
428+/* *** any way to allow for an arbitrary number of elements?
429+ for now I settle for a limit of 10 of each */
430+#define U32MAXSIZE 10
431+struct ipt_u32_test
432+{
433+ u_int8_t nnums;
434+ struct ipt_u32_location_element location[U32MAXSIZE+1];
435+ u_int8_t nvalues;
436+ struct ipt_u32_value_element value[U32MAXSIZE+1];
437+};
438+
439+struct ipt_u32
440+{
441+ u_int8_t ntests;
442+ struct ipt_u32_test tests[U32MAXSIZE+1];
443+};
444+
445+#endif /*_IPT_U32_H*/
446diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv4.h linux-2.6.4-rc2/include/linux/netfilter_ipv4.h
447--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv4.h 2004-03-04 06:16:58.000000000 +0000
448+++ linux-2.6.4-rc2/include/linux/netfilter_ipv4.h 2004-03-08 08:48:52.000000000 +0000
449@@ -51,6 +51,8 @@
450
451 enum nf_ip_hook_priorities {
452 NF_IP_PRI_FIRST = INT_MIN,
453+ NF_IP_PRI_CONNTRACK_DEFRAG = -400,
454+ NF_IP_PRI_RAW = -300,
455 NF_IP_PRI_SELINUX_FIRST = -225,
456 NF_IP_PRI_CONNTRACK = -200,
457 NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD = -175,
458diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_HL.h linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_HL.h
459--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_HL.h 1970-01-01 00:00:00.000000000 +0000
460+++ linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_HL.h 2004-03-08 08:48:38.000000000 +0000
461@@ -0,0 +1,22 @@
462+/* Hop Limit modification module for ip6tables
463+ * Maciej Soltysiak <solt@dns.toxicfilms.tv>
464+ * Based on HW's TTL module */
465+
466+#ifndef _IP6T_HL_H
467+#define _IP6T_HL_H
468+
469+enum {
470+ IP6T_HL_SET = 0,
471+ IP6T_HL_INC,
472+ IP6T_HL_DEC
473+};
474+
475+#define IP6T_HL_MAXMODE IP6T_HL_DEC
476+
477+struct ip6t_HL_info {
478+ u_int8_t mode;
479+ u_int8_t hop_limit;
480+};
481+
482+
483+#endif
484diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_REJECT.h linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_REJECT.h
485--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_REJECT.h 2004-03-04 06:16:34.000000000 +0000
486+++ linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_REJECT.h 2004-03-08 08:48:42.000000000 +0000
487@@ -2,15 +2,17 @@
488 #define _IP6T_REJECT_H
489
490 enum ip6t_reject_with {
491- IP6T_ICMP_NET_UNREACHABLE,
492- IP6T_ICMP_HOST_UNREACHABLE,
493- IP6T_ICMP_PROT_UNREACHABLE,
494- IP6T_ICMP_PORT_UNREACHABLE,
495- IP6T_ICMP_ECHOREPLY
496+ IP6T_ICMP6_NO_ROUTE,
497+ IP6T_ICMP6_ADM_PROHIBITED,
498+ IP6T_ICMP6_NOT_NEIGHBOUR,
499+ IP6T_ICMP6_ADDR_UNREACH,
500+ IP6T_ICMP6_PORT_UNREACH,
501+ IP6T_ICMP6_ECHOREPLY,
502+ IP6T_TCP_RESET
503 };
504
505 struct ip6t_reject_info {
506 enum ip6t_reject_with with; /* reject type */
507 };
508
509-#endif /*_IPT_REJECT_H*/
510+#endif /*_IP6T_REJECT_H*/
511diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_fuzzy.h linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_fuzzy.h
512--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_fuzzy.h 1970-01-01 00:00:00.000000000 +0000
513+++ linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_fuzzy.h 2004-03-08 08:48:47.000000000 +0000
514@@ -0,0 +1,21 @@
515+#ifndef _IP6T_FUZZY_H
516+#define _IP6T_FUZZY_H
517+
518+#include <linux/param.h>
519+#include <linux/types.h>
520+
521+#define MAXFUZZYRATE 10000000
522+#define MINFUZZYRATE 3
523+
524+struct ip6t_fuzzy_info {
525+ u_int32_t minimum_rate;
526+ u_int32_t maximum_rate;
527+ u_int32_t packets_total;
528+ u_int32_t bytes_total;
529+ u_int32_t previous_time;
530+ u_int32_t present_time;
531+ u_int32_t mean_rate;
532+ u_int8_t acceptance_rate;
533+};
534+
535+#endif /*_IP6T_FUZZY_H*/
536diff -Nur linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_nth.h linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_nth.h
537--- linux-2.6.4-rc2.org/include/linux/netfilter_ipv6/ip6t_nth.h 1970-01-01 00:00:00.000000000 +0000
538+++ linux-2.6.4-rc2/include/linux/netfilter_ipv6/ip6t_nth.h 2004-03-08 08:48:50.000000000 +0000
539@@ -0,0 +1,19 @@
540+#ifndef _IP6T_NTH_H
541+#define _IP6T_NTH_H
542+
543+#include <linux/param.h>
544+#include <linux/types.h>
545+
546+#ifndef IP6T_NTH_NUM_COUNTERS
547+#define IP6T_NTH_NUM_COUNTERS 16
548+#endif
549+
550+struct ip6t_nth_info {
551+ u_int8_t every;
552+ u_int8_t not;
553+ u_int8_t startat;
554+ u_int8_t counter;
555+ u_int8_t packet;
556+};
557+
558+#endif /*_IP6T_NTH_H*/
559diff -Nur linux-2.6.4-rc2.org/net/core/netfilter.c linux-2.6.4-rc2/net/core/netfilter.c
560--- linux-2.6.4-rc2.org/net/core/netfilter.c 2004-03-04 06:16:45.000000000 +0000
561+++ linux-2.6.4-rc2/net/core/netfilter.c 2004-03-08 08:48:37.000000000 +0000
562@@ -8,8 +8,10 @@
563 *
564 * February 2000: Modified by James Morris to have 1 queue per protocol.
565 * 15-Mar-2000: Added NF_REPEAT --RR.
566+ * 08-May-2003: Internal logging interface added by Jozsef Kadlecsik.
567 */
568 #include <linux/config.h>
569+#include <linux/kernel.h>
570 #include <linux/netfilter.h>
571 #include <net/protocol.h>
572 #include <linux/init.h>
573@@ -740,6 +742,72 @@
574 EXPORT_SYMBOL(skb_ip_make_writable);
575 #endif /*CONFIG_INET*/
576
577+/* Internal logging interface, which relies on the real
578+ LOG target modules */
579+
580+#define NF_LOG_PREFIXLEN 128
581+
582+static nf_logfn *nf_logging[NPROTO]; /* = NULL */
583+static int reported = 0;
584+static spinlock_t nf_log_lock = SPIN_LOCK_UNLOCKED;
585+
586+int nf_log_register(int pf, nf_logfn *logfn)
587+{
588+ int ret = -EBUSY;
589+
590+ /* Any setup of logging members must be done before
591+ * substituting pointer. */
592+ smp_wmb();
593+ spin_lock(&nf_log_lock);
594+ if (!nf_logging[pf]) {
595+ nf_logging[pf] = logfn;
596+ ret = 0;
597+ }
598+ spin_unlock(&nf_log_lock);
599+ return ret;
600+}
601+
602+void nf_log_unregister(int pf, nf_logfn *logfn)
603+{
604+ spin_lock(&nf_log_lock);
605+ if (nf_logging[pf] == logfn)
606+ nf_logging[pf] = NULL;
607+ spin_unlock(&nf_log_lock);
608+
609+ /* Give time to concurrent readers. */
610+ synchronize_net();
611+}
612+
613+void nf_log_packet(int pf,
614+ unsigned int hooknum,
615+ const struct sk_buff *skb,
616+ const struct net_device *in,
617+ const struct net_device *out,
618+ const char *fmt, ...)
619+{
620+ va_list args;
621+ char prefix[NF_LOG_PREFIXLEN];
622+ nf_logfn *logfn;
623+
624+ rcu_read_lock();
625+ logfn = nf_logging[pf];
626+ if (logfn) {
627+ va_start(args, fmt);
628+ vsnprintf(prefix, sizeof(prefix), fmt, args);
629+ va_end(args);
630+ /* We must read logging before nf_logfn[pf] */
631+ smp_read_barrier_depends();
632+ logfn(hooknum, skb, in, out, prefix);
633+ } else if (!reported) {
634+ printk(KERN_WARNING "nf_log_packet: can\'t log yet, "
635+ "no backend logging module loaded in!\n");
636+ reported++;
637+ }
638+ rcu_read_unlock();
639+}
640+EXPORT_SYMBOL(nf_log_register);
641+EXPORT_SYMBOL(nf_log_unregister);
642+EXPORT_SYMBOL(nf_log_packet);
643
644 /* This does not belong here, but ipt_REJECT needs it if connection
645 tracking in use: without this, connection may not be in hash table,
646diff -Nur linux-2.6.4-rc2.org/net/core/netfilter.c.orig linux-2.6.4-rc2/net/core/netfilter.c.orig
647--- linux-2.6.4-rc2.org/net/core/netfilter.c.orig 1970-01-01 00:00:00.000000000 +0000
648+++ linux-2.6.4-rc2/net/core/netfilter.c.orig 2004-03-04 06:16:45.000000000 +0000
649@@ -0,0 +1,772 @@
650+/* netfilter.c: look after the filters for various protocols.
651+ * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
652+ *
653+ * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
654+ * way.
655+ *
656+ * Rusty Russell (C)2000 -- This code is GPL.
657+ *
658+ * February 2000: Modified by James Morris to have 1 queue per protocol.
659+ * 15-Mar-2000: Added NF_REPEAT --RR.
660+ */
661+#include <linux/config.h>
662+#include <linux/netfilter.h>
663+#include <net/protocol.h>
664+#include <linux/init.h>
665+#include <linux/skbuff.h>
666+#include <linux/wait.h>
667+#include <linux/module.h>
668+#include <linux/interrupt.h>
669+#include <linux/if.h>
670+#include <linux/netdevice.h>
671+#include <linux/inetdevice.h>
672+#include <linux/tcp.h>
673+#include <linux/udp.h>
674+#include <linux/icmp.h>
675+#include <net/sock.h>
676+#include <net/route.h>
677+#include <linux/ip.h>
678+
679+/* In this code, we can be waiting indefinitely for userspace to
680+ * service a packet if a hook returns NF_QUEUE. We could keep a count
681+ * of skbuffs queued for userspace, and not deregister a hook unless
682+ * this is zero, but that sucks. Now, we simply check when the
683+ * packets come back: if the hook is gone, the packet is discarded. */
684+#ifdef CONFIG_NETFILTER_DEBUG
685+#define NFDEBUG(format, args...) printk(format , ## args)
686+#else
687+#define NFDEBUG(format, args...)
688+#endif
689+
690+/* Sockopts only registered and called from user context, so
691+ net locking would be overkill. Also, [gs]etsockopt calls may
692+ sleep. */
693+static DECLARE_MUTEX(nf_sockopt_mutex);
694+
695+struct list_head nf_hooks[NPROTO][NF_MAX_HOOKS];
696+static LIST_HEAD(nf_sockopts);
697+static spinlock_t nf_hook_lock = SPIN_LOCK_UNLOCKED;
698+
699+/*
700+ * A queue handler may be registered for each protocol. Each is protected by
701+ * long term mutex. The handler must provide an an outfn() to accept packets
702+ * for queueing and must reinject all packets it receives, no matter what.
703+ */
704+static struct nf_queue_handler_t {
705+ nf_queue_outfn_t outfn;
706+ void *data;
707+} queue_handler[NPROTO];
708+static rwlock_t queue_handler_lock = RW_LOCK_UNLOCKED;
709+
710+int nf_register_hook(struct nf_hook_ops *reg)
711+{
712+ struct list_head *i;
713+
714+ spin_lock_bh(&nf_hook_lock);
715+ list_for_each(i, &nf_hooks[reg->pf][reg->hooknum]) {
716+ if (reg->priority < ((struct nf_hook_ops *)i)->priority)
717+ break;
718+ }
719+ list_add_rcu(&reg->list, i->prev);
720+ spin_unlock_bh(&nf_hook_lock);
721+
722+ synchronize_net();
723+ return 0;
724+}
725+
726+void nf_unregister_hook(struct nf_hook_ops *reg)
727+{
728+ spin_lock_bh(&nf_hook_lock);
729+ list_del_rcu(&reg->list);
730+ spin_unlock_bh(&nf_hook_lock);
731+
732+ synchronize_net();
733+}
734+
735+/* Do exclusive ranges overlap? */
736+static inline int overlap(int min1, int max1, int min2, int max2)
737+{
738+ return max1 > min2 && min1 < max2;
739+}
740+
741+/* Functions to register sockopt ranges (exclusive). */
742+int nf_register_sockopt(struct nf_sockopt_ops *reg)
743+{
744+ struct list_head *i;
745+ int ret = 0;
746+
747+ if (down_interruptible(&nf_sockopt_mutex) != 0)
748+ return -EINTR;
749+
750+ list_for_each(i, &nf_sockopts) {
751+ struct nf_sockopt_ops *ops = (struct nf_sockopt_ops *)i;
752+ if (ops->pf == reg->pf
753+ && (overlap(ops->set_optmin, ops->set_optmax,
754+ reg->set_optmin, reg->set_optmax)
755+ || overlap(ops->get_optmin, ops->get_optmax,
756+ reg->get_optmin, reg->get_optmax))) {
757+ NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n",
758+ ops->set_optmin, ops->set_optmax,
759+ ops->get_optmin, ops->get_optmax,
760+ reg->set_optmin, reg->set_optmax,
761+ reg->get_optmin, reg->get_optmax);
762+ ret = -EBUSY;
763+ goto out;
764+ }
765+ }
766+
767+ list_add(&reg->list, &nf_sockopts);
768+out:
769+ up(&nf_sockopt_mutex);
770+ return ret;
771+}
772+
773+void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
774+{
775+ /* No point being interruptible: we're probably in cleanup_module() */
776+ restart:
777+ down(&nf_sockopt_mutex);
778+ if (reg->use != 0) {
779+ /* To be woken by nf_sockopt call... */
780+ /* FIXME: Stuart Young's name appears gratuitously. */
781+ set_current_state(TASK_UNINTERRUPTIBLE);
782+ reg->cleanup_task = current;
783+ up(&nf_sockopt_mutex);
784+ schedule();
785+ goto restart;
786+ }
787+ list_del(&reg->list);
788+ up(&nf_sockopt_mutex);
789+}
790+
791+#ifdef CONFIG_NETFILTER_DEBUG
792+#include <net/ip.h>
793+#include <net/tcp.h>
794+#include <linux/netfilter_ipv4.h>
795+
796+static void debug_print_hooks_ip(unsigned int nf_debug)
797+{
798+ if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
799+ printk("PRE_ROUTING ");
800+ nf_debug ^= (1 << NF_IP_PRE_ROUTING);
801+ }
802+ if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
803+ printk("LOCAL_IN ");
804+ nf_debug ^= (1 << NF_IP_LOCAL_IN);
805+ }
806+ if (nf_debug & (1 << NF_IP_FORWARD)) {
807+ printk("FORWARD ");
808+ nf_debug ^= (1 << NF_IP_FORWARD);
809+ }
810+ if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
811+ printk("LOCAL_OUT ");
812+ nf_debug ^= (1 << NF_IP_LOCAL_OUT);
813+ }
814+ if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
815+ printk("POST_ROUTING ");
816+ nf_debug ^= (1 << NF_IP_POST_ROUTING);
817+ }
818+ if (nf_debug)
819+ printk("Crap bits: 0x%04X", nf_debug);
820+ printk("\n");
821+}
822+
823+void nf_dump_skb(int pf, struct sk_buff *skb)
824+{
825+ printk("skb: pf=%i %s dev=%s len=%u\n",
826+ pf,
827+ skb->sk ? "(owned)" : "(unowned)",
828+ skb->dev ? skb->dev->name : "(no dev)",
829+ skb->len);
830+ switch (pf) {
831+ case PF_INET: {
832+ const struct iphdr *ip = skb->nh.iph;
833+ __u32 *opt = (__u32 *) (ip + 1);
834+ int opti;
835+ __u16 src_port = 0, dst_port = 0;
836+
837+ if (ip->protocol == IPPROTO_TCP
838+ || ip->protocol == IPPROTO_UDP) {
839+ struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
840+ src_port = ntohs(tcp->source);
841+ dst_port = ntohs(tcp->dest);
842+ }
843+
844+ printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
845+ " L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
846+ ip->protocol, NIPQUAD(ip->saddr),
847+ src_port, NIPQUAD(ip->daddr),
848+ dst_port,
849+ ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
850+ ntohs(ip->frag_off), ip->ttl);
851+
852+ for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
853+ printk(" O=0x%8.8X", *opt++);
854+ printk("\n");
855+ }
856+ }
857+}
858+
859+void nf_debug_ip_local_deliver(struct sk_buff *skb)
860+{
861+ /* If it's a loopback packet, it must have come through
862+ * NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
863+ * NF_IP_LOCAL_IN. Otherwise, must have gone through
864+ * NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
865+ if (!skb->dev) {
866+ printk("ip_local_deliver: skb->dev is NULL.\n");
867+ }
868+ else if (strcmp(skb->dev->name, "lo") == 0) {
869+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
870+ | (1 << NF_IP_POST_ROUTING)
871+ | (1 << NF_IP_PRE_ROUTING)
872+ | (1 << NF_IP_LOCAL_IN))) {
873+ printk("ip_local_deliver: bad loopback skb: ");
874+ debug_print_hooks_ip(skb->nf_debug);
875+ nf_dump_skb(PF_INET, skb);
876+ }
877+ }
878+ else {
879+ if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
880+ | (1<<NF_IP_LOCAL_IN))) {
881+ printk("ip_local_deliver: bad non-lo skb: ");
882+ debug_print_hooks_ip(skb->nf_debug);
883+ nf_dump_skb(PF_INET, skb);
884+ }
885+ }
886+}
887+
888+void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
889+{
890+ if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
891+ | (1 << NF_IP_POST_ROUTING))) {
892+ printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
893+ newskb);
894+ debug_print_hooks_ip(newskb->nf_debug);
895+ nf_dump_skb(PF_INET, newskb);
896+ }
897+ /* Clear to avoid confusing input check */
898+ newskb->nf_debug = 0;
899+}
900+
901+void nf_debug_ip_finish_output2(struct sk_buff *skb)
902+{
903+ /* If it's owned, it must have gone through the
904+ * NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
905+ * Otherwise, must have gone through
906+ * NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
907+ */
908+ if (skb->sk) {
909+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
910+ | (1 << NF_IP_POST_ROUTING))) {
911+ printk("ip_finish_output: bad owned skb = %p: ", skb);
912+ debug_print_hooks_ip(skb->nf_debug);
913+ nf_dump_skb(PF_INET, skb);
914+ }
915+ } else {
916+ if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
917+ | (1 << NF_IP_FORWARD)
918+ | (1 << NF_IP_POST_ROUTING))) {
919+ /* Fragments, entunnelled packets, TCP RSTs
920+ generated by ipt_REJECT will have no
921+ owners, but still may be local */
922+ if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
923+ | (1 << NF_IP_POST_ROUTING))){
924+ printk("ip_finish_output:"
925+ " bad unowned skb = %p: ",skb);
926+ debug_print_hooks_ip(skb->nf_debug);
927+ nf_dump_skb(PF_INET, skb);
928+ }
929+ }
930+ }
931+}
932+#endif /*CONFIG_NETFILTER_DEBUG*/
933+
934+/* Call get/setsockopt() */
935+static int nf_sockopt(struct sock *sk, int pf, int val,
936+ char *opt, int *len, int get)
937+{
938+ struct list_head *i;
939+ struct nf_sockopt_ops *ops;
940+ int ret;
941+
942+ if (down_interruptible(&nf_sockopt_mutex) != 0)
943+ return -EINTR;
944+
945+ list_for_each(i, &nf_sockopts) {
946+ ops = (struct nf_sockopt_ops *)i;
947+ if (ops->pf == pf) {
948+ if (get) {
949+ if (val >= ops->get_optmin
950+ && val < ops->get_optmax) {
951+ ops->use++;
952+ up(&nf_sockopt_mutex);
953+ ret = ops->get(sk, val, opt, len);
954+ goto out;
955+ }
956+ } else {
957+ if (val >= ops->set_optmin
958+ && val < ops->set_optmax) {
959+ ops->use++;
960+ up(&nf_sockopt_mutex);
961+ ret = ops->set(sk, val, opt, *len);
962+ goto out;
963+ }
964+ }
965+ }
966+ }
967+ up(&nf_sockopt_mutex);
968+ return -ENOPROTOOPT;
969+
970+ out:
971+ down(&nf_sockopt_mutex);
972+ ops->use--;
973+ if (ops->cleanup_task)
974+ wake_up_process(ops->cleanup_task);
975+ up(&nf_sockopt_mutex);
976+ return ret;
977+}
978+
979+int nf_setsockopt(struct sock *sk, int pf, int val, char *opt,
980+ int len)
981+{
982+ return nf_sockopt(sk, pf, val, opt, &len, 0);
983+}
984+
985+int nf_getsockopt(struct sock *sk, int pf, int val, char *opt, int *len)
986+{
987+ return nf_sockopt(sk, pf, val, opt, len, 1);
988+}
989+
990+static unsigned int nf_iterate(struct list_head *head,
991+ struct sk_buff **skb,
992+ int hook,
993+ const struct net_device *indev,
994+ const struct net_device *outdev,
995+ struct list_head **i,
996+ int (*okfn)(struct sk_buff *),
997+ int hook_thresh)
998+{
999+ /*
1000+ * The caller must not block between calls to this
1001+ * function because of risk of continuing from deleted element.
1002+ */
1003+ list_for_each_continue_rcu(*i, head) {
1004+ struct nf_hook_ops *elem = (struct nf_hook_ops *)*i;
1005+
1006+ if (hook_thresh > elem->priority)
1007+ continue;
1008+
1009+ /* Optimization: we don't need to hold module
1010+ reference here, since function can't sleep. --RR */
1011+ switch (elem->hook(hook, skb, indev, outdev, okfn)) {
1012+ case NF_QUEUE:
1013+ return NF_QUEUE;
1014+
1015+ case NF_STOLEN:
1016+ return NF_STOLEN;
1017+
1018+ case NF_DROP:
1019+ return NF_DROP;
1020+
1021+ case NF_REPEAT:
1022+ *i = (*i)->prev;
1023+ break;
1024+
1025+#ifdef CONFIG_NETFILTER_DEBUG
1026+ case NF_ACCEPT:
1027+ break;
1028+
1029+ default:
1030+ NFDEBUG("Evil return from %p(%u).\n",
1031+ elem->hook, hook);
1032+#endif
1033+ }
1034+ }
1035+ return NF_ACCEPT;
1036+}
1037+
1038+int nf_register_queue_handler(int pf, nf_queue_outfn_t outfn, void *data)
1039+{
1040+ int ret;
1041+
1042+ write_lock_bh(&queue_handler_lock);
1043+ if (queue_handler[pf].outfn)
1044+ ret = -EBUSY;
1045+ else {
1046+ queue_handler[pf].outfn = outfn;
1047+ queue_handler[pf].data = data;
1048+ ret = 0;
1049+ }
1050+ write_unlock_bh(&queue_handler_lock);
1051+
1052+ return ret;
1053+}
1054+
1055+/* The caller must flush their queue before this */
1056+int nf_unregister_queue_handler(int pf)
1057+{
1058+ write_lock_bh(&queue_handler_lock);
1059+ queue_handler[pf].outfn = NULL;
1060+ queue_handler[pf].data = NULL;
1061+ write_unlock_bh(&queue_handler_lock);
1062+
1063+ return 0;
1064+}
1065+
1066+/*
1067+ * Any packet that leaves via this function must come back
1068+ * through nf_reinject().
1069+ */
1070+static int nf_queue(struct sk_buff *skb,
1071+ struct list_head *elem,
1072+ int pf, unsigned int hook,
1073+ struct net_device *indev,
1074+ struct net_device *outdev,
1075+ int (*okfn)(struct sk_buff *))
1076+{
1077+ int status;
1078+ struct nf_info *info;
1079+#ifdef CONFIG_BRIDGE_NETFILTER
1080+ struct net_device *physindev = NULL;
1081+ struct net_device *physoutdev = NULL;
1082+#endif
1083+
1084+ /* QUEUE == DROP if noone is waiting, to be safe. */
1085+ read_lock(&queue_handler_lock);
1086+ if (!queue_handler[pf].outfn) {
1087+ read_unlock(&queue_handler_lock);
1088+ kfree_skb(skb);
1089+ return 1;
1090+ }
1091+
1092+ info = kmalloc(sizeof(*info), GFP_ATOMIC);
1093+ if (!info) {
1094+ if (net_ratelimit())
1095+ printk(KERN_ERR "OOM queueing packet %p\n",
1096+ skb);
1097+ read_unlock(&queue_handler_lock);
1098+ kfree_skb(skb);
1099+ return 1;
1100+ }
1101+
1102+ *info = (struct nf_info) {
1103+ (struct nf_hook_ops *)elem, pf, hook, indev, outdev, okfn };
1104+
1105+ /* If it's going away, ignore hook. */
1106+ if (!try_module_get(info->elem->owner)) {
1107+ read_unlock(&queue_handler_lock);
1108+ kfree(info);
1109+ return 0;
1110+ }
1111+
1112+ /* Bump dev refs so they don't vanish while packet is out */
1113+ if (indev) dev_hold(indev);
1114+ if (outdev) dev_hold(outdev);
1115+
1116+#ifdef CONFIG_BRIDGE_NETFILTER
1117+ if (skb->nf_bridge) {
1118+ physindev = skb->nf_bridge->physindev;
1119+ if (physindev) dev_hold(physindev);
1120+ physoutdev = skb->nf_bridge->physoutdev;
1121+ if (physoutdev) dev_hold(physoutdev);
1122+ }
1123+#endif
1124+
1125+ status = queue_handler[pf].outfn(skb, info, queue_handler[pf].data);
1126+ read_unlock(&queue_handler_lock);
1127+
1128+ if (status < 0) {
1129+ /* James M doesn't say fuck enough. */
1130+ if (indev) dev_put(indev);
1131+ if (outdev) dev_put(outdev);
1132+#ifdef CONFIG_BRIDGE_NETFILTER
1133+ if (physindev) dev_put(physindev);
1134+ if (physoutdev) dev_put(physoutdev);
1135+#endif
1136+ module_put(info->elem->owner);
1137+ kfree(info);
1138+ kfree_skb(skb);
1139+ return 1;
1140+ }
1141+ return 1;
1142+}
1143+
1144+int nf_hook_slow(int pf, unsigned int hook, struct sk_buff *skb,
1145+ struct net_device *indev,
1146+ struct net_device *outdev,
1147+ int (*okfn)(struct sk_buff *),
1148+ int hook_thresh)
1149+{
1150+ struct list_head *elem;
1151+ unsigned int verdict;
1152+ int ret = 0;
1153+
1154+ if (skb->ip_summed == CHECKSUM_HW) {
1155+ if (outdev == NULL) {
1156+ skb->ip_summed = CHECKSUM_NONE;
1157+ } else {
1158+ skb_checksum_help(skb);
1159+ }
1160+ }
1161+
1162+ /* We may already have this, but read-locks nest anyway */
1163+ rcu_read_lock();
1164+
1165+#ifdef CONFIG_NETFILTER_DEBUG
1166+ if (skb->nf_debug & (1 << hook)) {
1167+ printk("nf_hook: hook %i already set.\n", hook);
1168+ nf_dump_skb(pf, skb);
1169+ }
1170+ skb->nf_debug |= (1 << hook);
1171+#endif
1172+
1173+ elem = &nf_hooks[pf][hook];
1174+ next_hook:
1175+ verdict = nf_iterate(&nf_hooks[pf][hook], &skb, hook, indev,
1176+ outdev, &elem, okfn, hook_thresh);
1177+ if (verdict == NF_QUEUE) {
1178+ NFDEBUG("nf_hook: Verdict = QUEUE.\n");
1179+ if (!nf_queue(skb, elem, pf, hook, indev, outdev, okfn))
1180+ goto next_hook;
1181+ }
1182+
1183+ switch (verdict) {
1184+ case NF_ACCEPT:
1185+ ret = okfn(skb);
1186+ break;
1187+
1188+ case NF_DROP:
1189+ kfree_skb(skb);
1190+ ret = -EPERM;
1191+ break;
1192+ }
1193+
1194+ rcu_read_unlock();
1195+ return ret;
1196+}
1197+
1198+void nf_reinject(struct sk_buff *skb, struct nf_info *info,
1199+ unsigned int verdict)
1200+{
1201+ struct list_head *elem = &info->elem->list;
1202+ struct list_head *i;
1203+
1204+ rcu_read_lock();
1205+
1206+ /* Release those devices we held, or Alexey will kill me. */
1207+ if (info->indev) dev_put(info->indev);
1208+ if (info->outdev) dev_put(info->outdev);
1209+#ifdef CONFIG_BRIDGE_NETFILTER
1210+ if (skb->nf_bridge) {
1211+ if (skb->nf_bridge->physindev)
1212+ dev_put(skb->nf_bridge->physindev);
1213+ if (skb->nf_bridge->physoutdev)
1214+ dev_put(skb->nf_bridge->physoutdev);
1215+ }
1216+#endif
1217+
1218+ /* Drop reference to owner of hook which queued us. */
1219+ module_put(info->elem->owner);
1220+
1221+ list_for_each_rcu(i, &nf_hooks[info->pf][info->hook]) {
1222+ if (i == elem)
1223+ break;
1224+ }
1225+
1226+ if (elem == &nf_hooks[info->pf][info->hook]) {
1227+ /* The module which sent it to userspace is gone. */
1228+ NFDEBUG("%s: module disappeared, dropping packet.\n",
1229+ __FUNCTION__);
1230+ verdict = NF_DROP;
1231+ }
1232+
1233+ /* Continue traversal iff userspace said ok... */
1234+ if (verdict == NF_REPEAT) {
1235+ elem = elem->prev;
1236+ verdict = NF_ACCEPT;
1237+ }
1238+
1239+ if (verdict == NF_ACCEPT) {
1240+ next_hook:
1241+ verdict = nf_iterate(&nf_hooks[info->pf][info->hook],
1242+ &skb, info->hook,
1243+ info->indev, info->outdev, &elem,
1244+ info->okfn, INT_MIN);
1245+ }
1246+
1247+ switch (verdict) {
1248+ case NF_ACCEPT:
1249+ info->okfn(skb);
1250+ break;
1251+
1252+ case NF_QUEUE:
1253+ if (!nf_queue(skb, elem, info->pf, info->hook,
1254+ info->indev, info->outdev, info->okfn))
1255+ goto next_hook;
1256+ break;
1257+ }
1258+ rcu_read_unlock();
1259+
1260+ if (verdict == NF_DROP)
1261+ kfree_skb(skb);
1262+
1263+ kfree(info);
1264+ return;
1265+}
1266+
1267+#ifdef CONFIG_INET
1268+/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
1269+int ip_route_me_harder(struct sk_buff **pskb)
1270+{
1271+ struct iphdr *iph = (*pskb)->nh.iph;
1272+ struct rtable *rt;
1273+ struct flowi fl = {};
1274+ struct dst_entry *odst;
1275+ unsigned int hh_len;
1276+
1277+ /* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
1278+ * packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
1279+ */
1280+ if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
1281+ fl.nl_u.ip4_u.daddr = iph->daddr;
1282+ fl.nl_u.ip4_u.saddr = iph->saddr;
1283+ fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
1284+ fl.oif = (*pskb)->sk ? (*pskb)->sk->sk_bound_dev_if : 0;
1285+#ifdef CONFIG_IP_ROUTE_FWMARK
1286+ fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
1287+#endif
1288+ if (ip_route_output_key(&rt, &fl) != 0)
1289+ return -1;
1290+
1291+ /* Drop old route. */
1292+ dst_release((*pskb)->dst);
1293+ (*pskb)->dst = &rt->u.dst;
1294+ } else {
1295+ /* non-local src, find valid iif to satisfy
1296+ * rp-filter when calling ip_route_input. */
1297+ fl.nl_u.ip4_u.daddr = iph->saddr;
1298+ if (ip_route_output_key(&rt, &fl) != 0)
1299+ return -1;
1300+
1301+ odst = (*pskb)->dst;
1302+ if (ip_route_input(*pskb, iph->daddr, iph->saddr,
1303+ RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
1304+ dst_release(&rt->u.dst);
1305+ return -1;
1306+ }
1307+ dst_release(&rt->u.dst);
1308+ dst_release(odst);
1309+ }
1310+
1311+ if ((*pskb)->dst->error)
1312+ return -1;
1313+
1314+ /* Change in oif may mean change in hh_len. */
1315+ hh_len = (*pskb)->dst->dev->hard_header_len;
1316+ if (skb_headroom(*pskb) < hh_len) {
1317+ struct sk_buff *nskb;
1318+
1319+ nskb = skb_realloc_headroom(*pskb, hh_len);
1320+ if (!nskb)
1321+ return -1;
1322+ if ((*pskb)->sk)
1323+ skb_set_owner_w(nskb, (*pskb)->sk);
1324+ kfree_skb(*pskb);
1325+ *pskb = nskb;
1326+ }
1327+
1328+ return 0;
1329+}
1330+
1331+int skb_ip_make_writable(struct sk_buff **pskb, unsigned int writable_len)
1332+{
1333+ struct sk_buff *nskb;
1334+ unsigned int iplen;
1335+
1336+ if (writable_len > (*pskb)->len)
1337+ return 0;
1338+
1339+ /* Not exclusive use of packet? Must copy. */
1340+ if (skb_shared(*pskb) || skb_cloned(*pskb))
1341+ goto copy_skb;
1342+
1343+ /* Alexey says IP hdr is always modifiable and linear, so ok. */
1344+ if (writable_len <= (*pskb)->nh.iph->ihl*4)
1345+ return 1;
1346+
1347+ iplen = writable_len - (*pskb)->nh.iph->ihl*4;
1348+
1349+ /* DaveM says protocol headers are also modifiable. */
1350+ switch ((*pskb)->nh.iph->protocol) {
1351+ case IPPROTO_TCP: {
1352+ struct tcphdr hdr;
1353+ if (skb_copy_bits(*pskb, (*pskb)->nh.iph->ihl*4,
1354+ &hdr, sizeof(hdr)) != 0)
1355+ goto copy_skb;
1356+ if (writable_len <= (*pskb)->nh.iph->ihl*4 + hdr.doff*4)
1357+ goto pull_skb;
1358+ goto copy_skb;
1359+ }
1360+ case IPPROTO_UDP:
1361+ if (writable_len<=(*pskb)->nh.iph->ihl*4+sizeof(struct udphdr))
1362+ goto pull_skb;
1363+ goto copy_skb;
1364+ case IPPROTO_ICMP:
1365+ if (writable_len
1366+ <= (*pskb)->nh.iph->ihl*4 + sizeof(struct icmphdr))
1367+ goto pull_skb;
1368+ goto copy_skb;
1369+ /* Insert other cases here as desired */
1370+ }
1371+
1372+copy_skb:
1373+ nskb = skb_copy(*pskb, GFP_ATOMIC);
1374+ if (!nskb)
1375+ return 0;
1376+ BUG_ON(skb_is_nonlinear(nskb));
1377+
1378+ /* Rest of kernel will get very unhappy if we pass it a
1379+ suddenly-orphaned skbuff */
1380+ if ((*pskb)->sk)
1381+ skb_set_owner_w(nskb, (*pskb)->sk);
1382+ kfree_skb(*pskb);
1383+ *pskb = nskb;
1384+ return 1;
1385+
1386+pull_skb:
1387+ return pskb_may_pull(*pskb, writable_len);
1388+}
1389+EXPORT_SYMBOL(skb_ip_make_writable);
1390+#endif /*CONFIG_INET*/
1391+
1392+
1393+/* This does not belong here, but ipt_REJECT needs it if connection
1394+ tracking in use: without this, connection may not be in hash table,
1395+ and hence manufactured ICMP or RST packets will not be associated
1396+ with it. */
1397+void (*ip_ct_attach)(struct sk_buff *, struct nf_ct_info *);
1398+
1399+void __init netfilter_init(void)
1400+{
1401+ int i, h;
1402+
1403+ for (i = 0; i < NPROTO; i++) {
1404+ for (h = 0; h < NF_MAX_HOOKS; h++)
1405+ INIT_LIST_HEAD(&nf_hooks[i][h]);
1406+ }
1407+}
1408+
1409+EXPORT_SYMBOL(ip_ct_attach);
1410+EXPORT_SYMBOL(ip_route_me_harder);
1411+EXPORT_SYMBOL(nf_getsockopt);
1412+EXPORT_SYMBOL(nf_hook_slow);
1413+EXPORT_SYMBOL(nf_hooks);
1414+EXPORT_SYMBOL(nf_register_hook);
1415+EXPORT_SYMBOL(nf_register_queue_handler);
1416+EXPORT_SYMBOL(nf_register_sockopt);
1417+EXPORT_SYMBOL(nf_reinject);
1418+EXPORT_SYMBOL(nf_setsockopt);
1419+EXPORT_SYMBOL(nf_unregister_hook);
1420+EXPORT_SYMBOL(nf_unregister_queue_handler);
1421+EXPORT_SYMBOL(nf_unregister_sockopt);
1422diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/Kconfig linux-2.6.4-rc2/net/ipv4/netfilter/Kconfig
1423--- linux-2.6.4-rc2.org/net/ipv4/netfilter/Kconfig 2004-03-04 06:16:58.000000000 +0000
1424+++ linux-2.6.4-rc2/net/ipv4/netfilter/Kconfig 2004-03-08 08:48:59.000000000 +0000
1425@@ -579,5 +579,89 @@
1426
1427 To compile it as a module, choose M here. If unsure, say N.
1428
1429+config IP_NF_TARGET_IPV4OPTSSTRIP
1430+ tristate 'IPV4OPTSSTRIP target support'
1431+ depends on IP_NF_MANGLE
1432+ help
1433+
1434+config IP_NF_TARGET_TTL
1435+ tristate 'TTL target support'
1436+ depends on IP_NF_MANGLE
1437+ help
1438+
1439+config IP_NF_MATCH_CONNLIMIT
1440+ tristate 'Connections/IP limit match support'
1441+ depends on IP_NF_IPTABLES
1442+ help
1443+
1444+config IP_NF_MATCH_DSTLIMIT
1445+ tristate 'dstlimit match support'
1446+ depends on IP_NF_IPTABLES
1447+ help
1448+
1449+config IP_NF_MATCH_FUZZY
1450+ tristate 'fuzzy match support'
1451+ depends on IP_NF_IPTABLES
1452+ help
1453+
1454+config IP_NF_MATCH_IPV4OPTIONS
1455+ tristate 'IPV4OPTIONS match support'
1456+ depends on IP_NF_IPTABLES
1457+ help
1458+
1459+config IP_NF_MATCH_MPORT
1460+ tristate 'Multiple port with ranges match support'
1461+ depends on IP_NF_IPTABLES
1462+ help
1463+
1464+config IP_NF_MATCH_NTH
1465+ tristate 'Nth match support'
1466+ depends on IP_NF_IPTABLES
1467+ help
1468+
1469+config IP_NF_MATCH_QUOTA
1470+ tristate 'quota match support'
1471+ depends on IP_NF_IPTABLES
1472+ help
1473+
1474+config IP_NF_TARGET_NOTRACK
1475+ tristate 'NOTRACK target support'
1476+ depends on IP_NF_RAW
1477+ help
1478+ The NOTRACK target allows a select rule to specify
1479+ which packets *not* to enter the conntrack/NAT
1480+ subsystem with all the consequences (no ICMP error tracking,
1481+ no protocol helpers for the selected packets).
1482+
1483+ If you want to compile it as a module, say M here and read
1484+ <file:Documentation/modules.txt>. If unsure, say `N'.
1485+
1486+config IP_NF_RAW
1487+ tristate 'raw table support (required for NOTRACK/TRACE)'
1488+ depends on IP_NF_IPTABLES
1489+ help
1490+ This option adds a `raw' table to iptables. This table is the very
1491+ first in the netfilter framework and hooks in at the PREROUTING
1492+ and OUTPUT chains.
1493+
1494+ If you want to compile it as a module, say M here and read
1495+ <file:Documentation/modules.txt>. If unsure, say `N'.
1496+ help
1497+
1498+config IP_NF_MATCH_REALM
1499+ tristate 'realm match support'
1500+ depends on IP_NF_IPTABLES && NET_CLS_ROUTE
1501+ help
1502+
1503+config IP_NF_MATCH_SCTP
1504+ tristate 'SCTP protocol match support'
1505+ depends on IP_NF_IPTABLES
1506+ help
1507+
1508+config IP_NF_MATCH_U32
1509+ tristate 'U32 match support'
1510+ depends on IP_NF_IPTABLES
1511+ help
1512+
1513 endmenu
1514
1515diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/Makefile linux-2.6.4-rc2/net/ipv4/netfilter/Makefile
1516--- linux-2.6.4-rc2.org/net/ipv4/netfilter/Makefile 2004-03-04 06:16:38.000000000 +0000
1517+++ linux-2.6.4-rc2/net/ipv4/netfilter/Makefile 2004-03-08 08:48:59.000000000 +0000
1518@@ -38,19 +38,33 @@
1519 obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
1520 obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
1521 obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
1522+obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
1523
1524 # matches
1525 obj-$(CONFIG_IP_NF_MATCH_HELPER) += ipt_helper.o
1526 obj-$(CONFIG_IP_NF_MATCH_LIMIT) += ipt_limit.o
1527+obj-$(CONFIG_IP_NF_MATCH_SCTP) += ipt_sctp.o
1528+obj-$(CONFIG_IP_NF_MATCH_QUOTA) += ipt_quota.o
1529+obj-$(CONFIG_IP_NF_MATCH_DSTLIMIT) += ipt_dstlimit.o
1530 obj-$(CONFIG_IP_NF_MATCH_MARK) += ipt_mark.o
1531 obj-$(CONFIG_IP_NF_MATCH_MAC) += ipt_mac.o
1532 obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
1533
1534 obj-$(CONFIG_IP_NF_MATCH_PKTTYPE) += ipt_pkttype.o
1535 obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
1536+
1537+obj-$(CONFIG_IP_NF_MATCH_MPORT) += ipt_mport.o
1538+
1539 obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
1540 obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
1541
1542+obj-$(CONFIG_IP_NF_MATCH_NTH) += ipt_nth.o
1543+
1544+obj-$(CONFIG_IP_NF_MATCH_IPV4OPTIONS) += ipt_ipv4options.o
1545+
1546+
1547+obj-$(CONFIG_IP_NF_MATCH_FUZZY) += ipt_fuzzy.o
1548+
1549 obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
1550
1551 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
1552@@ -59,10 +73,15 @@
1553
1554 obj-$(CONFIG_IP_NF_MATCH_LENGTH) += ipt_length.o
1555
1556+obj-$(CONFIG_IP_NF_MATCH_U32) += ipt_u32.o
1557+
1558+
1559 obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
1560 obj-$(CONFIG_IP_NF_MATCH_STATE) += ipt_state.o
1561+obj-$(CONFIG_IP_NF_MATCH_CONNLIMIT) += ipt_connlimit.o
1562 obj-$(CONFIG_IP_NF_MATCH_CONNTRACK) += ipt_conntrack.o
1563 obj-$(CONFIG_IP_NF_MATCH_TCPMSS) += ipt_tcpmss.o
1564+obj-$(CONFIG_IP_NF_MATCH_REALM) += ipt_realm.o
1565
1566 obj-$(CONFIG_IP_NF_MATCH_PHYSDEV) += ipt_physdev.o
1567
1568@@ -79,8 +98,11 @@
1569 obj-$(CONFIG_IP_NF_TARGET_CLASSIFY) += ipt_CLASSIFY.o
1570 obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
1571 obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
1572+obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
1573+obj-$(CONFIG_IP_NF_TARGET_IPV4OPTSSTRIP) += ipt_IPV4OPTSSTRIP.o
1574 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
1575 obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
1576+obj-$(CONFIG_IP_NF_TARGET_NOTRACK) += ipt_NOTRACK.o
1577
1578 # generic ARP tables
1579 obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
1580diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_core.c linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_core.c
1581--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_core.c 2004-03-04 06:16:34.000000000 +0000
1582+++ linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_core.c 2004-03-08 08:48:52.000000000 +0000
1583@@ -67,6 +67,7 @@
1584 static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
1585 struct list_head *ip_conntrack_hash;
1586 static kmem_cache_t *ip_conntrack_cachep;
1587+struct ip_conntrack ip_conntrack_untracked;
1588
1589 extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
1590
1591@@ -691,42 +692,50 @@
1592 struct ip_conntrack_expect *, tuple);
1593 READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
1594
1595- /* If master is not in hash table yet (ie. packet hasn't left
1596- this machine yet), how can other end know about expected?
1597- Hence these are not the droids you are looking for (if
1598- master ct never got confirmed, we'd hold a reference to it
1599- and weird things would happen to future packets). */
1600- if (expected && !is_confirmed(expected->expectant))
1601- expected = NULL;
1602-
1603- /* Look up the conntrack helper for master connections only */
1604- if (!expected)
1605- conntrack->helper = ip_ct_find_helper(&repl_tuple);
1606-
1607- /* If the expectation is dying, then this is a loser. */
1608- if (expected
1609- && expected->expectant->helper->timeout
1610- && ! del_timer(&expected->timeout))
1611- expected = NULL;
1612-
1613 if (expected) {
1614- DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
1615- conntrack, expected);
1616- /* Welcome, Mr. Bond. We've been expecting you... */
1617- IP_NF_ASSERT(master_ct(conntrack));
1618- __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
1619- conntrack->master = expected;
1620- expected->sibling = conntrack;
1621- LIST_DELETE(&ip_conntrack_expect_list, expected);
1622- expected->expectant->expecting--;
1623- nf_conntrack_get(&master_ct(conntrack)->infos[0]);
1624- }
1625- atomic_inc(&ip_conntrack_count);
1626+ /* If master is not in hash table yet (ie. packet hasn't left
1627+ this machine yet), how can other end know about expected?
1628+ Hence these are not the droids you are looking for (if
1629+ master ct never got confirmed, we'd hold a reference to it
1630+ and weird things would happen to future packets). */
1631+ if (!is_confirmed(expected->expectant)) {
1632+
1633+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
1634+ goto end;
1635+ }
1636+
1637+ /* Expectation is dying... */
1638+ if (expected->expectant->helper->timeout
1639+ && ! del_timer(&expected->timeout)) {
1640+ goto end;
1641+ }
1642+
1643+ DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
1644+ conntrack, expected);
1645+ /* Welcome, Mr. Bond. We've been expecting you... */
1646+ IP_NF_ASSERT(master_ct(conntrack));
1647+ __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
1648+ conntrack->master = expected;
1649+ expected->sibling = conntrack;
1650+ LIST_DELETE(&ip_conntrack_expect_list, expected);
1651+ expected->expectant->expecting--;
1652+ nf_conntrack_get(&master_ct(conntrack)->infos[0]);
1653+
1654+ /* this is a braindead... --pablo */
1655+ atomic_inc(&ip_conntrack_count);
1656+ WRITE_UNLOCK(&ip_conntrack_lock);
1657+
1658+ if (expected->expectfn)
1659+ expected->expectfn(conntrack);
1660+
1661+ goto ret;
1662+ } else
1663+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
1664+
1665+end: atomic_inc(&ip_conntrack_count);
1666 WRITE_UNLOCK(&ip_conntrack_lock);
1667
1668- if (expected && expected->expectfn)
1669- expected->expectfn(conntrack);
1670- return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
1671+ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
1672 }
1673
1674 /* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
1675@@ -794,6 +803,15 @@
1676 int set_reply;
1677 int ret;
1678
1679+ /* Never happen */
1680+ if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
1681+ if (net_ratelimit()) {
1682+ printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
1683+ (*pskb)->nh.iph->protocol, hooknum);
1684+ }
1685+ return NF_DROP;
1686+ }
1687+
1688 /* FIXME: Do this right please. --RR */
1689 (*pskb)->nfcache |= NFC_UNKNOWN;
1690
1691@@ -812,18 +830,10 @@
1692 }
1693 #endif
1694
1695- /* Previously seen (loopback)? Ignore. Do this before
1696- fragment check. */
1697+ /* Previously seen (loopback or untracked)? Ignore. */
1698 if ((*pskb)->nfct)
1699 return NF_ACCEPT;
1700
1701- /* Gather fragments. */
1702- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
1703- *pskb = ip_ct_gather_frags(*pskb);
1704- if (!*pskb)
1705- return NF_STOLEN;
1706- }
1707-
1708 proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
1709
1710 /* It may be an icmp error... */
1711@@ -1422,6 +1432,18 @@
1712
1713 /* For use by ipt_REJECT */
1714 ip_ct_attach = ip_conntrack_attach;
1715+
1716+ /* Set up fake conntrack:
1717+ - to never be deleted, not in any hashes */
1718+ atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
1719+ /* - and look it like as a confirmed connection */
1720+ set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
1721+ /* - and prepare the ctinfo field for REJECT & NAT. */
1722+ ip_conntrack_untracked.infos[IP_CT_NEW].master =
1723+ ip_conntrack_untracked.infos[IP_CT_RELATED].master =
1724+ ip_conntrack_untracked.infos[IP_CT_RELATED + IP_CT_IS_REPLY].master =
1725+ &ip_conntrack_untracked.ct_general;
1726+
1727 return ret;
1728
1729 err_free_hash:
1730diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_core.c.orig linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_core.c.orig
1731--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_core.c.orig 1970-01-01 00:00:00.000000000 +0000
1732+++ linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_core.c.orig 2004-03-08 08:48:35.000000000 +0000
1733@@ -0,0 +1,1441 @@
1734+/* Connection state tracking for netfilter. This is separated from,
1735+ but required by, the NAT layer; it can also be used by an iptables
1736+ extension. */
1737+
1738+/* (C) 1999-2001 Paul `Rusty' Russell
1739+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
1740+ *
1741+ * This program is free software; you can redistribute it and/or modify
1742+ * it under the terms of the GNU General Public License version 2 as
1743+ * published by the Free Software Foundation.
1744+ *
1745+ * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
1746+ * - new API and handling of conntrack/nat helpers
1747+ * - now capable of multiple expectations for one master
1748+ * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
1749+ * - add usage/reference counts to ip_conntrack_expect
1750+ * - export ip_conntrack[_expect]_{find_get,put} functions
1751+ * */
1752+
1753+#include <linux/config.h>
1754+#include <linux/types.h>
1755+#include <linux/icmp.h>
1756+#include <linux/ip.h>
1757+#include <linux/netfilter.h>
1758+#include <linux/netfilter_ipv4.h>
1759+#include <linux/module.h>
1760+#include <linux/skbuff.h>
1761+#include <linux/proc_fs.h>
1762+#include <linux/vmalloc.h>
1763+#include <net/checksum.h>
1764+#include <linux/stddef.h>
1765+#include <linux/sysctl.h>
1766+#include <linux/slab.h>
1767+#include <linux/random.h>
1768+#include <linux/jhash.h>
1769+/* For ERR_PTR(). Yeah, I know... --RR */
1770+#include <linux/fs.h>
1771+
1772+/* This rwlock protects the main hash table, protocol/helper/expected
1773+ registrations, conntrack timers*/
1774+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
1775+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
1776+
1777+#include <linux/netfilter_ipv4/ip_conntrack.h>
1778+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
1779+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
1780+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
1781+#include <linux/netfilter_ipv4/listhelp.h>
1782+
1783+#define IP_CONNTRACK_VERSION "2.1"
1784+
1785+#if 0
1786+#define DEBUGP printk
1787+#else
1788+#define DEBUGP(format, args...)
1789+#endif
1790+
1791+DECLARE_RWLOCK(ip_conntrack_lock);
1792+DECLARE_RWLOCK(ip_conntrack_expect_tuple_lock);
1793+
1794+void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
1795+LIST_HEAD(ip_conntrack_expect_list);
1796+LIST_HEAD(protocol_list);
1797+static LIST_HEAD(helpers);
1798+unsigned int ip_conntrack_htable_size = 0;
1799+int ip_conntrack_max;
1800+static atomic_t ip_conntrack_count = ATOMIC_INIT(0);
1801+struct list_head *ip_conntrack_hash;
1802+static kmem_cache_t *ip_conntrack_cachep;
1803+
1804+extern struct ip_conntrack_protocol ip_conntrack_generic_protocol;
1805+
1806+static inline int proto_cmpfn(const struct ip_conntrack_protocol *curr,
1807+ u_int8_t protocol)
1808+{
1809+ return protocol == curr->proto;
1810+}
1811+
1812+struct ip_conntrack_protocol *__ip_ct_find_proto(u_int8_t protocol)
1813+{
1814+ struct ip_conntrack_protocol *p;
1815+
1816+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1817+ p = LIST_FIND(&protocol_list, proto_cmpfn,
1818+ struct ip_conntrack_protocol *, protocol);
1819+ if (!p)
1820+ p = &ip_conntrack_generic_protocol;
1821+
1822+ return p;
1823+}
1824+
1825+struct ip_conntrack_protocol *ip_ct_find_proto(u_int8_t protocol)
1826+{
1827+ struct ip_conntrack_protocol *p;
1828+
1829+ READ_LOCK(&ip_conntrack_lock);
1830+ p = __ip_ct_find_proto(protocol);
1831+ READ_UNLOCK(&ip_conntrack_lock);
1832+ return p;
1833+}
1834+
1835+inline void
1836+ip_conntrack_put(struct ip_conntrack *ct)
1837+{
1838+ IP_NF_ASSERT(ct);
1839+ IP_NF_ASSERT(ct->infos[0].master);
1840+ /* nf_conntrack_put wants to go via an info struct, so feed it
1841+ one at random. */
1842+ nf_conntrack_put(&ct->infos[0]);
1843+}
1844+
1845+static int ip_conntrack_hash_rnd_initted;
1846+static unsigned int ip_conntrack_hash_rnd;
1847+
1848+static u_int32_t
1849+hash_conntrack(const struct ip_conntrack_tuple *tuple)
1850+{
1851+#if 0
1852+ dump_tuple(tuple);
1853+#endif
1854+ return (jhash_3words(tuple->src.ip,
1855+ (tuple->dst.ip ^ tuple->dst.protonum),
1856+ (tuple->src.u.all | (tuple->dst.u.all << 16)),
1857+ ip_conntrack_hash_rnd) % ip_conntrack_htable_size);
1858+}
1859+
1860+int
1861+get_tuple(const struct iphdr *iph,
1862+ const struct sk_buff *skb,
1863+ unsigned int dataoff,
1864+ struct ip_conntrack_tuple *tuple,
1865+ const struct ip_conntrack_protocol *protocol)
1866+{
1867+ /* Never happen */
1868+ if (iph->frag_off & htons(IP_OFFSET)) {
1869+ printk("ip_conntrack_core: Frag of proto %u.\n",
1870+ iph->protocol);
1871+ return 0;
1872+ }
1873+
1874+ tuple->src.ip = iph->saddr;
1875+ tuple->dst.ip = iph->daddr;
1876+ tuple->dst.protonum = iph->protocol;
1877+
1878+ return protocol->pkt_to_tuple(skb, dataoff, tuple);
1879+}
1880+
1881+static int
1882+invert_tuple(struct ip_conntrack_tuple *inverse,
1883+ const struct ip_conntrack_tuple *orig,
1884+ const struct ip_conntrack_protocol *protocol)
1885+{
1886+ inverse->src.ip = orig->dst.ip;
1887+ inverse->dst.ip = orig->src.ip;
1888+ inverse->dst.protonum = orig->dst.protonum;
1889+
1890+ return protocol->invert_tuple(inverse, orig);
1891+}
1892+
1893+
1894+/* ip_conntrack_expect helper functions */
1895+
1896+/* Compare tuple parts depending on mask. */
1897+static inline int expect_cmp(const struct ip_conntrack_expect *i,
1898+ const struct ip_conntrack_tuple *tuple)
1899+{
1900+ MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
1901+ return ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask);
1902+}
1903+
1904+static void
1905+destroy_expect(struct ip_conntrack_expect *exp)
1906+{
1907+ DEBUGP("destroy_expect(%p) use=%d\n", exp, atomic_read(&exp->use));
1908+ IP_NF_ASSERT(atomic_read(&exp->use));
1909+ IP_NF_ASSERT(!timer_pending(&exp->timeout));
1910+
1911+ kfree(exp);
1912+}
1913+
1914+
1915+inline void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
1916+{
1917+ IP_NF_ASSERT(exp);
1918+
1919+ if (atomic_dec_and_test(&exp->use)) {
1920+ /* usage count dropped to zero */
1921+ destroy_expect(exp);
1922+ }
1923+}
1924+
1925+static inline struct ip_conntrack_expect *
1926+__ip_ct_expect_find(const struct ip_conntrack_tuple *tuple)
1927+{
1928+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
1929+ MUST_BE_READ_LOCKED(&ip_conntrack_expect_tuple_lock);
1930+ return LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
1931+ struct ip_conntrack_expect *, tuple);
1932+}
1933+
1934+/* Find a expectation corresponding to a tuple. */
1935+struct ip_conntrack_expect *
1936+ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
1937+{
1938+ struct ip_conntrack_expect *exp;
1939+
1940+ READ_LOCK(&ip_conntrack_lock);
1941+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
1942+ exp = __ip_ct_expect_find(tuple);
1943+ if (exp)
1944+ atomic_inc(&exp->use);
1945+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
1946+ READ_UNLOCK(&ip_conntrack_lock);
1947+
1948+ return exp;
1949+}
1950+
1951+/* remove one specific expectation from all lists and drop refcount,
1952+ * does _NOT_ delete the timer. */
1953+static void __unexpect_related(struct ip_conntrack_expect *expect)
1954+{
1955+ DEBUGP("unexpect_related(%p)\n", expect);
1956+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
1957+
1958+ /* we're not allowed to unexpect a confirmed expectation! */
1959+ IP_NF_ASSERT(!expect->sibling);
1960+
1961+ /* delete from global and local lists */
1962+ list_del(&expect->list);
1963+ list_del(&expect->expected_list);
1964+
1965+ /* decrement expect-count of master conntrack */
1966+ if (expect->expectant)
1967+ expect->expectant->expecting--;
1968+
1969+ ip_conntrack_expect_put(expect);
1970+}
1971+
1972+ * remove one specific expectation from all lists, drop refcount
1973+ * and expire timer.
1974+ * This function can _NOT_ be called for confirmed expects! */
1975+static void unexpect_related(struct ip_conntrack_expect *expect)
1976+{
1977+ IP_NF_ASSERT(expect->expectant);
1978+ IP_NF_ASSERT(expect->expectant->helper);
1979+ /* if we are supposed to have a timer, but we can't delete
1980+ * it: race condition. __unexpect_related will
1981+ * be called by timeout function */
1982+ if (expect->expectant->helper->timeout
1983+ && !del_timer(&expect->timeout))
1984+ return;
1985+
1986+ __unexpect_related(expect);
1987+}
1988+
1989+/* delete all unconfirmed expectations for this conntrack */
1990+static void remove_expectations(struct ip_conntrack *ct, int drop_refcount)
1991+{
1992+ struct list_head *exp_entry, *next;
1993+ struct ip_conntrack_expect *exp;
1994+
1995+ DEBUGP("remove_expectations(%p)\n", ct);
1996+
1997+ list_for_each_safe(exp_entry, next, &ct->sibling_list) {
1998+ exp = list_entry(exp_entry, struct ip_conntrack_expect,
1999+ expected_list);
2000+
2001+ /* we skip established expectations, as we want to delete
2002+ * the un-established ones only */
2003+ if (exp->sibling) {
2004+ DEBUGP("remove_expectations: skipping established %p of %p\n", exp->sibling, ct);
2005+ if (drop_refcount) {
2006+ /* Indicate that this expectation's parent is dead */
2007+ ip_conntrack_put(exp->expectant);
2008+ exp->expectant = NULL;
2009+ }
2010+ continue;
2011+ }
2012+
2013+ IP_NF_ASSERT(list_inlist(&ip_conntrack_expect_list, exp));
2014+ IP_NF_ASSERT(exp->expectant == ct);
2015+
2016+ /* delete expectation from global and private lists */
2017+ unexpect_related(exp);
2018+ }
2019+}
2020+
2021+static void
2022+clean_from_lists(struct ip_conntrack *ct)
2023+{
2024+ unsigned int ho, hr;
2025+
2026+ DEBUGP("clean_from_lists(%p)\n", ct);
2027+ MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
2028+
2029+ ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
2030+ hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
2031+ LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
2032+ LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
2033+
2034+ /* Destroy all un-established, pending expectations */
2035+ remove_expectations(ct, 1);
2036+}
2037+
2038+static void
2039+destroy_conntrack(struct nf_conntrack *nfct)
2040+{
2041+ struct ip_conntrack *ct = (struct ip_conntrack *)nfct, *master = NULL;
2042+ struct ip_conntrack_protocol *proto;
2043+
2044+ DEBUGP("destroy_conntrack(%p)\n", ct);
2045+ IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
2046+ IP_NF_ASSERT(!timer_pending(&ct->timeout));
2047+
2048+ /* To make sure we don't get any weird locking issues here:
2049+ * destroy_conntrack() MUST NOT be called with a write lock
2050+ * to ip_conntrack_lock!!! -HW */
2051+ proto = ip_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
2052+ if (proto && proto->destroy)
2053+ proto->destroy(ct);
2054+
2055+ if (ip_conntrack_destroyed)
2056+ ip_conntrack_destroyed(ct);
2057+
2058+ WRITE_LOCK(&ip_conntrack_lock);
2059+ /* Delete us from our own list to prevent corruption later */
2060+ list_del(&ct->sibling_list);
2061+
2062+ /* Delete our master expectation */
2063+ if (ct->master) {
2064+ if (ct->master->expectant) {
2065+ /* can't call __unexpect_related here,
2066+ * since it would screw up expect_list */
2067+ list_del(&ct->master->expected_list);
2068+ master = ct->master->expectant;
2069+ }
2070+ kfree(ct->master);
2071+ }
2072+ WRITE_UNLOCK(&ip_conntrack_lock);
2073+
2074+ if (master)
2075+ ip_conntrack_put(master);
2076+
2077+ DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
2078+ kmem_cache_free(ip_conntrack_cachep, ct);
2079+ atomic_dec(&ip_conntrack_count);
2080+}
2081+
2082+static void death_by_timeout(unsigned long ul_conntrack)
2083+{
2084+ struct ip_conntrack *ct = (void *)ul_conntrack;
2085+
2086+ WRITE_LOCK(&ip_conntrack_lock);
2087+ clean_from_lists(ct);
2088+ WRITE_UNLOCK(&ip_conntrack_lock);
2089+ ip_conntrack_put(ct);
2090+}
2091+
2092+static inline int
2093+conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
2094+ const struct ip_conntrack_tuple *tuple,
2095+ const struct ip_conntrack *ignored_conntrack)
2096+{
2097+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
2098+ return i->ctrack != ignored_conntrack
2099+ && ip_ct_tuple_equal(tuple, &i->tuple);
2100+}
2101+
2102+static struct ip_conntrack_tuple_hash *
2103+__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
2104+ const struct ip_conntrack *ignored_conntrack)
2105+{
2106+ struct ip_conntrack_tuple_hash *h;
2107+ unsigned int hash = hash_conntrack(tuple);
2108+
2109+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
2110+ h = LIST_FIND(&ip_conntrack_hash[hash],
2111+ conntrack_tuple_cmp,
2112+ struct ip_conntrack_tuple_hash *,
2113+ tuple, ignored_conntrack);
2114+ return h;
2115+}
2116+
2117+/* Find a connection corresponding to a tuple. */
2118+struct ip_conntrack_tuple_hash *
2119+ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
2120+ const struct ip_conntrack *ignored_conntrack)
2121+{
2122+ struct ip_conntrack_tuple_hash *h;
2123+
2124+ READ_LOCK(&ip_conntrack_lock);
2125+ h = __ip_conntrack_find(tuple, ignored_conntrack);
2126+ if (h)
2127+ atomic_inc(&h->ctrack->ct_general.use);
2128+ READ_UNLOCK(&ip_conntrack_lock);
2129+
2130+ return h;
2131+}
2132+
2133+static inline struct ip_conntrack *
2134+__ip_conntrack_get(struct nf_ct_info *nfct, enum ip_conntrack_info *ctinfo)
2135+{
2136+ struct ip_conntrack *ct
2137+ = (struct ip_conntrack *)nfct->master;
2138+
2139+ /* ctinfo is the index of the nfct inside the conntrack */
2140+ *ctinfo = nfct - ct->infos;
2141+ IP_NF_ASSERT(*ctinfo >= 0 && *ctinfo < IP_CT_NUMBER);
2142+ return ct;
2143+}
2144+
2145+/* Return conntrack and conntrack_info given skb->nfct->master */
2146+struct ip_conntrack *
2147+ip_conntrack_get(struct sk_buff *skb, enum ip_conntrack_info *ctinfo)
2148+{
2149+ if (skb->nfct)
2150+ return __ip_conntrack_get(skb->nfct, ctinfo);
2151+ return NULL;
2152+}
2153+
2154+/* Confirm a connection given skb->nfct; places it in hash table */
2155+int
2156+__ip_conntrack_confirm(struct nf_ct_info *nfct)
2157+{
2158+ unsigned int hash, repl_hash;
2159+ struct ip_conntrack *ct;
2160+ enum ip_conntrack_info ctinfo;
2161+
2162+ ct = __ip_conntrack_get(nfct, &ctinfo);
2163+
2164+ /* ipt_REJECT uses ip_conntrack_attach to attach related
2165+ ICMP/TCP RST packets in other direction. Actual packet
2166+ which created connection will be IP_CT_NEW or for an
2167+ expected connection, IP_CT_RELATED. */
2168+ if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
2169+ return NF_ACCEPT;
2170+
2171+ hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
2172+ repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
2173+
2174+ /* We're not in hash table, and we refuse to set up related
2175+ connections for unconfirmed conns. But packet copies and
2176+ REJECT will give spurious warnings here. */
2177+ /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
2178+
2179+ /* No external references means no one else could have
2180+ confirmed us. */
2181+ IP_NF_ASSERT(!is_confirmed(ct));
2182+ DEBUGP("Confirming conntrack %p\n", ct);
2183+
2184+ WRITE_LOCK(&ip_conntrack_lock);
2185+ /* See if there's one in the list already, including reverse:
2186+ NAT could have grabbed it without realizing, since we're
2187+ not in the hash. If there is, we lost race. */
2188+ if (!LIST_FIND(&ip_conntrack_hash[hash],
2189+ conntrack_tuple_cmp,
2190+ struct ip_conntrack_tuple_hash *,
2191+ &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
2192+ && !LIST_FIND(&ip_conntrack_hash[repl_hash],
2193+ conntrack_tuple_cmp,
2194+ struct ip_conntrack_tuple_hash *,
2195+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
2196+ list_prepend(&ip_conntrack_hash[hash],
2197+ &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
2198+ list_prepend(&ip_conntrack_hash[repl_hash],
2199+ &ct->tuplehash[IP_CT_DIR_REPLY]);
2200+ /* Timer relative to confirmation time, not original
2201+ setting time, otherwise we'd get timer wrap in
2202+ weird delay cases. */
2203+ ct->timeout.expires += jiffies;
2204+ add_timer(&ct->timeout);
2205+ atomic_inc(&ct->ct_general.use);
2206+ set_bit(IPS_CONFIRMED_BIT, &ct->status);
2207+ WRITE_UNLOCK(&ip_conntrack_lock);
2208+ return NF_ACCEPT;
2209+ }
2210+
2211+ WRITE_UNLOCK(&ip_conntrack_lock);
2212+ return NF_DROP;
2213+}
2214+
2215+/* Returns true if a connection correspondings to the tuple (required
2216+ for NAT). */
2217+int
2218+ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
2219+ const struct ip_conntrack *ignored_conntrack)
2220+{
2221+ struct ip_conntrack_tuple_hash *h;
2222+
2223+ READ_LOCK(&ip_conntrack_lock);
2224+ h = __ip_conntrack_find(tuple, ignored_conntrack);
2225+ READ_UNLOCK(&ip_conntrack_lock);
2226+
2227+ return h != NULL;
2228+}
2229+
2230+/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
2231+struct ip_conntrack *
2232+icmp_error_track(struct sk_buff *skb,
2233+ enum ip_conntrack_info *ctinfo,
2234+ unsigned int hooknum)
2235+{
2236+ struct ip_conntrack_tuple innertuple, origtuple;
2237+ struct {
2238+ struct icmphdr icmp;
2239+ struct iphdr ip;
2240+ } inside;
2241+ struct ip_conntrack_protocol *innerproto;
2242+ struct ip_conntrack_tuple_hash *h;
2243+ int dataoff;
2244+
2245+ IP_NF_ASSERT(skb->nfct == NULL);
2246+
2247+ /* Not enough header? */
2248+ if (skb_copy_bits(skb, skb->nh.iph->ihl*4, &inside, sizeof(inside))!=0)
2249+ return NULL;
2250+
2251+ if (inside.icmp.type != ICMP_DEST_UNREACH
2252+ && inside.icmp.type != ICMP_SOURCE_QUENCH
2253+ && inside.icmp.type != ICMP_TIME_EXCEEDED
2254+ && inside.icmp.type != ICMP_PARAMETERPROB
2255+ && inside.icmp.type != ICMP_REDIRECT)
2256+ return NULL;
2257+
2258+ /* Ignore ICMP's containing fragments (shouldn't happen) */
2259+ if (inside.ip.frag_off & htons(IP_OFFSET)) {
2260+ DEBUGP("icmp_error_track: fragment of proto %u\n",
2261+ inside.ip.protocol);
2262+ return NULL;
2263+ }
2264+
2265+ innerproto = ip_ct_find_proto(inside.ip.protocol);
2266+ dataoff = skb->nh.iph->ihl*4 + sizeof(inside.icmp) + inside.ip.ihl*4;
2267+ /* Are they talking about one of our connections? */
2268+ if (!get_tuple(&inside.ip, skb, dataoff, &origtuple, innerproto)) {
2269+ DEBUGP("icmp_error: ! get_tuple p=%u", inside.ip.protocol);
2270+ return NULL;
2271+ }
2272+
2273+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
2274+ been preserved inside the ICMP. */
2275+ if (!invert_tuple(&innertuple, &origtuple, innerproto)) {
2276+ DEBUGP("icmp_error_track: Can't invert tuple\n");
2277+ return NULL;
2278+ }
2279+
2280+ *ctinfo = IP_CT_RELATED;
2281+
2282+ h = ip_conntrack_find_get(&innertuple, NULL);
2283+ if (!h) {
2284+ /* Locally generated ICMPs will match inverted if they
2285+ haven't been SNAT'ed yet */
2286+ /* FIXME: NAT code has to handle half-done double NAT --RR */
2287+ if (hooknum == NF_IP_LOCAL_OUT)
2288+ h = ip_conntrack_find_get(&origtuple, NULL);
2289+
2290+ if (!h) {
2291+ DEBUGP("icmp_error_track: no match\n");
2292+ return NULL;
2293+ }
2294+ /* Reverse direction from that found */
2295+ if (DIRECTION(h) != IP_CT_DIR_REPLY)
2296+ *ctinfo += IP_CT_IS_REPLY;
2297+ } else {
2298+ if (DIRECTION(h) == IP_CT_DIR_REPLY)
2299+ *ctinfo += IP_CT_IS_REPLY;
2300+ }
2301+
2302+ /* Update skb to refer to this connection */
2303+ skb->nfct = &h->ctrack->infos[*ctinfo];
2304+ return h->ctrack;
2305+}
2306+
2307+/* There's a small race here where we may free a just-assured
2308+ connection. Too bad: we're in trouble anyway. */
2309+static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
2310+{
2311+ return !(test_bit(IPS_ASSURED_BIT, &i->ctrack->status));
2312+}
2313+
2314+static int early_drop(struct list_head *chain)
2315+{
2316+ /* Traverse backwards: gives us oldest, which is roughly LRU */
2317+ struct ip_conntrack_tuple_hash *h;
2318+ int dropped = 0;
2319+
2320+ READ_LOCK(&ip_conntrack_lock);
2321+ h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
2322+ if (h)
2323+ atomic_inc(&h->ctrack->ct_general.use);
2324+ READ_UNLOCK(&ip_conntrack_lock);
2325+
2326+ if (!h)
2327+ return dropped;
2328+
2329+ if (del_timer(&h->ctrack->timeout)) {
2330+ death_by_timeout((unsigned long)h->ctrack);
2331+ dropped = 1;
2332+ }
2333+ ip_conntrack_put(h->ctrack);
2334+ return dropped;
2335+}
2336+
2337+static inline int helper_cmp(const struct ip_conntrack_helper *i,
2338+ const struct ip_conntrack_tuple *rtuple)
2339+{
2340+ return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
2341+}
2342+
2343+struct ip_conntrack_helper *ip_ct_find_helper(const struct ip_conntrack_tuple *tuple)
2344+{
2345+ return LIST_FIND(&helpers, helper_cmp,
2346+ struct ip_conntrack_helper *,
2347+ tuple);
2348+}
2349+
2350+/* Allocate a new conntrack: we return -ENOMEM if classification
2351+ failed due to stress. Otherwise it really is unclassifiable. */
2352+static struct ip_conntrack_tuple_hash *
2353+init_conntrack(const struct ip_conntrack_tuple *tuple,
2354+ struct ip_conntrack_protocol *protocol,
2355+ struct sk_buff *skb)
2356+{
2357+ struct ip_conntrack *conntrack;
2358+ struct ip_conntrack_tuple repl_tuple;
2359+ size_t hash;
2360+ struct ip_conntrack_expect *expected;
2361+ int i;
2362+ static unsigned int drop_next;
2363+
2364+ if (!ip_conntrack_hash_rnd_initted) {
2365+ get_random_bytes(&ip_conntrack_hash_rnd, 4);
2366+ ip_conntrack_hash_rnd_initted = 1;
2367+ }
2368+
2369+ hash = hash_conntrack(tuple);
2370+
2371+ if (ip_conntrack_max &&
2372+ atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
2373+ /* Try dropping from random chain, or else from the
2374+ chain about to put into (in case they're trying to
2375+ bomb one hash chain). */
2376+ unsigned int next = (drop_next++)%ip_conntrack_htable_size;
2377+
2378+ if (!early_drop(&ip_conntrack_hash[next])
2379+ && !early_drop(&ip_conntrack_hash[hash])) {
2380+ if (net_ratelimit())
2381+ printk(KERN_WARNING
2382+ "ip_conntrack: table full, dropping"
2383+ " packet.\n");
2384+ return ERR_PTR(-ENOMEM);
2385+ }
2386+ }
2387+
2388+ if (!invert_tuple(&repl_tuple, tuple, protocol)) {
2389+ DEBUGP("Can't invert tuple.\n");
2390+ return NULL;
2391+ }
2392+
2393+ conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
2394+ if (!conntrack) {
2395+ DEBUGP("Can't allocate conntrack.\n");
2396+ return ERR_PTR(-ENOMEM);
2397+ }
2398+
2399+ memset(conntrack, 0, sizeof(*conntrack));
2400+ atomic_set(&conntrack->ct_general.use, 1);
2401+ conntrack->ct_general.destroy = destroy_conntrack;
2402+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *tuple;
2403+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].ctrack = conntrack;
2404+ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = repl_tuple;
2405+ conntrack->tuplehash[IP_CT_DIR_REPLY].ctrack = conntrack;
2406+ for (i=0; i < IP_CT_NUMBER; i++)
2407+ conntrack->infos[i].master = &conntrack->ct_general;
2408+
2409+ if (!protocol->new(conntrack, skb)) {
2410+ kmem_cache_free(ip_conntrack_cachep, conntrack);
2411+ return NULL;
2412+ }
2413+ /* Don't set timer yet: wait for confirmation */
2414+ init_timer(&conntrack->timeout);
2415+ conntrack->timeout.data = (unsigned long)conntrack;
2416+ conntrack->timeout.function = death_by_timeout;
2417+
2418+ INIT_LIST_HEAD(&conntrack->sibling_list);
2419+
2420+ WRITE_LOCK(&ip_conntrack_lock);
2421+ /* Need finding and deleting of expected ONLY if we win race */
2422+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
2423+ expected = LIST_FIND(&ip_conntrack_expect_list, expect_cmp,
2424+ struct ip_conntrack_expect *, tuple);
2425+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
2426+
2427+ if (expected) {
2428+ /* If master is not in hash table yet (ie. packet hasn't left
2429+ this machine yet), how can other end know about expected?
2430+ Hence these are not the droids you are looking for (if
2431+ master ct never got confirmed, we'd hold a reference to it
2432+ and weird things would happen to future packets). */
2433+ if (!is_confirmed(expected->expectant)) {
2434+
2435+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
2436+ goto end;
2437+ }
2438+
2439+ /* Expectation is dying... */
2440+ if (expected->expectant->helper->timeout
2441+ && ! del_timer(&expected->timeout)) {
2442+ goto end;
2443+ }
2444+
2445+ DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
2446+ conntrack, expected);
2447+ /* Welcome, Mr. Bond. We've been expecting you... */
2448+ IP_NF_ASSERT(master_ct(conntrack));
2449+ __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
2450+ conntrack->master = expected;
2451+ expected->sibling = conntrack;
2452+ LIST_DELETE(&ip_conntrack_expect_list, expected);
2453+ expected->expectant->expecting--;
2454+ nf_conntrack_get(&master_ct(conntrack)->infos[0]);
2455+
2456+ /* this is a braindead... --pablo */
2457+ atomic_inc(&ip_conntrack_count);
2458+ WRITE_UNLOCK(&ip_conntrack_lock);
2459+
2460+ if (expected->expectfn)
2461+ expected->expectfn(conntrack);
2462+
2463+ goto ret;
2464+ } else
2465+ conntrack->helper = ip_ct_find_helper(&repl_tuple);
2466+
2467+end: atomic_inc(&ip_conntrack_count);
2468+ WRITE_UNLOCK(&ip_conntrack_lock);
2469+
2470+ret: return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
2471+}
2472+
2473+/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
2474+static inline struct ip_conntrack *
2475+resolve_normal_ct(struct sk_buff *skb,
2476+ struct ip_conntrack_protocol *proto,
2477+ int *set_reply,
2478+ unsigned int hooknum,
2479+ enum ip_conntrack_info *ctinfo)
2480+{
2481+ struct ip_conntrack_tuple tuple;
2482+ struct ip_conntrack_tuple_hash *h;
2483+
2484+ IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
2485+
2486+ if (!get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, &tuple, proto))
2487+ return NULL;
2488+
2489+ /* look for tuple match */
2490+ h = ip_conntrack_find_get(&tuple, NULL);
2491+ if (!h) {
2492+ h = init_conntrack(&tuple, proto, skb);
2493+ if (!h)
2494+ return NULL;
2495+ if (IS_ERR(h))
2496+ return (void *)h;
2497+ }
2498+
2499+ /* It exists; we have (non-exclusive) reference. */
2500+ if (DIRECTION(h) == IP_CT_DIR_REPLY) {
2501+ *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
2502+ /* Please set reply bit if this packet OK */
2503+ *set_reply = 1;
2504+ } else {
2505+ /* Once we've had two way comms, always ESTABLISHED. */
2506+ if (test_bit(IPS_SEEN_REPLY_BIT, &h->ctrack->status)) {
2507+ DEBUGP("ip_conntrack_in: normal packet for %p\n",
2508+ h->ctrack);
2509+ *ctinfo = IP_CT_ESTABLISHED;
2510+ } else if (test_bit(IPS_EXPECTED_BIT, &h->ctrack->status)) {
2511+ DEBUGP("ip_conntrack_in: related packet for %p\n",
2512+ h->ctrack);
2513+ *ctinfo = IP_CT_RELATED;
2514+ } else {
2515+ DEBUGP("ip_conntrack_in: new packet for %p\n",
2516+ h->ctrack);
2517+ *ctinfo = IP_CT_NEW;
2518+ }
2519+ *set_reply = 0;
2520+ }
2521+ skb->nfct = &h->ctrack->infos[*ctinfo];
2522+ return h->ctrack;
2523+}
2524+
2525+/* Netfilter hook itself. */
2526+unsigned int ip_conntrack_in(unsigned int hooknum,
2527+ struct sk_buff **pskb,
2528+ const struct net_device *in,
2529+ const struct net_device *out,
2530+ int (*okfn)(struct sk_buff *))
2531+{
2532+ struct ip_conntrack *ct;
2533+ enum ip_conntrack_info ctinfo;
2534+ struct ip_conntrack_protocol *proto;
2535+ int set_reply;
2536+ int ret;
2537+
2538+ /* FIXME: Do this right please. --RR */
2539+ (*pskb)->nfcache |= NFC_UNKNOWN;
2540+
2541+/* Doesn't cover locally-generated broadcast, so not worth it. */
2542+#if 0
2543+ /* Ignore broadcast: no `connection'. */
2544+ if ((*pskb)->pkt_type == PACKET_BROADCAST) {
2545+ printk("Broadcast packet!\n");
2546+ return NF_ACCEPT;
2547+ } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
2548+ == htonl(0x000000FF)) {
2549+ printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
2550+ NIPQUAD((*pskb)->nh.iph->saddr),
2551+ NIPQUAD((*pskb)->nh.iph->daddr),
2552+ (*pskb)->sk, (*pskb)->pkt_type);
2553+ }
2554+#endif
2555+
2556+ /* Previously seen (loopback)? Ignore. Do this before
2557+ fragment check. */
2558+ if ((*pskb)->nfct)
2559+ return NF_ACCEPT;
2560+
2561+ /* Gather fragments. */
2562+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
2563+ *pskb = ip_ct_gather_frags(*pskb);
2564+ if (!*pskb)
2565+ return NF_STOLEN;
2566+ }
2567+
2568+ proto = ip_ct_find_proto((*pskb)->nh.iph->protocol);
2569+
2570+ /* It may be an icmp error... */
2571+ if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
2572+ && icmp_error_track(*pskb, &ctinfo, hooknum))
2573+ return NF_ACCEPT;
2574+
2575+ if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo)))
2576+ /* Not valid part of a connection */
2577+ return NF_ACCEPT;
2578+
2579+ if (IS_ERR(ct))
2580+ /* Too stressed to deal. */
2581+ return NF_DROP;
2582+
2583+ IP_NF_ASSERT((*pskb)->nfct);
2584+
2585+ ret = proto->packet(ct, *pskb, ctinfo);
2586+ if (ret == -1) {
2587+ /* Invalid */
2588+ nf_conntrack_put((*pskb)->nfct);
2589+ (*pskb)->nfct = NULL;
2590+ return NF_ACCEPT;
2591+ }
2592+
2593+ if (ret != NF_DROP && ct->helper) {
2594+ ret = ct->helper->help(*pskb, ct, ctinfo);
2595+ if (ret == -1) {
2596+ /* Invalid */
2597+ nf_conntrack_put((*pskb)->nfct);
2598+ (*pskb)->nfct = NULL;
2599+ return NF_ACCEPT;
2600+ }
2601+ }
2602+ if (set_reply)
2603+ set_bit(IPS_SEEN_REPLY_BIT, &ct->status);
2604+
2605+ return ret;
2606+}
2607+
2608+int invert_tuplepr(struct ip_conntrack_tuple *inverse,
2609+ const struct ip_conntrack_tuple *orig)
2610+{
2611+ return invert_tuple(inverse, orig, ip_ct_find_proto(orig->dst.protonum));
2612+}
2613+
2614+static inline int resent_expect(const struct ip_conntrack_expect *i,
2615+ const struct ip_conntrack_tuple *tuple,
2616+ const struct ip_conntrack_tuple *mask)
2617+{
2618+ DEBUGP("resent_expect\n");
2619+ DEBUGP(" tuple: "); DUMP_TUPLE(&i->tuple);
2620+ DEBUGP("ct_tuple: "); DUMP_TUPLE(&i->ct_tuple);
2621+ DEBUGP("test tuple: "); DUMP_TUPLE(tuple);
2622+ return (((i->ct_tuple.dst.protonum == 0 && ip_ct_tuple_equal(&i->tuple, tuple))
2623+ || (i->ct_tuple.dst.protonum && ip_ct_tuple_equal(&i->ct_tuple, tuple)))
2624+ && ip_ct_tuple_equal(&i->mask, mask));
2625+}
2626+
2627+/* Would two expected things clash? */
2628+static inline int expect_clash(const struct ip_conntrack_expect *i,
2629+ const struct ip_conntrack_tuple *tuple,
2630+ const struct ip_conntrack_tuple *mask)
2631+{
2632+ /* Part covered by intersection of masks must be unequal,
2633+ otherwise they clash */
2634+ struct ip_conntrack_tuple intersect_mask
2635+ = { { i->mask.src.ip & mask->src.ip,
2636+ { i->mask.src.u.all & mask->src.u.all } },
2637+ { i->mask.dst.ip & mask->dst.ip,
2638+ { i->mask.dst.u.all & mask->dst.u.all },
2639+ i->mask.dst.protonum & mask->dst.protonum } };
2640+
2641+ return ip_ct_tuple_mask_cmp(&i->tuple, tuple, &intersect_mask);
2642+}
2643+
2644+inline void ip_conntrack_unexpect_related(struct ip_conntrack_expect *expect)
2645+{
2646+ WRITE_LOCK(&ip_conntrack_lock);
2647+ unexpect_related(expect);
2648+ WRITE_UNLOCK(&ip_conntrack_lock);
2649+}
2650+
2651+static void expectation_timed_out(unsigned long ul_expect)
2652+{
2653+ struct ip_conntrack_expect *expect = (void *) ul_expect;
2654+
2655+ DEBUGP("expectation %p timed out\n", expect);
2656+ WRITE_LOCK(&ip_conntrack_lock);
2657+ __unexpect_related(expect);
2658+ WRITE_UNLOCK(&ip_conntrack_lock);
2659+}
2660+
2661+/* Add a related connection. */
2662+int ip_conntrack_expect_related(struct ip_conntrack *related_to,
2663+ struct ip_conntrack_expect *expect)
2664+{
2665+ struct ip_conntrack_expect *old, *new;
2666+ int ret = 0;
2667+
2668+ WRITE_LOCK(&ip_conntrack_lock);
2669+ /* Because of the write lock, no reader can walk the lists,
2670+ * so there is no need to use the tuple lock too */
2671+
2672+ DEBUGP("ip_conntrack_expect_related %p\n", related_to);
2673+ DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
2674+ DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
2675+
2676+ old = LIST_FIND(&ip_conntrack_expect_list, resent_expect,
2677+ struct ip_conntrack_expect *, &expect->tuple,
2678+ &expect->mask);
2679+ if (old) {
2680+ /* Helper private data may contain offsets but no pointers
2681+ pointing into the payload - otherwise we should have to copy
2682+ the data filled out by the helper over the old one */
2683+ DEBUGP("expect_related: resent packet\n");
2684+ if (related_to->helper->timeout) {
2685+ if (!del_timer(&old->timeout)) {
2686+ /* expectation is dying. Fall through */
2687+ old = NULL;
2688+ } else {
2689+ old->timeout.expires = jiffies +
2690+ related_to->helper->timeout * HZ;
2691+ add_timer(&old->timeout);
2692+ }
2693+ }
2694+
2695+ if (old) {
2696+ WRITE_UNLOCK(&ip_conntrack_lock);
2697+ return -EEXIST;
2698+ }
2699+ } else if (related_to->helper->max_expected &&
2700+ related_to->expecting >= related_to->helper->max_expected) {
2701+ struct list_head *cur_item;
2702+ /* old == NULL */
2703+ if (!(related_to->helper->flags &
2704+ IP_CT_HELPER_F_REUSE_EXPECT)) {
2705+ WRITE_UNLOCK(&ip_conntrack_lock);
2706+ if (net_ratelimit())
2707+ printk(KERN_WARNING
2708+ "ip_conntrack: max number of expected "
2709+ "connections %i of %s reached for "
2710+ "%u.%u.%u.%u->%u.%u.%u.%u\n",
2711+ related_to->helper->max_expected,
2712+ related_to->helper->name,
2713+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
2714+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
2715+ return -EPERM;
2716+ }
2717+ DEBUGP("ip_conntrack: max number of expected "
2718+ "connections %i of %s reached for "
2719+ "%u.%u.%u.%u->%u.%u.%u.%u, reusing\n",
2720+ related_to->helper->max_expected,
2721+ related_to->helper->name,
2722+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip),
2723+ NIPQUAD(related_to->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip));
2724+
2725+ /* choose the oldest expectation to evict */
2726+ list_for_each(cur_item, &related_to->sibling_list) {
2727+ struct ip_conntrack_expect *cur;
2728+
2729+ cur = list_entry(cur_item,
2730+ struct ip_conntrack_expect,
2731+ expected_list);
2732+ if (cur->sibling == NULL) {
2733+ old = cur;
2734+ break;
2735+ }
2736+ }
2737+
2738+ /* (!old) cannot happen, since related_to->expecting is the
2739+ * number of unconfirmed expects */
2740+ IP_NF_ASSERT(old);
2741+
2742+ /* newnat14 does not reuse the real allocated memory
2743+ * structures but rather unexpects the old and
2744+ * allocates a new. unexpect_related will decrement
2745+ * related_to->expecting.
2746+ */
2747+ unexpect_related(old);
2748+ ret = -EPERM;
2749+ } else if (LIST_FIND(&ip_conntrack_expect_list, expect_clash,
2750+ struct ip_conntrack_expect *, &expect->tuple,
2751+ &expect->mask)) {
2752+ WRITE_UNLOCK(&ip_conntrack_lock);
2753+ DEBUGP("expect_related: busy!\n");
2754+ return -EBUSY;
2755+ }
2756+
2757+ new = (struct ip_conntrack_expect *)
2758+ kmalloc(sizeof(struct ip_conntrack_expect), GFP_ATOMIC);
2759+ if (!new) {
2760+ WRITE_UNLOCK(&ip_conntrack_lock);
2761+ DEBUGP("expect_relaed: OOM allocating expect\n");
2762+ return -ENOMEM;
2763+ }
2764+
2765+ DEBUGP("new expectation %p of conntrack %p\n", new, related_to);
2766+ memcpy(new, expect, sizeof(*expect));
2767+ new->expectant = related_to;
2768+ new->sibling = NULL;
2769+ atomic_set(&new->use, 1);
2770+
2771+ /* add to expected list for this connection */
2772+ list_add(&new->expected_list, &related_to->sibling_list);
2773+ /* add to global list of expectations */
2774+ list_prepend(&ip_conntrack_expect_list, &new->list);
2775+ /* add and start timer if required */
2776+ if (related_to->helper->timeout) {
2777+ init_timer(&new->timeout);
2778+ new->timeout.data = (unsigned long)new;
2779+ new->timeout.function = expectation_timed_out;
2780+ new->timeout.expires = jiffies +
2781+ related_to->helper->timeout * HZ;
2782+ add_timer(&new->timeout);
2783+ }
2784+ related_to->expecting++;
2785+
2786+ WRITE_UNLOCK(&ip_conntrack_lock);
2787+
2788+ return ret;
2789+}
2790+
2791+/* Change tuple in an existing expectation */
2792+int ip_conntrack_change_expect(struct ip_conntrack_expect *expect,
2793+ struct ip_conntrack_tuple *newtuple)
2794+{
2795+ int ret;
2796+
2797+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
2798+ WRITE_LOCK(&ip_conntrack_expect_tuple_lock);
2799+
2800+ DEBUGP("change_expect:\n");
2801+ DEBUGP("exp tuple: "); DUMP_TUPLE(&expect->tuple);
2802+ DEBUGP("exp mask: "); DUMP_TUPLE(&expect->mask);
2803+ DEBUGP("newtuple: "); DUMP_TUPLE(newtuple);
2804+ if (expect->ct_tuple.dst.protonum == 0) {
2805+ /* Never seen before */
2806+ DEBUGP("change expect: never seen before\n");
2807+ if (!ip_ct_tuple_equal(&expect->tuple, newtuple)
2808+ && LIST_FIND(&ip_conntrack_expect_list, expect_clash,
2809+ struct ip_conntrack_expect *, newtuple, &expect->mask)) {
2810+ /* Force NAT to find an unused tuple */
2811+ ret = -1;
2812+ } else {
2813+ memcpy(&expect->ct_tuple, &expect->tuple, sizeof(expect->tuple));
2814+ memcpy(&expect->tuple, newtuple, sizeof(expect->tuple));
2815+ ret = 0;
2816+ }
2817+ } else {
2818+ /* Resent packet */
2819+ DEBUGP("change expect: resent packet\n");
2820+ if (ip_ct_tuple_equal(&expect->tuple, newtuple)) {
2821+ ret = 0;
2822+ } else {
2823+ /* Force NAT to choose again the same port */
2824+ ret = -1;
2825+ }
2826+ }
2827+ WRITE_UNLOCK(&ip_conntrack_expect_tuple_lock);
2828+
2829+ return ret;
2830+}
2831+
2832+/* Alter reply tuple (maybe alter helper). If it's already taken,
2833+ return 0 and don't do alteration. */
2834+int ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
2835+ const struct ip_conntrack_tuple *newreply)
2836+{
2837+ WRITE_LOCK(&ip_conntrack_lock);
2838+ if (__ip_conntrack_find(newreply, conntrack)) {
2839+ WRITE_UNLOCK(&ip_conntrack_lock);
2840+ return 0;
2841+ }
2842+ /* Should be unconfirmed, so not in hash table yet */
2843+ IP_NF_ASSERT(!is_confirmed(conntrack));
2844+
2845+ DEBUGP("Altering reply tuple of %p to ", conntrack);
2846+ DUMP_TUPLE(newreply);
2847+
2848+ conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
2849+ if (!conntrack->master)
2850+ conntrack->helper = LIST_FIND(&helpers, helper_cmp,
2851+ struct ip_conntrack_helper *,
2852+ newreply);
2853+ WRITE_UNLOCK(&ip_conntrack_lock);
2854+
2855+ return 1;
2856+}
2857+
2858+int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
2859+{
2860+ WRITE_LOCK(&ip_conntrack_lock);
2861+ list_prepend(&helpers, me);
2862+ WRITE_UNLOCK(&ip_conntrack_lock);
2863+
2864+ return 0;
2865+}
2866+
2867+static inline int unhelp(struct ip_conntrack_tuple_hash *i,
2868+ const struct ip_conntrack_helper *me)
2869+{
2870+ if (i->ctrack->helper == me) {
2871+ /* Get rid of any expected. */
2872+ remove_expectations(i->ctrack, 0);
2873+ /* And *then* set helper to NULL */
2874+ i->ctrack->helper = NULL;
2875+ }
2876+ return 0;
2877+}
2878+
2879+void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
2880+{
2881+ unsigned int i;
2882+
2883+ /* Need write lock here, to delete helper. */
2884+ WRITE_LOCK(&ip_conntrack_lock);
2885+ LIST_DELETE(&helpers, me);
2886+
2887+ /* Get rid of expecteds, set helpers to NULL. */
2888+ for (i = 0; i < ip_conntrack_htable_size; i++)
2889+ LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
2890+ struct ip_conntrack_tuple_hash *, me);
2891+ WRITE_UNLOCK(&ip_conntrack_lock);
2892+
2893+ /* Someone could be still looking at the helper in a bh. */
2894+ synchronize_net();
2895+}
2896+
2897+/* Refresh conntrack for this many jiffies. */
2898+void ip_ct_refresh(struct ip_conntrack *ct, unsigned long extra_jiffies)
2899+{
2900+ IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
2901+
2902+ WRITE_LOCK(&ip_conntrack_lock);
2903+ /* If not in hash table, timer will not be active yet */
2904+ if (!is_confirmed(ct))
2905+ ct->timeout.expires = extra_jiffies;
2906+ else {
2907+ /* Need del_timer for race avoidance (may already be dying). */
2908+ if (del_timer(&ct->timeout)) {
2909+ ct->timeout.expires = jiffies + extra_jiffies;
2910+ add_timer(&ct->timeout);
2911+ }
2912+ }
2913+ WRITE_UNLOCK(&ip_conntrack_lock);
2914+}
2915+
2916+/* Returns new sk_buff, or NULL */
2917+struct sk_buff *
2918+ip_ct_gather_frags(struct sk_buff *skb)
2919+{
2920+ struct sock *sk = skb->sk;
2921+#ifdef CONFIG_NETFILTER_DEBUG
2922+ unsigned int olddebug = skb->nf_debug;
2923+#endif
2924+ if (sk) {
2925+ sock_hold(sk);
2926+ skb_orphan(skb);
2927+ }
2928+
2929+ local_bh_disable();
2930+ skb = ip_defrag(skb);
2931+ local_bh_enable();
2932+
2933+ if (!skb) {
2934+ if (sk)
2935+ sock_put(sk);
2936+ return skb;
2937+ }
2938+
2939+ if (sk) {
2940+ skb_set_owner_w(skb, sk);
2941+ sock_put(sk);
2942+ }
2943+
2944+ ip_send_check(skb->nh.iph);
2945+ skb->nfcache |= NFC_ALTERED;
2946+#ifdef CONFIG_NETFILTER_DEBUG
2947+ /* Packet path as if nothing had happened. */
2948+ skb->nf_debug = olddebug;
2949+#endif
2950+ return skb;
2951+}
2952+
2953+/* Used by ipt_REJECT. */
2954+static void ip_conntrack_attach(struct sk_buff *nskb, struct nf_ct_info *nfct)
2955+{
2956+ struct ip_conntrack *ct;
2957+ enum ip_conntrack_info ctinfo;
2958+
2959+ ct = __ip_conntrack_get(nfct, &ctinfo);
2960+
2961+ /* This ICMP is in reverse direction to the packet which
2962+ caused it */
2963+ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
2964+ ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
2965+ else
2966+ ctinfo = IP_CT_RELATED;
2967+
2968+ /* Attach new skbuff, and increment count */
2969+ nskb->nfct = &ct->infos[ctinfo];
2970+ atomic_inc(&ct->ct_general.use);
2971+}
2972+
2973+static inline int
2974+do_kill(const struct ip_conntrack_tuple_hash *i,
2975+ int (*kill)(const struct ip_conntrack *i, void *data),
2976+ void *data)
2977+{
2978+ return kill(i->ctrack, data);
2979+}
2980+
2981+/* Bring out ya dead! */
2982+static struct ip_conntrack_tuple_hash *
2983+get_next_corpse(int (*kill)(const struct ip_conntrack *i, void *data),
2984+ void *data, unsigned int *bucket)
2985+{
2986+ struct ip_conntrack_tuple_hash *h = NULL;
2987+
2988+ READ_LOCK(&ip_conntrack_lock);
2989+ for (; !h && *bucket < ip_conntrack_htable_size; (*bucket)++) {
2990+ h = LIST_FIND(&ip_conntrack_hash[*bucket], do_kill,
2991+ struct ip_conntrack_tuple_hash *, kill, data);
2992+ }
2993+ if (h)
2994+ atomic_inc(&h->ctrack->ct_general.use);
2995+ READ_UNLOCK(&ip_conntrack_lock);
2996+
2997+ return h;
2998+}
2999+
3000+void
3001+ip_ct_selective_cleanup(int (*kill)(const struct ip_conntrack *i, void *data),
3002+ void *data)
3003+{
3004+ struct ip_conntrack_tuple_hash *h;
3005+ unsigned int bucket = 0;
3006+
3007+ while ((h = get_next_corpse(kill, data, &bucket)) != NULL) {
3008+ /* Time to push up daises... */
3009+ if (del_timer(&h->ctrack->timeout))
3010+ death_by_timeout((unsigned long)h->ctrack);
3011+ /* ... else the timer will get him soon. */
3012+
3013+ ip_conntrack_put(h->ctrack);
3014+ }
3015+}
3016+
3017+/* Fast function for those who don't want to parse /proc (and I don't
3018+ blame them). */
3019+/* Reversing the socket's dst/src point of view gives us the reply
3020+ mapping. */
3021+static int
3022+getorigdst(struct sock *sk, int optval, void *user, int *len)
3023+{
3024+ struct inet_opt *inet = inet_sk(sk);
3025+ struct ip_conntrack_tuple_hash *h;
3026+ struct ip_conntrack_tuple tuple;
3027+
3028+ IP_CT_TUPLE_U_BLANK(&tuple);
3029+ tuple.src.ip = inet->rcv_saddr;
3030+ tuple.src.u.tcp.port = inet->sport;
3031+ tuple.dst.ip = inet->daddr;
3032+ tuple.dst.u.tcp.port = inet->dport;
3033+ tuple.dst.protonum = IPPROTO_TCP;
3034+
3035+ /* We only do TCP at the moment: is there a better way? */
3036+ if (strcmp(sk->sk_prot->name, "TCP")) {
3037+ DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
3038+ return -ENOPROTOOPT;
3039+ }
3040+
3041+ if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
3042+ DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
3043+ *len, sizeof(struct sockaddr_in));
3044+ return -EINVAL;
3045+ }
3046+
3047+ h = ip_conntrack_find_get(&tuple, NULL);
3048+ if (h) {
3049+ struct sockaddr_in sin;
3050+
3051+ sin.sin_family = AF_INET;
3052+ sin.sin_port = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
3053+ .tuple.dst.u.tcp.port;
3054+ sin.sin_addr.s_addr = h->ctrack->tuplehash[IP_CT_DIR_ORIGINAL]
3055+ .tuple.dst.ip;
3056+
3057+ DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
3058+ NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
3059+ ip_conntrack_put(h->ctrack);
3060+ if (copy_to_user(user, &sin, sizeof(sin)) != 0)
3061+ return -EFAULT;
3062+ else
3063+ return 0;
3064+ }
3065+ DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
3066+ NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
3067+ NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
3068+ return -ENOENT;
3069+}
3070+
3071+static struct nf_sockopt_ops so_getorigdst = {
3072+ .pf = PF_INET,
3073+ .get_optmin = SO_ORIGINAL_DST,
3074+ .get_optmax = SO_ORIGINAL_DST+1,
3075+ .get = &getorigdst,
3076+};
3077+
3078+static int kill_all(const struct ip_conntrack *i, void *data)
3079+{
3080+ return 1;
3081+}
3082+
3083+/* Mishearing the voices in his head, our hero wonders how he's
3084+ supposed to kill the mall. */
3085+void ip_conntrack_cleanup(void)
3086+{
3087+ ip_ct_attach = NULL;
3088+ /* This makes sure all current packets have passed through
3089+ netfilter framework. Roll on, two-stage module
3090+ delete... */
3091+ synchronize_net();
3092+
3093+ i_see_dead_people:
3094+ ip_ct_selective_cleanup(kill_all, NULL);
3095+ if (atomic_read(&ip_conntrack_count) != 0) {
3096+ schedule();
3097+ goto i_see_dead_people;
3098+ }
3099+
3100+ kmem_cache_destroy(ip_conntrack_cachep);
3101+ vfree(ip_conntrack_hash);
3102+ nf_unregister_sockopt(&so_getorigdst);
3103+}
3104+
3105+static int hashsize;
3106+MODULE_PARM(hashsize, "i");
3107+
3108+int __init ip_conntrack_init(void)
3109+{
3110+ unsigned int i;
3111+ int ret;
3112+
3113+ /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
3114+ * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
3115+ if (hashsize) {
3116+ ip_conntrack_htable_size = hashsize;
3117+ } else {
3118+ ip_conntrack_htable_size
3119+ = (((num_physpages << PAGE_SHIFT) / 16384)
3120+ / sizeof(struct list_head));
3121+ if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
3122+ ip_conntrack_htable_size = 8192;
3123+ if (ip_conntrack_htable_size < 16)
3124+ ip_conntrack_htable_size = 16;
3125+ }
3126+ ip_conntrack_max = 8 * ip_conntrack_htable_size;
3127+
3128+ printk("ip_conntrack version %s (%u buckets, %d max)"
3129+ " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
3130+ ip_conntrack_htable_size, ip_conntrack_max,
3131+ sizeof(struct ip_conntrack));
3132+
3133+ ret = nf_register_sockopt(&so_getorigdst);
3134+ if (ret != 0) {
3135+ printk(KERN_ERR "Unable to register netfilter socket option\n");
3136+ return ret;
3137+ }
3138+
3139+ ip_conntrack_hash = vmalloc(sizeof(struct list_head)
3140+ * ip_conntrack_htable_size);
3141+ if (!ip_conntrack_hash) {
3142+ printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
3143+ goto err_unreg_sockopt;
3144+ }
3145+
3146+ ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
3147+ sizeof(struct ip_conntrack), 0,
3148+ SLAB_HWCACHE_ALIGN, NULL, NULL);
3149+ if (!ip_conntrack_cachep) {
3150+ printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
3151+ goto err_free_hash;
3152+ }
3153+ /* Don't NEED lock here, but good form anyway. */
3154+ WRITE_LOCK(&ip_conntrack_lock);
3155+ /* Sew in builtin protocols. */
3156+ list_append(&protocol_list, &ip_conntrack_protocol_tcp);
3157+ list_append(&protocol_list, &ip_conntrack_protocol_udp);
3158+ list_append(&protocol_list, &ip_conntrack_protocol_icmp);
3159+ WRITE_UNLOCK(&ip_conntrack_lock);
3160+
3161+ for (i = 0; i < ip_conntrack_htable_size; i++)
3162+ INIT_LIST_HEAD(&ip_conntrack_hash[i]);
3163+
3164+ /* For use by ipt_REJECT */
3165+ ip_ct_attach = ip_conntrack_attach;
3166+ return ret;
3167+
3168+err_free_hash:
3169+ vfree(ip_conntrack_hash);
3170+err_unreg_sockopt:
3171+ nf_unregister_sockopt(&so_getorigdst);
3172+
3173+ return -ENOMEM;
3174+}
3175diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_standalone.c linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_standalone.c
3176--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-03-04 06:16:44.000000000 +0000
3177+++ linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_standalone.c 2004-03-08 08:48:52.000000000 +0000
3178@@ -194,6 +194,26 @@
3179 return ip_conntrack_confirm(*pskb);
3180 }
3181
3182+static unsigned int ip_conntrack_defrag(unsigned int hooknum,
3183+ struct sk_buff **pskb,
3184+ const struct net_device *in,
3185+ const struct net_device *out,
3186+ int (*okfn)(struct sk_buff *))
3187+{
3188+ /* Previously seen (loopback)? Ignore. Do this before
3189+ fragment check. */
3190+ if ((*pskb)->nfct)
3191+ return NF_ACCEPT;
3192+
3193+ /* Gather fragments. */
3194+ if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
3195+ *pskb = ip_ct_gather_frags(*pskb);
3196+ if (!*pskb)
3197+ return NF_STOLEN;
3198+ }
3199+ return NF_ACCEPT;
3200+}
3201+
3202 static unsigned int ip_refrag(unsigned int hooknum,
3203 struct sk_buff **pskb,
3204 const struct net_device *in,
3205@@ -236,6 +256,14 @@
3206
3207 /* Connection tracking may drop packets, but never alters them, so
3208 make it the first hook. */
3209+static struct nf_hook_ops ip_conntrack_defrag_ops = {
3210+ .hook = ip_conntrack_defrag,
3211+ .owner = THIS_MODULE,
3212+ .pf = PF_INET,
3213+ .hooknum = NF_IP_PRE_ROUTING,
3214+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
3215+};
3216+
3217 static struct nf_hook_ops ip_conntrack_in_ops = {
3218 .hook = ip_conntrack_in,
3219 .owner = THIS_MODULE,
3220@@ -244,6 +272,14 @@
3221 .priority = NF_IP_PRI_CONNTRACK,
3222 };
3223
3224+static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = {
3225+ .hook = ip_conntrack_defrag,
3226+ .owner = THIS_MODULE,
3227+ .pf = PF_INET,
3228+ .hooknum = NF_IP_LOCAL_OUT,
3229+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
3230+};
3231+
3232 static struct nf_hook_ops ip_conntrack_local_out_ops = {
3233 .hook = ip_conntrack_local,
3234 .owner = THIS_MODULE,
3235@@ -470,10 +506,20 @@
3236 if (!proc) goto cleanup_init;
3237 proc->owner = THIS_MODULE;
3238
3239+ ret = nf_register_hook(&ip_conntrack_defrag_ops);
3240+ if (ret < 0) {
3241+ printk("ip_conntrack: can't register pre-routing defrag hook.\n");
3242+ goto cleanup_proc;
3243+ }
3244+ ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
3245+ if (ret < 0) {
3246+ printk("ip_conntrack: can't register local_out defrag hook.\n");
3247+ goto cleanup_defragops;
3248+ }
3249 ret = nf_register_hook(&ip_conntrack_in_ops);
3250 if (ret < 0) {
3251 printk("ip_conntrack: can't register pre-routing hook.\n");
3252- goto cleanup_proc;
3253+ goto cleanup_defraglocalops;
3254 }
3255 ret = nf_register_hook(&ip_conntrack_local_out_ops);
3256 if (ret < 0) {
3257@@ -511,6 +557,10 @@
3258 nf_unregister_hook(&ip_conntrack_local_out_ops);
3259 cleanup_inops:
3260 nf_unregister_hook(&ip_conntrack_in_ops);
3261+ cleanup_defraglocalops:
3262+ nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
3263+ cleanup_defragops:
3264+ nf_unregister_hook(&ip_conntrack_defrag_ops);
3265 cleanup_proc:
3266 proc_net_remove("ip_conntrack");
3267 cleanup_init:
3268@@ -602,5 +652,6 @@
3269 EXPORT_SYMBOL(ip_conntrack_expect_list);
3270 EXPORT_SYMBOL(ip_conntrack_lock);
3271 EXPORT_SYMBOL(ip_conntrack_hash);
3272+EXPORT_SYMBOL(ip_conntrack_untracked);
3273 EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
3274 EXPORT_SYMBOL_GPL(ip_conntrack_put);
3275diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_standalone.c.orig linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_standalone.c.orig
3276--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_conntrack_standalone.c.orig 1970-01-01 00:00:00.000000000 +0000
3277+++ linux-2.6.4-rc2/net/ipv4/netfilter/ip_conntrack_standalone.c.orig 2004-03-04 06:16:44.000000000 +0000
3278@@ -0,0 +1,606 @@
3279+/* This file contains all the functions required for the standalone
3280+ ip_conntrack module.
3281+
3282+ These are not required by the compatibility layer.
3283+*/
3284+
3285+/* (C) 1999-2001 Paul `Rusty' Russell
3286+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3287+ *
3288+ * This program is free software; you can redistribute it and/or modify
3289+ * it under the terms of the GNU General Public License version 2 as
3290+ * published by the Free Software Foundation.
3291+ */
3292+
3293+#include <linux/config.h>
3294+#include <linux/types.h>
3295+#include <linux/ip.h>
3296+#include <linux/netfilter.h>
3297+#include <linux/netfilter_ipv4.h>
3298+#include <linux/module.h>
3299+#include <linux/skbuff.h>
3300+#include <linux/proc_fs.h>
3301+#ifdef CONFIG_SYSCTL
3302+#include <linux/sysctl.h>
3303+#endif
3304+#include <net/checksum.h>
3305+
3306+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
3307+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
3308+
3309+#include <linux/netfilter_ipv4/ip_conntrack.h>
3310+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
3311+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
3312+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
3313+#include <linux/netfilter_ipv4/listhelp.h>
3314+
3315+#if 0
3316+#define DEBUGP printk
3317+#else
3318+#define DEBUGP(format, args...)
3319+#endif
3320+
3321+MODULE_LICENSE("GPL");
3322+
3323+static int kill_proto(const struct ip_conntrack *i, void *data)
3324+{
3325+ return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
3326+ *((u_int8_t *) data));
3327+}
3328+
3329+static unsigned int
3330+print_tuple(char *buffer, const struct ip_conntrack_tuple *tuple,
3331+ struct ip_conntrack_protocol *proto)
3332+{
3333+ int len;
3334+
3335+ len = sprintf(buffer, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
3336+ NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
3337+
3338+ len += proto->print_tuple(buffer + len, tuple);
3339+
3340+ return len;
3341+}
3342+
3343+/* FIXME: Don't print source proto part. --RR */
3344+static unsigned int
3345+print_expect(char *buffer, const struct ip_conntrack_expect *expect)
3346+{
3347+ unsigned int len;
3348+
3349+ if (expect->expectant->helper->timeout)
3350+ len = sprintf(buffer, "EXPECTING: %lu ",
3351+ timer_pending(&expect->timeout)
3352+ ? (expect->timeout.expires - jiffies)/HZ : 0);
3353+ else
3354+ len = sprintf(buffer, "EXPECTING: - ");
3355+ len += sprintf(buffer + len, "use=%u proto=%u ",
3356+ atomic_read(&expect->use), expect->tuple.dst.protonum);
3357+ len += print_tuple(buffer + len, &expect->tuple,
3358+ __ip_ct_find_proto(expect->tuple.dst.protonum));
3359+ len += sprintf(buffer + len, "\n");
3360+ return len;
3361+}
3362+
3363+static unsigned int
3364+print_conntrack(char *buffer, struct ip_conntrack *conntrack)
3365+{
3366+ unsigned int len;
3367+ struct ip_conntrack_protocol *proto
3368+ = __ip_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
3369+ .tuple.dst.protonum);
3370+
3371+ len = sprintf(buffer, "%-8s %u %lu ",
3372+ proto->name,
3373+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
3374+ .tuple.dst.protonum,
3375+ timer_pending(&conntrack->timeout)
3376+ ? (conntrack->timeout.expires - jiffies)/HZ : 0);
3377+
3378+ len += proto->print_conntrack(buffer + len, conntrack);
3379+ len += print_tuple(buffer + len,
3380+ &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
3381+ proto);
3382+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
3383+ len += sprintf(buffer + len, "[UNREPLIED] ");
3384+ len += print_tuple(buffer + len,
3385+ &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
3386+ proto);
3387+ if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
3388+ len += sprintf(buffer + len, "[ASSURED] ");
3389+ len += sprintf(buffer + len, "use=%u ",
3390+ atomic_read(&conntrack->ct_general.use));
3391+ len += sprintf(buffer + len, "\n");
3392+
3393+ return len;
3394+}
3395+
3396+/* Returns true when finished. */
3397+static inline int
3398+conntrack_iterate(const struct ip_conntrack_tuple_hash *hash,
3399+ char *buffer, off_t offset, off_t *upto,
3400+ unsigned int *len, unsigned int maxlen)
3401+{
3402+ unsigned int newlen;
3403+ IP_NF_ASSERT(hash->ctrack);
3404+
3405+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
3406+
3407+ /* Only count originals */
3408+ if (DIRECTION(hash))
3409+ return 0;
3410+
3411+ if ((*upto)++ < offset)
3412+ return 0;
3413+
3414+ newlen = print_conntrack(buffer + *len, hash->ctrack);
3415+ if (*len + newlen > maxlen)
3416+ return 1;
3417+ else *len += newlen;
3418+
3419+ return 0;
3420+}
3421+
3422+static int
3423+list_conntracks(char *buffer, char **start, off_t offset, int length)
3424+{
3425+ unsigned int i;
3426+ unsigned int len = 0;
3427+ off_t upto = 0;
3428+ struct list_head *e;
3429+
3430+ READ_LOCK(&ip_conntrack_lock);
3431+ /* Traverse hash; print originals then reply. */
3432+ for (i = 0; i < ip_conntrack_htable_size; i++) {
3433+ if (LIST_FIND(&ip_conntrack_hash[i], conntrack_iterate,
3434+ struct ip_conntrack_tuple_hash *,
3435+ buffer, offset, &upto, &len, length))
3436+ goto finished;
3437+ }
3438+
3439+ /* Now iterate through expecteds. */
3440+ READ_LOCK(&ip_conntrack_expect_tuple_lock);
3441+ list_for_each(e, &ip_conntrack_expect_list) {
3442+ unsigned int last_len;
3443+ struct ip_conntrack_expect *expect
3444+ = (struct ip_conntrack_expect *)e;
3445+ if (upto++ < offset) continue;
3446+
3447+ last_len = len;
3448+ len += print_expect(buffer + len, expect);
3449+ if (len > length) {
3450+ len = last_len;
3451+ goto finished_expects;
3452+ }
3453+ }
3454+
3455+ finished_expects:
3456+ READ_UNLOCK(&ip_conntrack_expect_tuple_lock);
3457+ finished:
3458+ READ_UNLOCK(&ip_conntrack_lock);
3459+
3460+ /* `start' hack - see fs/proc/generic.c line ~165 */
3461+ *start = (char *)((unsigned int)upto - offset);
3462+ return len;
3463+}
3464+
3465+static unsigned int ip_confirm(unsigned int hooknum,
3466+ struct sk_buff **pskb,
3467+ const struct net_device *in,
3468+ const struct net_device *out,
3469+ int (*okfn)(struct sk_buff *))
3470+{
3471+ /* We've seen it coming out the other side: confirm it */
3472+ return ip_conntrack_confirm(*pskb);
3473+}
3474+
3475+static unsigned int ip_refrag(unsigned int hooknum,
3476+ struct sk_buff **pskb,
3477+ const struct net_device *in,
3478+ const struct net_device *out,
3479+ int (*okfn)(struct sk_buff *))
3480+{
3481+ struct rtable *rt = (struct rtable *)(*pskb)->dst;
3482+
3483+ /* We've seen it coming out the other side: confirm */
3484+ if (ip_confirm(hooknum, pskb, in, out, okfn) != NF_ACCEPT)
3485+ return NF_DROP;
3486+
3487+ /* Local packets are never produced too large for their
3488+ interface. We defragment them at LOCAL_OUT, however,
3489+ so we have to refragment them here. */
3490+ if ((*pskb)->len > dst_pmtu(&rt->u.dst) &&
3491+ !skb_shinfo(*pskb)->tso_size) {
3492+ /* No hook can be after us, so this should be OK. */
3493+ ip_fragment(*pskb, okfn);
3494+ return NF_STOLEN;
3495+ }
3496+ return NF_ACCEPT;
3497+}
3498+
3499+static unsigned int ip_conntrack_local(unsigned int hooknum,
3500+ struct sk_buff **pskb,
3501+ const struct net_device *in,
3502+ const struct net_device *out,
3503+ int (*okfn)(struct sk_buff *))
3504+{
3505+ /* root is playing with raw sockets. */
3506+ if ((*pskb)->len < sizeof(struct iphdr)
3507+ || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
3508+ if (net_ratelimit())
3509+ printk("ipt_hook: happy cracking.\n");
3510+ return NF_ACCEPT;
3511+ }
3512+ return ip_conntrack_in(hooknum, pskb, in, out, okfn);
3513+}
3514+
3515+/* Connection tracking may drop packets, but never alters them, so
3516+ make it the first hook. */
3517+static struct nf_hook_ops ip_conntrack_in_ops = {
3518+ .hook = ip_conntrack_in,
3519+ .owner = THIS_MODULE,
3520+ .pf = PF_INET,
3521+ .hooknum = NF_IP_PRE_ROUTING,
3522+ .priority = NF_IP_PRI_CONNTRACK,
3523+};
3524+
3525+static struct nf_hook_ops ip_conntrack_local_out_ops = {
3526+ .hook = ip_conntrack_local,
3527+ .owner = THIS_MODULE,
3528+ .pf = PF_INET,
3529+ .hooknum = NF_IP_LOCAL_OUT,
3530+ .priority = NF_IP_PRI_CONNTRACK,
3531+};
3532+
3533+/* Refragmenter; last chance. */
3534+static struct nf_hook_ops ip_conntrack_out_ops = {
3535+ .hook = ip_refrag,
3536+ .owner = THIS_MODULE,
3537+ .pf = PF_INET,
3538+ .hooknum = NF_IP_POST_ROUTING,
3539+ .priority = NF_IP_PRI_LAST,
3540+};
3541+
3542+static struct nf_hook_ops ip_conntrack_local_in_ops = {
3543+ .hook = ip_confirm,
3544+ .owner = THIS_MODULE,
3545+ .pf = PF_INET,
3546+ .hooknum = NF_IP_LOCAL_IN,
3547+ .priority = NF_IP_PRI_LAST-1,
3548+};
3549+
3550+/* Sysctl support */
3551+
3552+#ifdef CONFIG_SYSCTL
3553+
3554+/* From ip_conntrack_core.c */
3555+extern int ip_conntrack_max;
3556+extern unsigned int ip_conntrack_htable_size;
3557+
3558+/* From ip_conntrack_proto_tcp.c */
3559+extern unsigned long ip_ct_tcp_timeout_syn_sent;
3560+extern unsigned long ip_ct_tcp_timeout_syn_recv;
3561+extern unsigned long ip_ct_tcp_timeout_established;
3562+extern unsigned long ip_ct_tcp_timeout_fin_wait;
3563+extern unsigned long ip_ct_tcp_timeout_close_wait;
3564+extern unsigned long ip_ct_tcp_timeout_last_ack;
3565+extern unsigned long ip_ct_tcp_timeout_time_wait;
3566+extern unsigned long ip_ct_tcp_timeout_close;
3567+
3568+/* From ip_conntrack_proto_udp.c */
3569+extern unsigned long ip_ct_udp_timeout;
3570+extern unsigned long ip_ct_udp_timeout_stream;
3571+
3572+/* From ip_conntrack_proto_icmp.c */
3573+extern unsigned long ip_ct_icmp_timeout;
3574+
3575+/* From ip_conntrack_proto_icmp.c */
3576+extern unsigned long ip_ct_generic_timeout;
3577+
3578+static struct ctl_table_header *ip_ct_sysctl_header;
3579+
3580+static ctl_table ip_ct_sysctl_table[] = {
3581+ {
3582+ .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
3583+ .procname = "ip_conntrack_max",
3584+ .data = &ip_conntrack_max,
3585+ .maxlen = sizeof(int),
3586+ .mode = 0644,
3587+ .proc_handler = &proc_dointvec,
3588+ },
3589+ {
3590+ .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
3591+ .procname = "ip_conntrack_buckets",
3592+ .data = &ip_conntrack_htable_size,
3593+ .maxlen = sizeof(unsigned int),
3594+ .mode = 0444,
3595+ .proc_handler = &proc_dointvec,
3596+ },
3597+ {
3598+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
3599+ .procname = "ip_conntrack_tcp_timeout_syn_sent",
3600+ .data = &ip_ct_tcp_timeout_syn_sent,
3601+ .maxlen = sizeof(unsigned int),
3602+ .mode = 0644,
3603+ .proc_handler = &proc_dointvec_jiffies,
3604+ },
3605+ {
3606+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
3607+ .procname = "ip_conntrack_tcp_timeout_syn_recv",
3608+ .data = &ip_ct_tcp_timeout_syn_recv,
3609+ .maxlen = sizeof(unsigned int),
3610+ .mode = 0644,
3611+ .proc_handler = &proc_dointvec_jiffies,
3612+ },
3613+ {
3614+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
3615+ .procname = "ip_conntrack_tcp_timeout_established",
3616+ .data = &ip_ct_tcp_timeout_established,
3617+ .maxlen = sizeof(unsigned int),
3618+ .mode = 0644,
3619+ .proc_handler = &proc_dointvec_jiffies,
3620+ },
3621+ {
3622+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
3623+ .procname = "ip_conntrack_tcp_timeout_fin_wait",
3624+ .data = &ip_ct_tcp_timeout_fin_wait,
3625+ .maxlen = sizeof(unsigned int),
3626+ .mode = 0644,
3627+ .proc_handler = &proc_dointvec_jiffies,
3628+ },
3629+ {
3630+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
3631+ .procname = "ip_conntrack_tcp_timeout_close_wait",
3632+ .data = &ip_ct_tcp_timeout_close_wait,
3633+ .maxlen = sizeof(unsigned int),
3634+ .mode = 0644,
3635+ .proc_handler = &proc_dointvec_jiffies,
3636+ },
3637+ {
3638+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
3639+ .procname = "ip_conntrack_tcp_timeout_last_ack",
3640+ .data = &ip_ct_tcp_timeout_last_ack,
3641+ .maxlen = sizeof(unsigned int),
3642+ .mode = 0644,
3643+ .proc_handler = &proc_dointvec_jiffies,
3644+ },
3645+ {
3646+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
3647+ .procname = "ip_conntrack_tcp_timeout_time_wait",
3648+ .data = &ip_ct_tcp_timeout_time_wait,
3649+ .maxlen = sizeof(unsigned int),
3650+ .mode = 0644,
3651+ .proc_handler = &proc_dointvec_jiffies,
3652+ },
3653+ {
3654+ .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
3655+ .procname = "ip_conntrack_tcp_timeout_close",
3656+ .data = &ip_ct_tcp_timeout_close,
3657+ .maxlen = sizeof(unsigned int),
3658+ .mode = 0644,
3659+ .proc_handler = &proc_dointvec_jiffies,
3660+ },
3661+ {
3662+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
3663+ .procname = "ip_conntrack_udp_timeout",
3664+ .data = &ip_ct_udp_timeout,
3665+ .maxlen = sizeof(unsigned int),
3666+ .mode = 0644,
3667+ .proc_handler = &proc_dointvec_jiffies,
3668+ },
3669+ {
3670+ .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
3671+ .procname = "ip_conntrack_udp_timeout_stream",
3672+ .data = &ip_ct_udp_timeout_stream,
3673+ .maxlen = sizeof(unsigned int),
3674+ .mode = 0644,
3675+ .proc_handler = &proc_dointvec_jiffies,
3676+ },
3677+ {
3678+ .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
3679+ .procname = "ip_conntrack_icmp_timeout",
3680+ .data = &ip_ct_icmp_timeout,
3681+ .maxlen = sizeof(unsigned int),
3682+ .mode = 0644,
3683+ .proc_handler = &proc_dointvec_jiffies,
3684+ },
3685+ {
3686+ .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
3687+ .procname = "ip_conntrack_generic_timeout",
3688+ .data = &ip_ct_generic_timeout,
3689+ .maxlen = sizeof(unsigned int),
3690+ .mode = 0644,
3691+ .proc_handler = &proc_dointvec_jiffies,
3692+ },
3693+ { .ctl_name = 0 }
3694+};
3695+
3696+#define NET_IP_CONNTRACK_MAX 2089
3697+
3698+static ctl_table ip_ct_netfilter_table[] = {
3699+ {
3700+ .ctl_name = NET_IPV4_NETFILTER,
3701+ .procname = "netfilter",
3702+ .mode = 0555,
3703+ .child = ip_ct_sysctl_table,
3704+ },
3705+ {
3706+ .ctl_name = NET_IP_CONNTRACK_MAX,
3707+ .procname = "ip_conntrack_max",
3708+ .data = &ip_conntrack_max,
3709+ .maxlen = sizeof(int),
3710+ .mode = 0644,
3711+ .proc_handler = &proc_dointvec
3712+ },
3713+ { .ctl_name = 0 }
3714+};
3715+
3716+static ctl_table ip_ct_ipv4_table[] = {
3717+ {
3718+ .ctl_name = NET_IPV4,
3719+ .procname = "ipv4",
3720+ .mode = 0555,
3721+ .child = ip_ct_netfilter_table,
3722+ },
3723+ { .ctl_name = 0 }
3724+};
3725+
3726+static ctl_table ip_ct_net_table[] = {
3727+ {
3728+ .ctl_name = CTL_NET,
3729+ .procname = "net",
3730+ .mode = 0555,
3731+ .child = ip_ct_ipv4_table,
3732+ },
3733+ { .ctl_name = 0 }
3734+};
3735+#endif
3736+static int init_or_cleanup(int init)
3737+{
3738+ struct proc_dir_entry *proc;
3739+ int ret = 0;
3740+
3741+ if (!init) goto cleanup;
3742+
3743+ ret = ip_conntrack_init();
3744+ if (ret < 0)
3745+ goto cleanup_nothing;
3746+
3747+ proc = proc_net_create("ip_conntrack",0,list_conntracks);
3748+ if (!proc) goto cleanup_init;
3749+ proc->owner = THIS_MODULE;
3750+
3751+ ret = nf_register_hook(&ip_conntrack_in_ops);
3752+ if (ret < 0) {
3753+ printk("ip_conntrack: can't register pre-routing hook.\n");
3754+ goto cleanup_proc;
3755+ }
3756+ ret = nf_register_hook(&ip_conntrack_local_out_ops);
3757+ if (ret < 0) {
3758+ printk("ip_conntrack: can't register local out hook.\n");
3759+ goto cleanup_inops;
3760+ }
3761+ ret = nf_register_hook(&ip_conntrack_out_ops);
3762+ if (ret < 0) {
3763+ printk("ip_conntrack: can't register post-routing hook.\n");
3764+ goto cleanup_inandlocalops;
3765+ }
3766+ ret = nf_register_hook(&ip_conntrack_local_in_ops);
3767+ if (ret < 0) {
3768+ printk("ip_conntrack: can't register local in hook.\n");
3769+ goto cleanup_inoutandlocalops;
3770+ }
3771+#ifdef CONFIG_SYSCTL
3772+ ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
3773+ if (ip_ct_sysctl_header == NULL) {
3774+ printk("ip_conntrack: can't register to sysctl.\n");
3775+ goto cleanup;
3776+ }
3777+#endif
3778+
3779+ return ret;
3780+
3781+ cleanup:
3782+#ifdef CONFIG_SYSCTL
3783+ unregister_sysctl_table(ip_ct_sysctl_header);
3784+#endif
3785+ nf_unregister_hook(&ip_conntrack_local_in_ops);
3786+ cleanup_inoutandlocalops:
3787+ nf_unregister_hook(&ip_conntrack_out_ops);
3788+ cleanup_inandlocalops:
3789+ nf_unregister_hook(&ip_conntrack_local_out_ops);
3790+ cleanup_inops:
3791+ nf_unregister_hook(&ip_conntrack_in_ops);
3792+ cleanup_proc:
3793+ proc_net_remove("ip_conntrack");
3794+ cleanup_init:
3795+ ip_conntrack_cleanup();
3796+ cleanup_nothing:
3797+ return ret;
3798+}
3799+
3800+/* FIXME: Allow NULL functions and sub in pointers to generic for
3801+ them. --RR */
3802+int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
3803+{
3804+ int ret = 0;
3805+ struct list_head *i;
3806+
3807+ WRITE_LOCK(&ip_conntrack_lock);
3808+ list_for_each(i, &protocol_list) {
3809+ if (((struct ip_conntrack_protocol *)i)->proto
3810+ == proto->proto) {
3811+ ret = -EBUSY;
3812+ goto out;
3813+ }
3814+ }
3815+
3816+ list_prepend(&protocol_list, proto);
3817+
3818+ out:
3819+ WRITE_UNLOCK(&ip_conntrack_lock);
3820+ return ret;
3821+}
3822+
3823+void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
3824+{
3825+ WRITE_LOCK(&ip_conntrack_lock);
3826+
3827+ /* ip_ct_find_proto() returns proto_generic in case there is no protocol
3828+ * helper. So this should be enough - HW */
3829+ LIST_DELETE(&protocol_list, proto);
3830+ WRITE_UNLOCK(&ip_conntrack_lock);
3831+
3832+ /* Somebody could be still looking at the proto in bh. */
3833+ synchronize_net();
3834+
3835+ /* Remove all conntrack entries for this protocol */
3836+ ip_ct_selective_cleanup(kill_proto, &proto->proto);
3837+}
3838+
3839+static int __init init(void)
3840+{
3841+ return init_or_cleanup(1);
3842+}
3843+
3844+static void __exit fini(void)
3845+{
3846+ init_or_cleanup(0);
3847+}
3848+
3849+module_init(init);
3850+module_exit(fini);
3851+
3852+/* Some modules need us, but don't depend directly on any symbol.
3853+ They should call this. */
3854+void need_ip_conntrack(void)
3855+{
3856+}
3857+
3858+EXPORT_SYMBOL(ip_conntrack_protocol_register);
3859+EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
3860+EXPORT_SYMBOL(invert_tuplepr);
3861+EXPORT_SYMBOL(ip_conntrack_alter_reply);
3862+EXPORT_SYMBOL(ip_conntrack_destroyed);
3863+EXPORT_SYMBOL(ip_conntrack_get);
3864+EXPORT_SYMBOL(need_ip_conntrack);
3865+EXPORT_SYMBOL(ip_conntrack_helper_register);
3866+EXPORT_SYMBOL(ip_conntrack_helper_unregister);
3867+EXPORT_SYMBOL(ip_ct_selective_cleanup);
3868+EXPORT_SYMBOL(ip_ct_refresh);
3869+EXPORT_SYMBOL(ip_ct_find_proto);
3870+EXPORT_SYMBOL(__ip_ct_find_proto);
3871+EXPORT_SYMBOL(ip_ct_find_helper);
3872+EXPORT_SYMBOL(ip_conntrack_expect_related);
3873+EXPORT_SYMBOL(ip_conntrack_change_expect);
3874+EXPORT_SYMBOL(ip_conntrack_unexpect_related);
3875+EXPORT_SYMBOL_GPL(ip_conntrack_expect_find_get);
3876+EXPORT_SYMBOL_GPL(ip_conntrack_expect_put);
3877+EXPORT_SYMBOL(ip_conntrack_tuple_taken);
3878+EXPORT_SYMBOL(ip_ct_gather_frags);
3879+EXPORT_SYMBOL(ip_conntrack_htable_size);
3880+EXPORT_SYMBOL(ip_conntrack_expect_list);
3881+EXPORT_SYMBOL(ip_conntrack_lock);
3882+EXPORT_SYMBOL(ip_conntrack_hash);
3883+EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
3884+EXPORT_SYMBOL_GPL(ip_conntrack_put);
3885diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_nat_core.c linux-2.6.4-rc2/net/ipv4/netfilter/ip_nat_core.c
3886--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_nat_core.c 2004-03-04 06:16:37.000000000 +0000
3887+++ linux-2.6.4-rc2/net/ipv4/netfilter/ip_nat_core.c 2004-03-08 08:48:52.000000000 +0000
3888@@ -1016,6 +1016,10 @@
3889 /* FIXME: Man, this is a hack. <SIGH> */
3890 IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
3891 ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
3892+
3893+ /* Initialize fake conntrack so that NAT will skip it */
3894+ ip_conntrack_untracked.nat.info.initialized |=
3895+ (1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST);
3896
3897 return 0;
3898 }
3899diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_nat_core.c.orig linux-2.6.4-rc2/net/ipv4/netfilter/ip_nat_core.c.orig
3900--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ip_nat_core.c.orig 1970-01-01 00:00:00.000000000 +0000
3901+++ linux-2.6.4-rc2/net/ipv4/netfilter/ip_nat_core.c.orig 2004-03-04 06:16:37.000000000 +0000
3902@@ -0,0 +1,1036 @@
3903+/* NAT for netfilter; shared with compatibility layer. */
3904+
3905+/* (C) 1999-2001 Paul `Rusty' Russell
3906+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
3907+ *
3908+ * This program is free software; you can redistribute it and/or modify
3909+ * it under the terms of the GNU General Public License version 2 as
3910+ * published by the Free Software Foundation.
3911+ */
3912+
3913+#include <linux/module.h>
3914+#include <linux/types.h>
3915+#include <linux/timer.h>
3916+#include <linux/skbuff.h>
3917+#include <linux/netfilter_ipv4.h>
3918+#include <linux/vmalloc.h>
3919+#include <net/checksum.h>
3920+#include <net/icmp.h>
3921+#include <net/ip.h>
3922+#include <net/tcp.h> /* For tcp_prot in getorigdst */
3923+#include <linux/icmp.h>
3924+#include <linux/udp.h>
3925+
3926+#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
3927+#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
3928+
3929+#include <linux/netfilter_ipv4/ip_conntrack.h>
3930+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
3931+#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
3932+#include <linux/netfilter_ipv4/ip_nat.h>
3933+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
3934+#include <linux/netfilter_ipv4/ip_nat_core.h>
3935+#include <linux/netfilter_ipv4/ip_nat_helper.h>
3936+#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
3937+#include <linux/netfilter_ipv4/listhelp.h>
3938+
3939+#if 0
3940+#define DEBUGP printk
3941+#else
3942+#define DEBUGP(format, args...)
3943+#endif
3944+
3945+DECLARE_RWLOCK(ip_nat_lock);
3946+DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
3947+
3948+/* Calculated at init based on memory size */
3949+static unsigned int ip_nat_htable_size;
3950+
3951+static struct list_head *bysource;
3952+static struct list_head *byipsproto;
3953+LIST_HEAD(protos);
3954+LIST_HEAD(helpers);
3955+
3956+extern struct ip_nat_protocol unknown_nat_protocol;
3957+
3958+/* We keep extra hashes for each conntrack, for fast searching. */
3959+static inline size_t
3960+hash_by_ipsproto(u_int32_t src, u_int32_t dst, u_int16_t proto)
3961+{
3962+ /* Modified src and dst, to ensure we don't create two
3963+ identical streams. */
3964+ return (src + dst + proto) % ip_nat_htable_size;
3965+}
3966+
3967+static inline size_t
3968+hash_by_src(const struct ip_conntrack_manip *manip, u_int16_t proto)
3969+{
3970+ /* Original src, to ensure we map it consistently if poss. */
3971+ return (manip->ip + manip->u.all + proto) % ip_nat_htable_size;
3972+}
3973+
3974+/* Noone using conntrack by the time this called. */
3975+static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
3976+{
3977+ struct ip_nat_info *info = &conn->nat.info;
3978+ unsigned int hs, hp;
3979+
3980+ if (!info->initialized)
3981+ return;
3982+
3983+ IP_NF_ASSERT(info->bysource.conntrack);
3984+ IP_NF_ASSERT(info->byipsproto.conntrack);
3985+
3986+ hs = hash_by_src(&conn->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src,
3987+ conn->tuplehash[IP_CT_DIR_ORIGINAL]
3988+ .tuple.dst.protonum);
3989+
3990+ hp = hash_by_ipsproto(conn->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
3991+ conn->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
3992+ conn->tuplehash[IP_CT_DIR_REPLY]
3993+ .tuple.dst.protonum);
3994+
3995+ WRITE_LOCK(&ip_nat_lock);
3996+ LIST_DELETE(&bysource[hs], &info->bysource);
3997+ LIST_DELETE(&byipsproto[hp], &info->byipsproto);
3998+ WRITE_UNLOCK(&ip_nat_lock);
3999+}
4000+
4001+/* We do checksum mangling, so if they were wrong before they're still
4002+ * wrong. Also works for incomplete packets (eg. ICMP dest
4003+ * unreachables.) */
4004+u_int16_t
4005+ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
4006+{
4007+ u_int32_t diffs[] = { oldvalinv, newval };
4008+ return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
4009+ oldcheck^0xFFFF));
4010+}
4011+
4012+static inline int cmp_proto(const struct ip_nat_protocol *i, int proto)
4013+{
4014+ return i->protonum == proto;
4015+}
4016+
4017+struct ip_nat_protocol *
4018+find_nat_proto(u_int16_t protonum)
4019+{
4020+ struct ip_nat_protocol *i;
4021+
4022+ MUST_BE_READ_LOCKED(&ip_nat_lock);
4023+ i = LIST_FIND(&protos, cmp_proto, struct ip_nat_protocol *, protonum);
4024+ if (!i)
4025+ i = &unknown_nat_protocol;
4026+ return i;
4027+}
4028+
4029+/* Is this tuple already taken? (not by us) */
4030+int
4031+ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
4032+ const struct ip_conntrack *ignored_conntrack)
4033+{
4034+ /* Conntrack tracking doesn't keep track of outgoing tuples; only
4035+ incoming ones. NAT means they don't have a fixed mapping,
4036+ so we invert the tuple and look for the incoming reply.
4037+
4038+ We could keep a separate hash if this proves too slow. */
4039+ struct ip_conntrack_tuple reply;
4040+
4041+ invert_tuplepr(&reply, tuple);
4042+ return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
4043+}
4044+
4045+/* Does tuple + the source manip come within the range mr */
4046+static int
4047+in_range(const struct ip_conntrack_tuple *tuple,
4048+ const struct ip_conntrack_manip *manip,
4049+ const struct ip_nat_multi_range *mr)
4050+{
4051+ struct ip_nat_protocol *proto = find_nat_proto(tuple->dst.protonum);
4052+ unsigned int i;
4053+ struct ip_conntrack_tuple newtuple = { *manip, tuple->dst };
4054+
4055+ for (i = 0; i < mr->rangesize; i++) {
4056+ /* If we are allowed to map IPs, then we must be in the
4057+ range specified, otherwise we must be unchanged. */
4058+ if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
4059+ if (ntohl(newtuple.src.ip) < ntohl(mr->range[i].min_ip)
4060+ || (ntohl(newtuple.src.ip)
4061+ > ntohl(mr->range[i].max_ip)))
4062+ continue;
4063+ } else {
4064+ if (newtuple.src.ip != tuple->src.ip)
4065+ continue;
4066+ }
4067+
4068+ if (!(mr->range[i].flags & IP_NAT_RANGE_PROTO_SPECIFIED)
4069+ || proto->in_range(&newtuple, IP_NAT_MANIP_SRC,
4070+ &mr->range[i].min, &mr->range[i].max))
4071+ return 1;
4072+ }
4073+ return 0;
4074+}
4075+
4076+static inline int
4077+src_cmp(const struct ip_nat_hash *i,
4078+ const struct ip_conntrack_tuple *tuple,
4079+ const struct ip_nat_multi_range *mr)
4080+{
4081+ return (i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
4082+ == tuple->dst.protonum
4083+ && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
4084+ == tuple->src.ip
4085+ && i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
4086+ == tuple->src.u.all
4087+ && in_range(tuple,
4088+ &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4089+ .tuple.src,
4090+ mr));
4091+}
4092+
4093+/* Only called for SRC manip */
4094+static struct ip_conntrack_manip *
4095+find_appropriate_src(const struct ip_conntrack_tuple *tuple,
4096+ const struct ip_nat_multi_range *mr)
4097+{
4098+ unsigned int h = hash_by_src(&tuple->src, tuple->dst.protonum);
4099+ struct ip_nat_hash *i;
4100+
4101+ MUST_BE_READ_LOCKED(&ip_nat_lock);
4102+ i = LIST_FIND(&bysource[h], src_cmp, struct ip_nat_hash *, tuple, mr);
4103+ if (i)
4104+ return &i->conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src;
4105+ else
4106+ return NULL;
4107+}
4108+
4109+#ifdef CONFIG_IP_NF_NAT_LOCAL
4110+/* If it's really a local destination manip, it may need to do a
4111+ source manip too. */
4112+static int
4113+do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
4114+{
4115+ struct flowi fl = { .nl_u = { .ip4_u = { .daddr = var_ip } } };
4116+ struct rtable *rt;
4117+
4118+ /* FIXME: IPTOS_TOS(iph->tos) --RR */
4119+ if (ip_route_output_key(&rt, &fl) != 0) {
4120+ DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
4121+ NIPQUAD(var_ip));
4122+ return 0;
4123+ }
4124+
4125+ *other_ipp = rt->rt_src;
4126+ ip_rt_put(rt);
4127+ return 1;
4128+}
4129+#endif
4130+
4131+/* Simple way to iterate through all. */
4132+static inline int fake_cmp(const struct ip_nat_hash *i,
4133+ u_int32_t src, u_int32_t dst, u_int16_t protonum,
4134+ unsigned int *score,
4135+ const struct ip_conntrack *conntrack)
4136+{
4137+ /* Compare backwards: we're dealing with OUTGOING tuples, and
4138+ inside the conntrack is the REPLY tuple. Don't count this
4139+ conntrack. */
4140+ if (i->conntrack != conntrack
4141+ && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip == dst
4142+ && i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip == src
4143+ && (i->conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum
4144+ == protonum))
4145+ (*score)++;
4146+ return 0;
4147+}
4148+
4149+static inline unsigned int
4150+count_maps(u_int32_t src, u_int32_t dst, u_int16_t protonum,
4151+ const struct ip_conntrack *conntrack)
4152+{
4153+ unsigned int score = 0;
4154+ unsigned int h;
4155+
4156+ MUST_BE_READ_LOCKED(&ip_nat_lock);
4157+ h = hash_by_ipsproto(src, dst, protonum);
4158+ LIST_FIND(&byipsproto[h], fake_cmp, struct ip_nat_hash *,
4159+ src, dst, protonum, &score, conntrack);
4160+
4161+ return score;
4162+}
4163+
4164+/* For [FUTURE] fragmentation handling, we want the least-used
4165+ src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
4166+ if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
4167+ 1-65535, we don't do pro-rata allocation based on ports; we choose
4168+ the ip with the lowest src-ip/dst-ip/proto usage.
4169+
4170+ If an allocation then fails (eg. all 6 ports used in the 1.2.3.4
4171+ range), we eliminate that and try again. This is not the most
4172+ efficient approach, but if you're worried about that, don't hand us
4173+ ranges you don't really have. */
4174+static struct ip_nat_range *
4175+find_best_ips_proto(struct ip_conntrack_tuple *tuple,
4176+ const struct ip_nat_multi_range *mr,
4177+ const struct ip_conntrack *conntrack,
4178+ unsigned int hooknum)
4179+{
4180+ unsigned int i;
4181+ struct {
4182+ const struct ip_nat_range *range;
4183+ unsigned int score;
4184+ struct ip_conntrack_tuple tuple;
4185+ } best = { NULL, 0xFFFFFFFF };
4186+ u_int32_t *var_ipp, *other_ipp, saved_ip, orig_dstip;
4187+ static unsigned int randomness;
4188+
4189+ if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC) {
4190+ var_ipp = &tuple->src.ip;
4191+ saved_ip = tuple->dst.ip;
4192+ other_ipp = &tuple->dst.ip;
4193+ } else {
4194+ var_ipp = &tuple->dst.ip;
4195+ saved_ip = tuple->src.ip;
4196+ other_ipp = &tuple->src.ip;
4197+ }
4198+ /* Don't do do_extra_mangle unless necessary (overrides
4199+ explicit socket bindings, for example) */
4200+ orig_dstip = tuple->dst.ip;
4201+
4202+ IP_NF_ASSERT(mr->rangesize >= 1);
4203+ for (i = 0; i < mr->rangesize; i++) {
4204+ /* Host order */
4205+ u_int32_t minip, maxip, j;
4206+
4207+ /* Don't do ranges which are already eliminated. */
4208+ if (mr->range[i].flags & IP_NAT_RANGE_FULL) {
4209+ continue;
4210+ }
4211+
4212+ if (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS) {
4213+ minip = ntohl(mr->range[i].min_ip);
4214+ maxip = ntohl(mr->range[i].max_ip);
4215+ } else
4216+ minip = maxip = ntohl(*var_ipp);
4217+
4218+ randomness++;
4219+ for (j = 0; j < maxip - minip + 1; j++) {
4220+ unsigned int score;
4221+
4222+ *var_ipp = htonl(minip + (randomness + j)
4223+ % (maxip - minip + 1));
4224+
4225+ /* Reset the other ip in case it was mangled by
4226+ * do_extra_mangle last time. */
4227+ *other_ipp = saved_ip;
4228+
4229+#ifdef CONFIG_IP_NF_NAT_LOCAL
4230+ if (hooknum == NF_IP_LOCAL_OUT
4231+ && *var_ipp != orig_dstip
4232+ && !do_extra_mangle(*var_ipp, other_ipp)) {
4233+ DEBUGP("Range %u %u.%u.%u.%u rt failed!\n",
4234+ i, NIPQUAD(*var_ipp));
4235+ /* Can't route? This whole range part is
4236+ * probably screwed, but keep trying
4237+ * anyway. */
4238+ continue;
4239+ }
4240+#endif
4241+
4242+ /* Count how many others map onto this. */
4243+ score = count_maps(tuple->src.ip, tuple->dst.ip,
4244+ tuple->dst.protonum, conntrack);
4245+ if (score < best.score) {
4246+ /* Optimization: doesn't get any better than
4247+ this. */
4248+ if (score == 0)
4249+ return (struct ip_nat_range *)
4250+ &mr->range[i];
4251+
4252+ best.score = score;
4253+ best.tuple = *tuple;
4254+ best.range = &mr->range[i];
4255+ }
4256+ }
4257+ }
4258+ *tuple = best.tuple;
4259+
4260+ /* Discard const. */
4261+ return (struct ip_nat_range *)best.range;
4262+}
4263+
4264+/* Fast version doesn't iterate through hash chains, but only handles
4265+ common case of single IP address (null NAT, masquerade) */
4266+static struct ip_nat_range *
4267+find_best_ips_proto_fast(struct ip_conntrack_tuple *tuple,
4268+ const struct ip_nat_multi_range *mr,
4269+ const struct ip_conntrack *conntrack,
4270+ unsigned int hooknum)
4271+{
4272+ if (mr->rangesize != 1
4273+ || (mr->range[0].flags & IP_NAT_RANGE_FULL)
4274+ || ((mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
4275+ && mr->range[0].min_ip != mr->range[0].max_ip))
4276+ return find_best_ips_proto(tuple, mr, conntrack, hooknum);
4277+
4278+ if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
4279+ if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC)
4280+ tuple->src.ip = mr->range[0].min_ip;
4281+ else {
4282+ /* Only do extra mangle when required (breaks
4283+ socket binding) */
4284+#ifdef CONFIG_IP_NF_NAT_LOCAL
4285+ if (tuple->dst.ip != mr->range[0].min_ip
4286+ && hooknum == NF_IP_LOCAL_OUT
4287+ && !do_extra_mangle(mr->range[0].min_ip,
4288+ &tuple->src.ip))
4289+ return NULL;
4290+#endif
4291+ tuple->dst.ip = mr->range[0].min_ip;
4292+ }
4293+ }
4294+
4295+ /* Discard const. */
4296+ return (struct ip_nat_range *)&mr->range[0];
4297+}
4298+
4299+static int
4300+get_unique_tuple(struct ip_conntrack_tuple *tuple,
4301+ const struct ip_conntrack_tuple *orig_tuple,
4302+ const struct ip_nat_multi_range *mrr,
4303+ struct ip_conntrack *conntrack,
4304+ unsigned int hooknum)
4305+{
4306+ struct ip_nat_protocol *proto
4307+ = find_nat_proto(orig_tuple->dst.protonum);
4308+ struct ip_nat_range *rptr;
4309+ unsigned int i;
4310+ int ret;
4311+
4312+ /* We temporarily use flags for marking full parts, but we
4313+ always clean up afterwards */
4314+ struct ip_nat_multi_range *mr = (void *)mrr;
4315+
4316+ /* 1) If this srcip/proto/src-proto-part is currently mapped,
4317+ and that same mapping gives a unique tuple within the given
4318+ range, use that.
4319+
4320+ This is only required for source (ie. NAT/masq) mappings.
4321+ So far, we don't do local source mappings, so multiple
4322+ manips not an issue. */
4323+ if (hooknum == NF_IP_POST_ROUTING) {
4324+ struct ip_conntrack_manip *manip;
4325+
4326+ manip = find_appropriate_src(orig_tuple, mr);
4327+ if (manip) {
4328+ /* Apply same source manipulation. */
4329+ *tuple = ((struct ip_conntrack_tuple)
4330+ { *manip, orig_tuple->dst });
4331+ DEBUGP("get_unique_tuple: Found current src map\n");
4332+ if (!ip_nat_used_tuple(tuple, conntrack))
4333+ return 1;
4334+ }
4335+ }
4336+
4337+ /* 2) Select the least-used IP/proto combination in the given
4338+ range.
4339+ */
4340+ *tuple = *orig_tuple;
4341+ while ((rptr = find_best_ips_proto_fast(tuple, mr, conntrack, hooknum))
4342+ != NULL) {
4343+ DEBUGP("Found best for "); DUMP_TUPLE(tuple);
4344+ /* 3) The per-protocol part of the manip is made to
4345+ map into the range to make a unique tuple. */
4346+
4347+ /* Only bother mapping if it's not already in range
4348+ and unique */
4349+ if ((!(rptr->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
4350+ || proto->in_range(tuple, HOOK2MANIP(hooknum),
4351+ &rptr->min, &rptr->max))
4352+ && !ip_nat_used_tuple(tuple, conntrack)) {
4353+ ret = 1;
4354+ goto clear_fulls;
4355+ } else {
4356+ if (proto->unique_tuple(tuple, rptr,
4357+ HOOK2MANIP(hooknum),
4358+ conntrack)) {
4359+ /* Must be unique. */
4360+ IP_NF_ASSERT(!ip_nat_used_tuple(tuple,
4361+ conntrack));
4362+ ret = 1;
4363+ goto clear_fulls;
4364+ } else if (HOOK2MANIP(hooknum) == IP_NAT_MANIP_DST) {
4365+ /* Try implicit source NAT; protocol
4366+ may be able to play with ports to
4367+ make it unique. */
4368+ struct ip_nat_range r
4369+ = { IP_NAT_RANGE_MAP_IPS,
4370+ tuple->src.ip, tuple->src.ip,
4371+ { 0 }, { 0 } };
4372+ DEBUGP("Trying implicit mapping\n");
4373+ if (proto->unique_tuple(tuple, &r,
4374+ IP_NAT_MANIP_SRC,
4375+ conntrack)) {
4376+ /* Must be unique. */
4377+ IP_NF_ASSERT(!ip_nat_used_tuple
4378+ (tuple, conntrack));
4379+ ret = 1;
4380+ goto clear_fulls;
4381+ }
4382+ }
4383+ DEBUGP("Protocol can't get unique tuple %u.\n",
4384+ hooknum);
4385+ }
4386+
4387+ /* Eliminate that from range, and try again. */
4388+ rptr->flags |= IP_NAT_RANGE_FULL;
4389+ *tuple = *orig_tuple;
4390+ }
4391+
4392+ ret = 0;
4393+
4394+ clear_fulls:
4395+ /* Clear full flags. */
4396+ IP_NF_ASSERT(mr->rangesize >= 1);
4397+ for (i = 0; i < mr->rangesize; i++)
4398+ mr->range[i].flags &= ~IP_NAT_RANGE_FULL;
4399+
4400+ return ret;
4401+}
4402+
4403+static inline int
4404+helper_cmp(const struct ip_nat_helper *helper,
4405+ const struct ip_conntrack_tuple *tuple)
4406+{
4407+ return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
4408+}
4409+
4410+/* Where to manip the reply packets (will be reverse manip). */
4411+static unsigned int opposite_hook[NF_IP_NUMHOOKS]
4412+= { [NF_IP_PRE_ROUTING] = NF_IP_POST_ROUTING,
4413+ [NF_IP_POST_ROUTING] = NF_IP_PRE_ROUTING,
4414+#ifdef CONFIG_IP_NF_NAT_LOCAL
4415+ [NF_IP_LOCAL_OUT] = NF_IP_LOCAL_IN,
4416+ [NF_IP_LOCAL_IN] = NF_IP_LOCAL_OUT,
4417+#endif
4418+};
4419+
4420+unsigned int
4421+ip_nat_setup_info(struct ip_conntrack *conntrack,
4422+ const struct ip_nat_multi_range *mr,
4423+ unsigned int hooknum)
4424+{
4425+ struct ip_conntrack_tuple new_tuple, inv_tuple, reply;
4426+ struct ip_conntrack_tuple orig_tp;
4427+ struct ip_nat_info *info = &conntrack->nat.info;
4428+ int in_hashes = info->initialized;
4429+
4430+ MUST_BE_WRITE_LOCKED(&ip_nat_lock);
4431+ IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
4432+ || hooknum == NF_IP_POST_ROUTING
4433+ || hooknum == NF_IP_LOCAL_OUT);
4434+ IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
4435+ IP_NF_ASSERT(!(info->initialized & (1 << HOOK2MANIP(hooknum))));
4436+
4437+ /* What we've got will look like inverse of reply. Normally
4438+ this is what is in the conntrack, except for prior
4439+ manipulations (future optimization: if num_manips == 0,
4440+ orig_tp =
4441+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
4442+ invert_tuplepr(&orig_tp,
4443+ &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
4444+
4445+#if 0
4446+ {
4447+ unsigned int i;
4448+
4449+ DEBUGP("Hook %u (%s), ", hooknum,
4450+ HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST");
4451+ DUMP_TUPLE(&orig_tp);
4452+ DEBUGP("Range %p: ", mr);
4453+ for (i = 0; i < mr->rangesize; i++) {
4454+ DEBUGP("%u:%s%s%s %u.%u.%u.%u - %u.%u.%u.%u %u - %u\n",
4455+ i,
4456+ (mr->range[i].flags & IP_NAT_RANGE_MAP_IPS)
4457+ ? " MAP_IPS" : "",
4458+ (mr->range[i].flags
4459+ & IP_NAT_RANGE_PROTO_SPECIFIED)
4460+ ? " PROTO_SPECIFIED" : "",
4461+ (mr->range[i].flags & IP_NAT_RANGE_FULL)
4462+ ? " FULL" : "",
4463+ NIPQUAD(mr->range[i].min_ip),
4464+ NIPQUAD(mr->range[i].max_ip),
4465+ mr->range[i].min.all,
4466+ mr->range[i].max.all);
4467+ }
4468+ }
4469+#endif
4470+
4471+ do {
4472+ if (!get_unique_tuple(&new_tuple, &orig_tp, mr, conntrack,
4473+ hooknum)) {
4474+ DEBUGP("ip_nat_setup_info: Can't get unique for %p.\n",
4475+ conntrack);
4476+ return NF_DROP;
4477+ }
4478+
4479+#if 0
4480+ DEBUGP("Hook %u (%s) %p\n", hooknum,
4481+ HOOK2MANIP(hooknum)==IP_NAT_MANIP_SRC ? "SRC" : "DST",
4482+ conntrack);
4483+ DEBUGP("Original: ");
4484+ DUMP_TUPLE(&orig_tp);
4485+ DEBUGP("New: ");
4486+ DUMP_TUPLE(&new_tuple);
4487+#endif
4488+
4489+ /* We now have two tuples (SRCIP/SRCPT/DSTIP/DSTPT):
4490+ the original (A/B/C/D') and the mangled one (E/F/G/H').
4491+
4492+ We're only allowed to work with the SRC per-proto
4493+ part, so we create inverses of both to start, then
4494+ derive the other fields we need. */
4495+
4496+ /* Reply connection: simply invert the new tuple
4497+ (G/H/E/F') */
4498+ invert_tuplepr(&reply, &new_tuple);
4499+
4500+ /* Alter conntrack table so it recognizes replies.
4501+ If fail this race (reply tuple now used), repeat. */
4502+ } while (!ip_conntrack_alter_reply(conntrack, &reply));
4503+
4504+ /* FIXME: We can simply used existing conntrack reply tuple
4505+ here --RR */
4506+ /* Create inverse of original: C/D/A/B' */
4507+ invert_tuplepr(&inv_tuple, &orig_tp);
4508+
4509+ /* Has source changed?. */
4510+ if (!ip_ct_tuple_src_equal(&new_tuple, &orig_tp)) {
4511+ /* In this direction, a source manip. */
4512+ info->manips[info->num_manips++] =
4513+ ((struct ip_nat_info_manip)
4514+ { IP_CT_DIR_ORIGINAL, hooknum,
4515+ IP_NAT_MANIP_SRC, new_tuple.src });
4516+
4517+ IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
4518+
4519+ /* In the reverse direction, a destination manip. */
4520+ info->manips[info->num_manips++] =
4521+ ((struct ip_nat_info_manip)
4522+ { IP_CT_DIR_REPLY, opposite_hook[hooknum],
4523+ IP_NAT_MANIP_DST, orig_tp.src });
4524+ IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
4525+ }
4526+
4527+ /* Has destination changed? */
4528+ if (!ip_ct_tuple_dst_equal(&new_tuple, &orig_tp)) {
4529+ /* In this direction, a destination manip */
4530+ info->manips[info->num_manips++] =
4531+ ((struct ip_nat_info_manip)
4532+ { IP_CT_DIR_ORIGINAL, hooknum,
4533+ IP_NAT_MANIP_DST, reply.src });
4534+
4535+ IP_NF_ASSERT(info->num_manips < IP_NAT_MAX_MANIPS);
4536+
4537+ /* In the reverse direction, a source manip. */
4538+ info->manips[info->num_manips++] =
4539+ ((struct ip_nat_info_manip)
4540+ { IP_CT_DIR_REPLY, opposite_hook[hooknum],
4541+ IP_NAT_MANIP_SRC, inv_tuple.src });
4542+ IP_NF_ASSERT(info->num_manips <= IP_NAT_MAX_MANIPS);
4543+ }
4544+
4545+ /* If there's a helper, assign it; based on new tuple. */
4546+ if (!conntrack->master)
4547+ info->helper = LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *,
4548+ &reply);
4549+
4550+ /* It's done. */
4551+ info->initialized |= (1 << HOOK2MANIP(hooknum));
4552+
4553+ if (in_hashes) {
4554+ IP_NF_ASSERT(info->bysource.conntrack);
4555+ replace_in_hashes(conntrack, info);
4556+ } else {
4557+ place_in_hashes(conntrack, info);
4558+ }
4559+
4560+ return NF_ACCEPT;
4561+}
4562+
4563+void replace_in_hashes(struct ip_conntrack *conntrack,
4564+ struct ip_nat_info *info)
4565+{
4566+ /* Source has changed, so replace in hashes. */
4567+ unsigned int srchash
4568+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4569+ .tuple.src,
4570+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4571+ .tuple.dst.protonum);
4572+ /* We place packet as seen OUTGOUNG in byips_proto hash
4573+ (ie. reverse dst and src of reply packet. */
4574+ unsigned int ipsprotohash
4575+ = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY]
4576+ .tuple.dst.ip,
4577+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4578+ .tuple.src.ip,
4579+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4580+ .tuple.dst.protonum);
4581+
4582+ IP_NF_ASSERT(info->bysource.conntrack == conntrack);
4583+ MUST_BE_WRITE_LOCKED(&ip_nat_lock);
4584+
4585+ list_del(&info->bysource.list);
4586+ list_del(&info->byipsproto.list);
4587+
4588+ list_prepend(&bysource[srchash], &info->bysource);
4589+ list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
4590+}
4591+
4592+void place_in_hashes(struct ip_conntrack *conntrack,
4593+ struct ip_nat_info *info)
4594+{
4595+ unsigned int srchash
4596+ = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4597+ .tuple.src,
4598+ conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
4599+ .tuple.dst.protonum);
4600+ /* We place packet as seen OUTGOUNG in byips_proto hash
4601+ (ie. reverse dst and src of reply packet. */
4602+ unsigned int ipsprotohash
4603+ = hash_by_ipsproto(conntrack->tuplehash[IP_CT_DIR_REPLY]
4604+ .tuple.dst.ip,
4605+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4606+ .tuple.src.ip,
4607+ conntrack->tuplehash[IP_CT_DIR_REPLY]
4608+ .tuple.dst.protonum);
4609+
4610+ IP_NF_ASSERT(!info->bysource.conntrack);
4611+
4612+ MUST_BE_WRITE_LOCKED(&ip_nat_lock);
4613+ info->byipsproto.conntrack = conntrack;
4614+ info->bysource.conntrack = conntrack;
4615+
4616+ list_prepend(&bysource[srchash], &info->bysource);
4617+ list_prepend(&byipsproto[ipsprotohash], &info->byipsproto);
4618+}
4619+
4620+/* Returns true if succeeded. */
4621+static int
4622+manip_pkt(u_int16_t proto,
4623+ struct sk_buff **pskb,
4624+ unsigned int iphdroff,
4625+ const struct ip_conntrack_manip *manip,
4626+ enum ip_nat_manip_type maniptype)
4627+{
4628+ struct iphdr *iph;
4629+
4630+ (*pskb)->nfcache |= NFC_ALTERED;
4631+ if (!skb_ip_make_writable(pskb, iphdroff+sizeof(iph)))
4632+ return 0;
4633+
4634+ iph = (void *)(*pskb)->data + iphdroff;
4635+
4636+ /* Manipulate protcol part. */
4637+ if (!find_nat_proto(proto)->manip_pkt(pskb,
4638+ iphdroff + iph->ihl*4,
4639+ manip, maniptype))
4640+ return 0;
4641+
4642+ iph = (void *)(*pskb)->data + iphdroff;
4643+
4644+ if (maniptype == IP_NAT_MANIP_SRC) {
4645+ iph->check = ip_nat_cheat_check(~iph->saddr, manip->ip,
4646+ iph->check);
4647+ iph->saddr = manip->ip;
4648+ } else {
4649+ iph->check = ip_nat_cheat_check(~iph->daddr, manip->ip,
4650+ iph->check);
4651+ iph->daddr = manip->ip;
4652+ }
4653+ return 1;
4654+}
4655+
4656+static inline int exp_for_packet(struct ip_conntrack_expect *exp,
4657+ struct sk_buff *skb)
4658+{
4659+ struct ip_conntrack_protocol *proto;
4660+ int ret = 1;
4661+
4662+ MUST_BE_READ_LOCKED(&ip_conntrack_lock);
4663+ proto = __ip_ct_find_proto(skb->nh.iph->protocol);
4664+ if (proto->exp_matches_pkt)
4665+ ret = proto->exp_matches_pkt(exp, skb);
4666+
4667+ return ret;
4668+}
4669+
4670+/* Do packet manipulations according to binding. */
4671+unsigned int
4672+do_bindings(struct ip_conntrack *ct,
4673+ enum ip_conntrack_info ctinfo,
4674+ struct ip_nat_info *info,
4675+ unsigned int hooknum,
4676+ struct sk_buff **pskb)
4677+{
4678+ unsigned int i;
4679+ struct ip_nat_helper *helper;
4680+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
4681+ int proto = (*pskb)->nh.iph->protocol;
4682+
4683+ /* Need nat lock to protect against modification, but neither
4684+ conntrack (referenced) and helper (deleted with
4685+ synchronize_bh()) can vanish. */
4686+ READ_LOCK(&ip_nat_lock);
4687+ for (i = 0; i < info->num_manips; i++) {
4688+ if (info->manips[i].direction == dir
4689+ && info->manips[i].hooknum == hooknum) {
4690+ DEBUGP("Mangling %p: %s to %u.%u.%u.%u %u\n",
4691+ *pskb,
4692+ info->manips[i].maniptype == IP_NAT_MANIP_SRC
4693+ ? "SRC" : "DST",
4694+ NIPQUAD(info->manips[i].manip.ip),
4695+ htons(info->manips[i].manip.u.all));
4696+ if (!manip_pkt(proto, pskb, 0,
4697+ &info->manips[i].manip,
4698+ info->manips[i].maniptype)) {
4699+ READ_UNLOCK(&ip_nat_lock);
4700+ return NF_DROP;
4701+ }
4702+ }
4703+ }
4704+ helper = info->helper;
4705+ READ_UNLOCK(&ip_nat_lock);
4706+
4707+ if (helper) {
4708+ struct ip_conntrack_expect *exp = NULL;
4709+ struct list_head *cur_item;
4710+ int ret = NF_ACCEPT;
4711+ int helper_called = 0;
4712+
4713+ DEBUGP("do_bindings: helper existing for (%p)\n", ct);
4714+
4715+ /* Always defragged for helpers */
4716+ IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
4717+ & htons(IP_MF|IP_OFFSET)));
4718+
4719+ /* Have to grab read lock before sibling_list traversal */
4720+ READ_LOCK(&ip_conntrack_lock);
4721+ list_for_each(cur_item, &ct->sibling_list) {
4722+ exp = list_entry(cur_item, struct ip_conntrack_expect,
4723+ expected_list);
4724+
4725+ /* if this expectation is already established, skip */
4726+ if (exp->sibling)
4727+ continue;
4728+
4729+ if (exp_for_packet(exp, *pskb)) {
4730+ /* FIXME: May be true multiple times in the
4731+ * case of UDP!! */
4732+ DEBUGP("calling nat helper (exp=%p) for packet\n", exp);
4733+ ret = helper->help(ct, exp, info, ctinfo,
4734+ hooknum, pskb);
4735+ if (ret != NF_ACCEPT) {
4736+ READ_UNLOCK(&ip_conntrack_lock);
4737+ return ret;
4738+ }
4739+ helper_called = 1;
4740+ }
4741+ }
4742+ /* Helper might want to manip the packet even when there is no
4743+ * matching expectation for this packet */
4744+ if (!helper_called && helper->flags & IP_NAT_HELPER_F_ALWAYS) {
4745+ DEBUGP("calling nat helper for packet without expectation\n");
4746+ ret = helper->help(ct, NULL, info, ctinfo,
4747+ hooknum, pskb);
4748+ if (ret != NF_ACCEPT) {
4749+ READ_UNLOCK(&ip_conntrack_lock);
4750+ return ret;
4751+ }
4752+ }
4753+ READ_UNLOCK(&ip_conntrack_lock);
4754+
4755+ /* Adjust sequence number only once per packet
4756+ * (helper is called at all hooks) */
4757+ if (proto == IPPROTO_TCP
4758+ && (hooknum == NF_IP_POST_ROUTING
4759+ || hooknum == NF_IP_LOCAL_IN)) {
4760+ DEBUGP("ip_nat_core: adjusting sequence number\n");
4761+ /* future: put this in a l4-proto specific function,
4762+ * and call this function here. */
4763+ if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
4764+ ret = NF_DROP;
4765+ }
4766+
4767+ return ret;
4768+
4769+ } else
4770+ return NF_ACCEPT;
4771+
4772+ /* not reached */
4773+}
4774+
4775+int
4776+icmp_reply_translation(struct sk_buff **pskb,
4777+ struct ip_conntrack *conntrack,
4778+ unsigned int hooknum,
4779+ int dir)
4780+{
4781+ struct {
4782+ struct icmphdr icmp;
4783+ struct iphdr ip;
4784+ } *inside;
4785+ unsigned int i;
4786+ struct ip_nat_info *info = &conntrack->nat.info;
4787+ int hdrlen;
4788+
4789+ if (!skb_ip_make_writable(pskb,(*pskb)->nh.iph->ihl*4+sizeof(*inside)))
4790+ return 0;
4791+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
4792+
4793+ /* We're actually going to mangle it beyond trivial checksum
4794+ adjustment, so make sure the current checksum is correct. */
4795+ if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
4796+ hdrlen = (*pskb)->nh.iph->ihl * 4;
4797+ if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
4798+ (*pskb)->len - hdrlen, 0)))
4799+ return 0;
4800+ }
4801+
4802+ /* Must be RELATED */
4803+ IP_NF_ASSERT((*pskb)->nfct
4804+ - (struct ip_conntrack *)(*pskb)->nfct->master
4805+ == IP_CT_RELATED
4806+ || (*pskb)->nfct
4807+ - (struct ip_conntrack *)(*pskb)->nfct->master
4808+ == IP_CT_RELATED+IP_CT_IS_REPLY);
4809+
4810+ /* Redirects on non-null nats must be dropped, else they'll
4811+ start talking to each other without our translation, and be
4812+ confused... --RR */
4813+ if (inside->icmp.type == ICMP_REDIRECT) {
4814+ /* Don't care about races here. */
4815+ if (info->initialized
4816+ != ((1 << IP_NAT_MANIP_SRC) | (1 << IP_NAT_MANIP_DST))
4817+ || info->num_manips != 0)
4818+ return 0;
4819+ }
4820+
4821+ DEBUGP("icmp_reply_translation: translating error %p hook %u dir %s\n",
4822+ *pskb, hooknum, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
4823+ /* Note: May not be from a NAT'd host, but probably safest to
4824+ do translation always as if it came from the host itself
4825+ (even though a "host unreachable" coming from the host
4826+ itself is a bit weird).
4827+
4828+ More explanation: some people use NAT for anonymizing.
4829+ Also, CERT recommends dropping all packets from private IP
4830+ addresses (although ICMP errors from internal links with
4831+ such addresses are not too uncommon, as Alan Cox points
4832+ out) */
4833+
4834+ READ_LOCK(&ip_nat_lock);
4835+ for (i = 0; i < info->num_manips; i++) {
4836+ DEBUGP("icmp_reply: manip %u dir %s hook %u\n",
4837+ i, info->manips[i].direction == IP_CT_DIR_ORIGINAL ?
4838+ "ORIG" : "REPLY", info->manips[i].hooknum);
4839+
4840+ if (info->manips[i].direction != dir)
4841+ continue;
4842+
4843+ /* Mapping the inner packet is just like a normal
4844+ packet, except it was never src/dst reversed, so
4845+ where we would normally apply a dst manip, we apply
4846+ a src, and vice versa. */
4847+ if (info->manips[i].hooknum == hooknum) {
4848+ DEBUGP("icmp_reply: inner %s -> %u.%u.%u.%u %u\n",
4849+ info->manips[i].maniptype == IP_NAT_MANIP_SRC
4850+ ? "DST" : "SRC",
4851+ NIPQUAD(info->manips[i].manip.ip),
4852+ ntohs(info->manips[i].manip.u.udp.port));
4853+ if (!manip_pkt(inside->ip.protocol, pskb,
4854+ (*pskb)->nh.iph->ihl*4
4855+ + sizeof(inside->icmp),
4856+ &info->manips[i].manip,
4857+ !info->manips[i].maniptype))
4858+ goto unlock_fail;
4859+
4860+ /* Outer packet needs to have IP header NATed like
4861+ it's a reply. */
4862+
4863+ /* Use mapping to map outer packet: 0 give no
4864+ per-proto mapping */
4865+ DEBUGP("icmp_reply: outer %s -> %u.%u.%u.%u\n",
4866+ info->manips[i].maniptype == IP_NAT_MANIP_SRC
4867+ ? "SRC" : "DST",
4868+ NIPQUAD(info->manips[i].manip.ip));
4869+ if (!manip_pkt(0, pskb, 0,
4870+ &info->manips[i].manip,
4871+ info->manips[i].maniptype))
4872+ goto unlock_fail;
4873+ }
4874+ }
4875+ READ_UNLOCK(&ip_nat_lock);
4876+
4877+ hdrlen = (*pskb)->nh.iph->ihl * 4;
4878+
4879+ inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
4880+
4881+ inside->icmp.checksum = 0;
4882+ inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
4883+ (*pskb)->len - hdrlen,
4884+ 0));
4885+ return 1;
4886+
4887+ unlock_fail:
4888+ READ_UNLOCK(&ip_nat_lock);
4889+ return 0;
4890+}
4891+
4892+int __init ip_nat_init(void)
4893+{
4894+ size_t i;
4895+
4896+ /* Leave them the same for the moment. */
4897+ ip_nat_htable_size = ip_conntrack_htable_size;
4898+
4899+ /* One vmalloc for both hash tables */
4900+ bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size*2);
4901+ if (!bysource) {
4902+ return -ENOMEM;
4903+ }
4904+ byipsproto = bysource + ip_nat_htable_size;
4905+
4906+ /* Sew in builtin protocols. */
4907+ WRITE_LOCK(&ip_nat_lock);
4908+ list_append(&protos, &ip_nat_protocol_tcp);
4909+ list_append(&protos, &ip_nat_protocol_udp);
4910+ list_append(&protos, &ip_nat_protocol_icmp);
4911+ WRITE_UNLOCK(&ip_nat_lock);
4912+
4913+ for (i = 0; i < ip_nat_htable_size; i++) {
4914+ INIT_LIST_HEAD(&bysource[i]);
4915+ INIT_LIST_HEAD(&byipsproto[i]);
4916+ }
4917+
4918+ /* FIXME: Man, this is a hack. <SIGH> */
4919+ IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
4920+ ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
4921+
4922+ return 0;
4923+}
4924+
4925+/* Clear NAT section of all conntracks, in case we're loaded again. */
4926+static int clean_nat(const struct ip_conntrack *i, void *data)
4927+{
4928+ memset((void *)&i->nat, 0, sizeof(i->nat));
4929+ return 0;
4930+}
4931+
4932+/* Not __exit: called from ip_nat_standalone.c:init_or_cleanup() --RR */
4933+void ip_nat_cleanup(void)
4934+{
4935+ ip_ct_selective_cleanup(&clean_nat, NULL);
4936+ ip_conntrack_destroyed = NULL;
4937+ vfree(bysource);
4938+}
4939diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c
4940--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c 1970-01-01 00:00:00.000000000 +0000
4941+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_IPV4OPTSSTRIP.c 2004-03-08 08:48:40.000000000 +0000
4942@@ -0,0 +1,89 @@
4943+/**
4944+ * Strip all IP options in the IP packet header.
4945+ *
4946+ * (C) 2001 by Fabrice MARIE <fabrice@netfilter.org>
4947+ * This software is distributed under GNU GPL v2, 1991
4948+ */
4949+
4950+#include <linux/module.h>
4951+#include <linux/skbuff.h>
4952+#include <linux/ip.h>
4953+#include <net/checksum.h>
4954+
4955+#include <linux/netfilter_ipv4/ip_tables.h>
4956+
4957+MODULE_AUTHOR("Fabrice MARIE <fabrice@netfilter.org>");
4958+MODULE_DESCRIPTION("Strip all options in IPv4 packets");
4959+MODULE_LICENSE("GPL");
4960+
4961+static unsigned int
4962+target(struct sk_buff **pskb,
4963+ const struct net_device *in,
4964+ const struct net_device *out,
4965+ unsigned int hooknum,
4966+ const void *targinfo,
4967+ void *userinfo)
4968+{
4969+ struct iphdr *iph;
4970+ struct sk_buff *skb;
4971+ struct ip_options *opt;
4972+ unsigned char *optiph;
4973+ int l;
4974+
4975+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
4976+ return NF_DROP;
4977+
4978+ skb = (*pskb);
4979+ iph = (*pskb)->nh.iph;
4980+ optiph = skb->nh.raw;
4981+ l = ((struct ip_options *)(&(IPCB(skb)->opt)))->optlen;
4982+
4983+ /* if no options in packet then nothing to clear. */
4984+ if (iph->ihl * 4 == sizeof(struct iphdr))
4985+ return IPT_CONTINUE;
4986+
4987+ /* else clear all options */
4988+ memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
4989+ memset(optiph+sizeof(struct iphdr), IPOPT_NOOP, l);
4990+ opt = &(IPCB(skb)->opt);
4991+ opt->is_data = 0;
4992+ opt->optlen = l;
4993+
4994+ skb->nfcache |= NFC_ALTERED;
4995+
4996+ return IPT_CONTINUE;
4997+}
4998+
4999+static int
5000+checkentry(const char *tablename,
5001+ const struct ipt_entry *e,
5002+ void *targinfo,
5003+ unsigned int targinfosize,
5004+ unsigned int hook_mask)
5005+{
5006+ if (strcmp(tablename, "mangle")) {
5007+ printk(KERN_WARNING "IPV4OPTSSTRIP: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
5008+ return 0;
5009+ }
5010+ /* nothing else to check because no parameters */
5011+ return 1;
5012+}
5013+
5014+static struct ipt_target ipt_ipv4optsstrip_reg = {
5015+ .name = "IPV4OPTSSTRIP",
5016+ .target = target,
5017+ .checkentry = checkentry,
5018+ .me = THIS_MODULE };
5019+
5020+static int __init init(void)
5021+{
5022+ return ipt_register_target(&ipt_ipv4optsstrip_reg);
5023+}
5024+
5025+static void __exit fini(void)
5026+{
5027+ ipt_unregister_target(&ipt_ipv4optsstrip_reg);
5028+}
5029+
5030+module_init(init);
5031+module_exit(fini);
5032diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_LOG.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_LOG.c
5033--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_LOG.c 2004-03-04 06:17:03.000000000 +0000
5034+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_LOG.c 2004-03-08 08:48:37.000000000 +0000
5035@@ -19,6 +19,7 @@
5036 #include <net/tcp.h>
5037 #include <net/route.h>
5038
5039+#include <linux/netfilter.h>
5040 #include <linux/netfilter_ipv4/ip_tables.h>
5041 #include <linux/netfilter_ipv4/ipt_LOG.h>
5042
5043@@ -26,6 +27,10 @@
5044 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
5045 MODULE_DESCRIPTION("iptables syslog logging module");
5046
5047+static unsigned int nflog = 1;
5048+MODULE_PARM(nflog, "i");
5049+MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
5050+
5051 #if 0
5052 #define DEBUGP printk
5053 #else
5054@@ -324,28 +329,25 @@
5055 /* maxlen = 230+ 91 + 230 + 252 = 803 */
5056 }
5057
5058-static unsigned int
5059-ipt_log_target(struct sk_buff **pskb,
5060+static void
5061+ipt_log_packet(unsigned int hooknum,
5062+ const struct sk_buff *skb,
5063 const struct net_device *in,
5064 const struct net_device *out,
5065- unsigned int hooknum,
5066- const void *targinfo,
5067- void *userinfo)
5068+ const struct ipt_log_info *loginfo,
5069+ const char *level_string,
5070+ const char *prefix)
5071 {
5072- const struct ipt_log_info *loginfo = targinfo;
5073- char level_string[4] = "< >";
5074-
5075- level_string[1] = '0' + (loginfo->level % 8);
5076 spin_lock_bh(&log_lock);
5077 printk(level_string);
5078 printk("%sIN=%s OUT=%s ",
5079- loginfo->prefix,
5080+ prefix == NULL ? loginfo->prefix : prefix,
5081 in ? in->name : "",
5082 out ? out->name : "");
5083 #ifdef CONFIG_BRIDGE_NETFILTER
5084- if ((*pskb)->nf_bridge) {
5085- struct net_device *physindev = (*pskb)->nf_bridge->physindev;
5086- struct net_device *physoutdev = (*pskb)->nf_bridge->physoutdev;
5087+ if (skb->nf_bridge) {
5088+ struct net_device *physindev = skb->nf_bridge->physindev;
5089+ struct net_device *physoutdev = skb->nf_bridge->physoutdev;
5090
5091 if (physindev && in != physindev)
5092 printk("PHYSIN=%s ", physindev->name);
5093@@ -357,25 +359,56 @@
5094 if (in && !out) {
5095 /* MAC logging for input chain only. */
5096 printk("MAC=");
5097- if ((*pskb)->dev && (*pskb)->dev->hard_header_len
5098- && (*pskb)->mac.raw != (void*)(*pskb)->nh.iph) {
5099+ if (skb->dev && skb->dev->hard_header_len
5100+ && skb->mac.raw != (void*)skb->nh.iph) {
5101 int i;
5102- unsigned char *p = (*pskb)->mac.raw;
5103- for (i = 0; i < (*pskb)->dev->hard_header_len; i++,p++)
5104+ unsigned char *p = skb->mac.raw;
5105+ for (i = 0; i < skb->dev->hard_header_len; i++,p++)
5106 printk("%02x%c", *p,
5107- i==(*pskb)->dev->hard_header_len - 1
5108+ i==skb->dev->hard_header_len - 1
5109 ? ' ':':');
5110 } else
5111 printk(" ");
5112 }
5113
5114- dump_packet(loginfo, *pskb, 0);
5115+ dump_packet(loginfo, skb, 0);
5116 printk("\n");
5117 spin_unlock_bh(&log_lock);
5118+}
5119+
5120+static unsigned int
5121+ipt_log_target(struct sk_buff **pskb,
5122+ const struct net_device *in,
5123+ const struct net_device *out,
5124+ unsigned int hooknum,
5125+ const void *targinfo,
5126+ void *userinfo)
5127+{
5128+ const struct ipt_log_info *loginfo = targinfo;
5129+ char level_string[4] = "< >";
5130+
5131+ level_string[1] = '0' + (loginfo->level % 8);
5132+ ipt_log_packet(hooknum, *pskb, in, out, loginfo, level_string, NULL);
5133
5134 return IPT_CONTINUE;
5135 }
5136
5137+static void
5138+ipt_logfn(unsigned int hooknum,
5139+ const struct sk_buff *skb,
5140+ const struct net_device *in,
5141+ const struct net_device *out,
5142+ const char *prefix)
5143+{
5144+ struct ipt_log_info loginfo = {
5145+ .level = 0,
5146+ .logflags = IPT_LOG_MASK,
5147+ .prefix = ""
5148+ };
5149+
5150+ ipt_log_packet(hooknum, skb, in, out, &loginfo, KERN_WARNING, prefix);
5151+}
5152+
5153 static int ipt_log_checkentry(const char *tablename,
5154 const struct ipt_entry *e,
5155 void *targinfo,
5156@@ -413,11 +446,18 @@
5157
5158 static int __init init(void)
5159 {
5160- return ipt_register_target(&ipt_log_reg);
5161+ if (ipt_register_target(&ipt_log_reg))
5162+ return -EINVAL;
5163+ if (nflog)
5164+ nf_log_register(PF_INET, &ipt_logfn);
5165+
5166+ return 0;
5167 }
5168
5169 static void __exit fini(void)
5170 {
5171+ if (nflog)
5172+ nf_log_unregister(PF_INET, &ipt_logfn);
5173 ipt_unregister_target(&ipt_log_reg);
5174 }
5175
5176diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_NOTRACK.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_NOTRACK.c
5177--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_NOTRACK.c 1970-01-01 00:00:00.000000000 +0000
5178+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_NOTRACK.c 2004-03-08 08:48:52.000000000 +0000
5179@@ -0,0 +1,75 @@
5180+/* This is a module which is used for setting up fake conntracks
5181+ * on packets so that they are not seen by the conntrack/NAT code.
5182+ */
5183+#include <linux/module.h>
5184+#include <linux/skbuff.h>
5185+
5186+#include <linux/netfilter_ipv4/ip_tables.h>
5187+#include <linux/netfilter_ipv4/ip_conntrack.h>
5188+
5189+static unsigned int
5190+target(struct sk_buff **pskb,
5191+ const struct net_device *in,
5192+ const struct net_device *out,
5193+ unsigned int hooknum,
5194+ const void *targinfo,
5195+ void *userinfo)
5196+{
5197+ /* Previously seen (loopback)? Ignore. */
5198+ if ((*pskb)->nfct != NULL)
5199+ return IPT_CONTINUE;
5200+
5201+ /* Attach fake conntrack entry.
5202+ If there is a real ct entry correspondig to this packet,
5203+ it'll hang aroun till timing out. We don't deal with it
5204+ for performance reasons. JK */
5205+ (*pskb)->nfct = &ip_conntrack_untracked.infos[IP_CT_NEW];
5206+ nf_conntrack_get((*pskb)->nfct);
5207+
5208+ return IPT_CONTINUE;
5209+}
5210+
5211+static int
5212+checkentry(const char *tablename,
5213+ const struct ipt_entry *e,
5214+ void *targinfo,
5215+ unsigned int targinfosize,
5216+ unsigned int hook_mask)
5217+{
5218+ if (targinfosize != 0) {
5219+ printk(KERN_WARNING "NOTRACK: targinfosize %u != 0\n",
5220+ targinfosize);
5221+ return 0;
5222+ }
5223+
5224+ if (strcmp(tablename, "raw") != 0) {
5225+ printk(KERN_WARNING "NOTRACK: can only be called from \"raw\" table, not \"%s\"\n", tablename);
5226+ return 0;
5227+ }
5228+
5229+ return 1;
5230+}
5231+
5232+static struct ipt_target ipt_notrack_reg = {
5233+ .name = "NOTRACK",
5234+ .target = target,
5235+ .checkentry = checkentry,
5236+ .me = THIS_MODULE
5237+};
5238+
5239+static int __init init(void)
5240+{
5241+ if (ipt_register_target(&ipt_notrack_reg))
5242+ return -EINVAL;
5243+
5244+ return 0;
5245+}
5246+
5247+static void __exit fini(void)
5248+{
5249+ ipt_unregister_target(&ipt_notrack_reg);
5250+}
5251+
5252+module_init(init);
5253+module_exit(fini);
5254+MODULE_LICENSE("GPL");
5255diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_TTL.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_TTL.c
5256--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_TTL.c 1970-01-01 00:00:00.000000000 +0000
5257+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_TTL.c 2004-03-08 08:48:44.000000000 +0000
5258@@ -0,0 +1,120 @@
5259+/* TTL modification target for IP tables
5260+ * (C) 2000 by Harald Welte <laforge@gnumonks.org>
5261+ *
5262+ * Version: $Revision$
5263+ *
5264+ * This software is distributed under the terms of GNU GPL
5265+ */
5266+
5267+#include <linux/module.h>
5268+#include <linux/skbuff.h>
5269+#include <linux/ip.h>
5270+#include <net/checksum.h>
5271+
5272+#include <linux/netfilter_ipv4/ip_tables.h>
5273+#include <linux/netfilter_ipv4/ipt_TTL.h>
5274+
5275+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
5276+MODULE_DESCRIPTION("IP tables TTL modification module");
5277+MODULE_LICENSE("GPL");
5278+
5279+static unsigned int
5280+ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in,
5281+ const struct net_device *out, unsigned int hooknum,
5282+ const void *targinfo, void *userinfo)
5283+{
5284+ struct iphdr *iph;
5285+ const struct ipt_TTL_info *info = targinfo;
5286+ u_int16_t diffs[2];
5287+ int new_ttl;
5288+
5289+ if (!skb_ip_make_writable(pskb, (*pskb)->len))
5290+ return NF_DROP;
5291+
5292+ iph = (*pskb)->nh.iph;
5293+
5294+ switch (info->mode) {
5295+ case IPT_TTL_SET:
5296+ new_ttl = info->ttl;
5297+ break;
5298+ case IPT_TTL_INC:
5299+ new_ttl = iph->ttl + info->ttl;
5300+ if (new_ttl > 255)
5301+ new_ttl = 255;
5302+ break;
5303+ case IPT_TTL_DEC:
5304+ new_ttl = iph->ttl + info->ttl;
5305+ if (new_ttl < 0)
5306+ new_ttl = 0;
5307+ break;
5308+ default:
5309+ new_ttl = iph->ttl;
5310+ break;
5311+ }
5312+
5313+ if (new_ttl != iph->ttl) {
5314+ diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
5315+ iph->ttl = new_ttl;
5316+ diffs[1] = htons(((unsigned)iph->ttl) << 8);
5317+ iph->check = csum_fold(csum_partial((char *)diffs,
5318+ sizeof(diffs),
5319+ iph->check^0xFFFF));
5320+ (*pskb)->nfcache |= NFC_ALTERED;
5321+ }
5322+
5323+ return IPT_CONTINUE;
5324+}
5325+
5326+static int ipt_ttl_checkentry(const char *tablename,
5327+ const struct ipt_entry *e,
5328+ void *targinfo,
5329+ unsigned int targinfosize,
5330+ unsigned int hook_mask)
5331+{
5332+ struct ipt_TTL_info *info = targinfo;
5333+
5334+ if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) {
5335+ printk(KERN_WARNING "TTL: targinfosize %u != %Zu\n",
5336+ targinfosize,
5337+ IPT_ALIGN(sizeof(struct ipt_TTL_info)));
5338+ return 0;
5339+ }
5340+
5341+ if (strcmp(tablename, "mangle")) {
5342+ printk(KERN_WARNING "TTL: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
5343+ return 0;
5344+ }
5345+
5346+ if (info->mode > IPT_TTL_MAXMODE) {
5347+ printk(KERN_WARNING "TTL: invalid or unknown Mode %u\n",
5348+ info->mode);
5349+ return 0;
5350+ }
5351+
5352+ if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) {
5353+ printk(KERN_WARNING "TTL: increment/decrement doesn't make sense with value 0\n");
5354+ return 0;
5355+ }
5356+
5357+ return 1;
5358+}
5359+
5360+static struct ipt_target ipt_TTL = {
5361+ .name = "TTL",
5362+ .target = ipt_ttl_target,
5363+ .checkentry = ipt_ttl_checkentry,
5364+ .me = THIS_MODULE
5365+};
5366+
5367+static int __init init(void)
5368+{
5369+ return ipt_register_target(&ipt_TTL);
5370+}
5371+
5372+static void __exit fini(void)
5373+{
5374+ ipt_unregister_target(&ipt_TTL);
5375+}
5376+
5377+module_init(init);
5378+module_exit(fini);
5379diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_ULOG.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_ULOG.c
5380--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_ULOG.c 2004-03-04 06:16:42.000000000 +0000
5381+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_ULOG.c 2004-03-08 08:48:37.000000000 +0000
5382@@ -50,6 +50,7 @@
5383 #include <linux/netlink.h>
5384 #include <linux/netdevice.h>
5385 #include <linux/mm.h>
5386+#include <linux/netfilter.h>
5387 #include <linux/netfilter_ipv4/ip_tables.h>
5388 #include <linux/netfilter_ipv4/ipt_ULOG.h>
5389 #include <linux/netfilter_ipv4/lockhelp.h>
5390@@ -80,6 +81,10 @@
5391 MODULE_PARM(flushtimeout, "i");
5392 MODULE_PARM_DESC(flushtimeout, "buffer flush timeout");
5393
5394+static unsigned int nflog = 1;
5395+MODULE_PARM(nflog, "i");
5396+MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
5397+
5398 /* global data structures */
5399
5400 typedef struct {
5401@@ -157,17 +162,17 @@
5402 return skb;
5403 }
5404
5405-static unsigned int ipt_ulog_target(struct sk_buff **pskb,
5406- const struct net_device *in,
5407- const struct net_device *out,
5408- unsigned int hooknum,
5409- const void *targinfo, void *userinfo)
5410+static void ipt_ulog_packet(unsigned int hooknum,
5411+ const struct sk_buff *skb,
5412+ const struct net_device *in,
5413+ const struct net_device *out,
5414+ const struct ipt_ulog_info *loginfo,
5415+ const char *prefix)
5416 {
5417 ulog_buff_t *ub;
5418 ulog_packet_msg_t *pm;
5419 size_t size, copy_len;
5420 struct nlmsghdr *nlh;
5421- struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
5422
5423 /* ffs == find first bit set, necessary because userspace
5424 * is already shifting groupnumber, but we need unshifted.
5425@@ -176,8 +181,8 @@
5426
5427 /* calculate the size of the skb needed */
5428 if ((loginfo->copy_range == 0) ||
5429- (loginfo->copy_range > (*pskb)->len)) {
5430- copy_len = (*pskb)->len;
5431+ (loginfo->copy_range > skb->len)) {
5432+ copy_len = skb->len;
5433 } else {
5434 copy_len = loginfo->copy_range;
5435 }
5436@@ -214,19 +219,21 @@
5437
5438 /* copy hook, prefix, timestamp, payload, etc. */
5439 pm->data_len = copy_len;
5440- pm->timestamp_sec = (*pskb)->stamp.tv_sec;
5441- pm->timestamp_usec = (*pskb)->stamp.tv_usec;
5442- pm->mark = (*pskb)->nfmark;
5443+ pm->timestamp_sec = skb->stamp.tv_sec;
5444+ pm->timestamp_usec = skb->stamp.tv_usec;
5445+ pm->mark = skb->nfmark;
5446 pm->hook = hooknum;
5447- if (loginfo->prefix[0] != '\0')
5448+ if (prefix != NULL)
5449+ strncpy(pm->prefix, prefix, sizeof(pm->prefix));
5450+ else if (loginfo->prefix[0] != '\0')
5451 strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
5452 else
5453 *(pm->prefix) = '\0';
5454
5455 if (in && in->hard_header_len > 0
5456- && (*pskb)->mac.raw != (void *) (*pskb)->nh.iph
5457+ && skb->mac.raw != (void *) skb->nh.iph
5458 && in->hard_header_len <= ULOG_MAC_LEN) {
5459- memcpy(pm->mac, (*pskb)->mac.raw, in->hard_header_len);
5460+ memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
5461 pm->mac_len = in->hard_header_len;
5462 } else
5463 pm->mac_len = 0;
5464@@ -241,8 +248,8 @@
5465 else
5466 pm->outdev_name[0] = '\0';
5467
5468- /* copy_len <= (*pskb)->len, so can't fail. */
5469- if (skb_copy_bits(*pskb, 0, pm->payload, copy_len) < 0)
5470+ /* copy_len <= skb->len, so can't fail. */
5471+ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
5472 BUG();
5473
5474 /* check if we are building multi-part messages */
5475@@ -266,8 +273,7 @@
5476
5477 UNLOCK_BH(&ulog_lock);
5478
5479- return IPT_CONTINUE;
5480-
5481+ return;
5482
5483 nlmsg_failure:
5484 PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
5485@@ -276,8 +282,35 @@
5486 PRINTR("ipt_ULOG: Error building netlink message\n");
5487
5488 UNLOCK_BH(&ulog_lock);
5489+}
5490+
5491+static unsigned int ipt_ulog_target(struct sk_buff **pskb,
5492+ const struct net_device *in,
5493+ const struct net_device *out,
5494+ unsigned int hooknum,
5495+ const void *targinfo, void *userinfo)
5496+{
5497+ struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
5498
5499- return IPT_CONTINUE;
5500+ ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
5501+
5502+ return IPT_CONTINUE;
5503+}
5504+
5505+static void ipt_logfn(unsigned int hooknum,
5506+ const struct sk_buff *skb,
5507+ const struct net_device *in,
5508+ const struct net_device *out,
5509+ const char *prefix)
5510+{
5511+ struct ipt_ulog_info loginfo = {
5512+ .nl_group = ULOG_DEFAULT_NLGROUP,
5513+ .copy_range = 0,
5514+ .qthreshold = ULOG_DEFAULT_QTHRESHOLD,
5515+ .prefix = ""
5516+ };
5517+
5518+ ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
5519 }
5520
5521 static int ipt_ulog_checkentry(const char *tablename,
5522@@ -341,7 +374,9 @@
5523 sock_release(nflognl->sk_socket);
5524 return -EINVAL;
5525 }
5526-
5527+ if (nflog)
5528+ nf_log_register(PF_INET, &ipt_logfn);
5529+
5530 return 0;
5531 }
5532
5533@@ -352,6 +387,8 @@
5534
5535 DEBUGP("ipt_ULOG: cleanup_module\n");
5536
5537+ if (nflog)
5538+ nf_log_unregister(PF_INET, &ipt_logfn);
5539 ipt_unregister_target(&ipt_ulog_reg);
5540 sock_release(nflognl->sk_socket);
5541
5542diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_connlimit.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_connlimit.c
5543--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_connlimit.c 1970-01-01 00:00:00.000000000 +0000
5544+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_connlimit.c 2004-03-08 08:48:45.000000000 +0000
5545@@ -0,0 +1,230 @@
5546+/*
5547+ * netfilter module to limit the number of parallel tcp
5548+ * connections per IP address.
5549+ * (c) 2000 Gerd Knorr <kraxel@bytesex.org>
5550+ * Nov 2002: Martin Bene <martin.bene@icomedias.com>:
5551+ * only ignore TIME_WAIT or gone connections
5552+ *
5553+ * based on ...
5554+ *
5555+ * Kernel module to match connection tracking information.
5556+ * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au).
5557+ */
5558+#include <linux/module.h>
5559+#include <linux/skbuff.h>
5560+#include <linux/list.h>
5561+#include <linux/netfilter_ipv4/ip_conntrack.h>
5562+#include <linux/netfilter_ipv4/ip_conntrack_core.h>
5563+#include <linux/netfilter_ipv4/ip_conntrack_tcp.h>
5564+#include <linux/netfilter_ipv4/ip_tables.h>
5565+#include <linux/netfilter_ipv4/ipt_connlimit.h>
5566+
5567+#define DEBUG 0
5568+
5569+MODULE_LICENSE("GPL");
5570+
5571+/* we'll save the tuples of all connections we care about */
5572+struct ipt_connlimit_conn
5573+{
5574+ struct list_head list;
5575+ struct ip_conntrack_tuple tuple;
5576+};
5577+
5578+struct ipt_connlimit_data {
5579+ spinlock_t lock;
5580+ struct list_head iphash[256];
5581+};
5582+
5583+static int ipt_iphash(u_int32_t addr)
5584+{
5585+ int hash;
5586+
5587+ hash = addr & 0xff;
5588+ hash ^= (addr >> 8) & 0xff;
5589+ hash ^= (addr >> 16) & 0xff;
5590+ hash ^= (addr >> 24) & 0xff;
5591+ return hash;
5592+}
5593+
5594+static int count_them(struct ipt_connlimit_data *data,
5595+ u_int32_t addr, u_int32_t mask,
5596+ struct ip_conntrack *ct)
5597+{
5598+#if DEBUG
5599+ const static char *tcp[] = { "none", "established", "syn_sent", "syn_recv",
5600+ "fin_wait", "time_wait", "close", "close_wait",
5601+ "last_ack", "listen" };
5602+#endif
5603+ int addit = 1, matches = 0;
5604+ struct ip_conntrack_tuple tuple;
5605+ struct ip_conntrack_tuple_hash *found;
5606+ struct ipt_connlimit_conn *conn;
5607+ struct list_head *hash,*lh;
5608+
5609+ spin_lock(&data->lock);
5610+ tuple = ct->tuplehash[0].tuple;
5611+ hash = &data->iphash[ipt_iphash(addr & mask)];
5612+
5613+ /* check the saved connections */
5614+ for (lh = hash->next; lh != hash; lh = lh->next) {
5615+ conn = list_entry(lh,struct ipt_connlimit_conn,list);
5616+ found = ip_conntrack_find_get(&conn->tuple,ct);
5617+ if (0 == memcmp(&conn->tuple,&tuple,sizeof(tuple)) &&
5618+ found != NULL &&
5619+ found->ctrack->proto.tcp.state != TCP_CONNTRACK_TIME_WAIT) {
5620+ /* Just to be sure we have it only once in the list.
5621+ We should'nt see tuples twice unless someone hooks this
5622+ into a table without "-p tcp --syn" */
5623+ addit = 0;
5624+ }
5625+#if DEBUG
5626+ printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d %s\n",
5627+ ipt_iphash(addr & mask),
5628+ NIPQUAD(conn->tuple.src.ip), ntohs(conn->tuple.src.u.tcp.port),
5629+ NIPQUAD(conn->tuple.dst.ip), ntohs(conn->tuple.dst.u.tcp.port),
5630+ (NULL != found) ? tcp[found->ctrack->proto.tcp.state] : "gone");
5631+#endif
5632+ if (NULL == found) {
5633+ /* this one is gone */
5634+ lh = lh->prev;
5635+ list_del(lh->next);
5636+ kfree(conn);
5637+ continue;
5638+ }
5639+ if (found->ctrack->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT) {
5640+ /* we don't care about connections which are
5641+ closed already -> ditch it */
5642+ lh = lh->prev;
5643+ list_del(lh->next);
5644+ kfree(conn);
5645+ nf_conntrack_put(&found->ctrack->infos[0]);
5646+ continue;
5647+ }
5648+ if ((addr & mask) == (conn->tuple.src.ip & mask)) {
5649+ /* same source IP address -> be counted! */
5650+ matches++;
5651+ }
5652+ nf_conntrack_put(&found->ctrack->infos[0]);
5653+ }
5654+ if (addit) {
5655+ /* save the new connection in our list */
5656+#if DEBUG
5657+ printk("ipt_connlimit [%d]: src=%u.%u.%u.%u:%d dst=%u.%u.%u.%u:%d new\n",
5658+ ipt_iphash(addr & mask),
5659+ NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
5660+ NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
5661+#endif
5662+ conn = kmalloc(sizeof(*conn),GFP_ATOMIC);
5663+ if (NULL == conn)
5664+ return -1;
5665+ memset(conn,0,sizeof(*conn));
5666+ INIT_LIST_HEAD(&conn->list);
5667+ conn->tuple = tuple;
5668+ list_add(&conn->list,hash);
5669+ matches++;
5670+ }
5671+ spin_unlock(&data->lock);
5672+ return matches;
5673+}
5674+
5675+static int
5676+match(const struct sk_buff *skb,
5677+ const struct net_device *in,
5678+ const struct net_device *out,
5679+ const void *matchinfo,
5680+ int offset,
5681+ int *hotdrop)
5682+{
5683+ const struct ipt_connlimit_info *info = matchinfo;
5684+ int connections, match;
5685+ struct ip_conntrack *ct;
5686+ enum ip_conntrack_info ctinfo;
5687+
5688+ ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
5689+ if (NULL == ct) {
5690+ printk("ipt_connlimit: Oops: invalid ct state ?\n");
5691+ *hotdrop = 1;
5692+ return 0;
5693+ }
5694+ connections = count_them(info->data,skb->nh.iph->saddr,info->mask,ct);
5695+ if (-1 == connections) {
5696+ printk("ipt_connlimit: Hmm, kmalloc failed :-(\n");
5697+ *hotdrop = 1; /* let's free some memory :-) */
5698+ return 0;
5699+ }
5700+ match = (info->inverse) ? (connections <= info->limit) : (connections > info->limit);
5701+#if DEBUG
5702+ printk("ipt_connlimit: src=%u.%u.%u.%u mask=%u.%u.%u.%u "
5703+ "connections=%d limit=%d match=%s\n",
5704+ NIPQUAD(skb->nh.iph->saddr), NIPQUAD(info->mask),
5705+ connections, info->limit, match ? "yes" : "no");
5706+#endif
5707+
5708+ return match;
5709+}
5710+
5711+static int check(const char *tablename,
5712+ const struct ipt_ip *ip,
5713+ void *matchinfo,
5714+ unsigned int matchsize,
5715+ unsigned int hook_mask)
5716+{
5717+ struct ipt_connlimit_info *info = matchinfo;
5718+ int i;
5719+
5720+ /* verify size */
5721+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_connlimit_info)))
5722+ return 0;
5723+
5724+ /* refuse anything but tcp */
5725+ if (ip->proto != IPPROTO_TCP)
5726+ return 0;
5727+
5728+ /* init private data */
5729+ info->data = kmalloc(sizeof(struct ipt_connlimit_data),GFP_KERNEL);
5730+ spin_lock_init(&(info->data->lock));
5731+ for (i = 0; i < 256; i++)
5732+ INIT_LIST_HEAD(&(info->data->iphash[i]));
5733+
5734+ return 1;
5735+}
5736+
5737+static void destroy(void *matchinfo, unsigned int matchinfosize)
5738+{
5739+ struct ipt_connlimit_info *info = matchinfo;
5740+ struct ipt_connlimit_conn *conn;
5741+ struct list_head *hash;
5742+ int i;
5743+
5744+ /* cleanup */
5745+ for (i = 0; i < 256; i++) {
5746+ hash = &(info->data->iphash[i]);
5747+ while (hash != hash->next) {
5748+ conn = list_entry(hash->next,struct ipt_connlimit_conn,list);
5749+ list_del(hash->next);
5750+ kfree(conn);
5751+ }
5752+ }
5753+ kfree(info->data);
5754+}
5755+
5756+static struct ipt_match connlimit_match = {
5757+ .name = "connlimit",
5758+ .match = &match,
5759+ .checkentry = &check,
5760+ .destroy = &destroy,
5761+ .me = THIS_MODULE
5762+};
5763+
5764+static int __init init(void)
5765+{
5766+ return ipt_register_match(&connlimit_match);
5767+}
5768+
5769+static void __exit fini(void)
5770+{
5771+ ipt_unregister_match(&connlimit_match);
5772+}
5773+
5774+module_init(init);
5775+module_exit(fini);
5776diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_conntrack.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_conntrack.c
5777--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_conntrack.c 2004-03-04 06:17:04.000000000 +0000
5778+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_conntrack.c 2004-03-08 08:48:52.000000000 +0000
5779@@ -35,11 +35,13 @@
5780
5781 #define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
5782
5783- if (ct)
5784- statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
5785- else
5786- statebit = IPT_CONNTRACK_STATE_INVALID;
5787-
5788+ if (skb->nfct == &ip_conntrack_untracked.infos[IP_CT_NEW])
5789+ statebit = IPT_CONNTRACK_STATE_UNTRACKED;
5790+ else if (ct)
5791+ statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
5792+ else
5793+ statebit = IPT_CONNTRACK_STATE_INVALID;
5794+
5795 if(sinfo->flags & IPT_CONNTRACK_STATE) {
5796 if (ct) {
5797 if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
5798diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_conntrack.c.orig linux-2.6.4-rc2/net/ipv4/netfilter/ipt_conntrack.c.orig
5799--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_conntrack.c.orig 1970-01-01 00:00:00.000000000 +0000
5800+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_conntrack.c.orig 2004-03-04 06:17:04.000000000 +0000
5801@@ -0,0 +1,134 @@
5802+/* Kernel module to match connection tracking information.
5803+ * Superset of Rusty's minimalistic state match.
5804+ *
5805+ * (C) 2001 Marc Boucher (marc@mbsi.ca).
5806+ *
5807+ * This program is free software; you can redistribute it and/or modify
5808+ * it under the terms of the GNU General Public License version 2 as
5809+ * published by the Free Software Foundation.
5810+ */
5811+
5812+#include <linux/module.h>
5813+#include <linux/skbuff.h>
5814+#include <linux/netfilter_ipv4/ip_conntrack.h>
5815+#include <linux/netfilter_ipv4/ip_tables.h>
5816+#include <linux/netfilter_ipv4/ipt_conntrack.h>
5817+
5818+MODULE_LICENSE("GPL");
5819+MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
5820+MODULE_DESCRIPTION("iptables connection tracking match module");
5821+
5822+static int
5823+match(const struct sk_buff *skb,
5824+ const struct net_device *in,
5825+ const struct net_device *out,
5826+ const void *matchinfo,
5827+ int offset,
5828+ int *hotdrop)
5829+{
5830+ const struct ipt_conntrack_info *sinfo = matchinfo;
5831+ struct ip_conntrack *ct;
5832+ enum ip_conntrack_info ctinfo;
5833+ unsigned int statebit;
5834+
5835+ ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo);
5836+
5837+#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg))
5838+
5839+ if (ct)
5840+ statebit = IPT_CONNTRACK_STATE_BIT(ctinfo);
5841+ else
5842+ statebit = IPT_CONNTRACK_STATE_INVALID;
5843+
5844+ if(sinfo->flags & IPT_CONNTRACK_STATE) {
5845+ if (ct) {
5846+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip !=
5847+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip)
5848+ statebit |= IPT_CONNTRACK_STATE_SNAT;
5849+
5850+ if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip !=
5851+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip)
5852+ statebit |= IPT_CONNTRACK_STATE_DNAT;
5853+ }
5854+
5855+ if (FWINV((statebit & sinfo->statemask) == 0, IPT_CONNTRACK_STATE))
5856+ return 0;
5857+ }
5858+
5859+ if(sinfo->flags & IPT_CONNTRACK_PROTO) {
5860+ if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, IPT_CONNTRACK_PROTO))
5861+ return 0;
5862+ }
5863+
5864+ if(sinfo->flags & IPT_CONNTRACK_ORIGSRC) {
5865+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, IPT_CONNTRACK_ORIGSRC))
5866+ return 0;
5867+ }
5868+
5869+ if(sinfo->flags & IPT_CONNTRACK_ORIGDST) {
5870+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, IPT_CONNTRACK_ORIGDST))
5871+ return 0;
5872+ }
5873+
5874+ if(sinfo->flags & IPT_CONNTRACK_REPLSRC) {
5875+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, IPT_CONNTRACK_REPLSRC))
5876+ return 0;
5877+ }
5878+
5879+ if(sinfo->flags & IPT_CONNTRACK_REPLDST) {
5880+ if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, IPT_CONNTRACK_REPLDST))
5881+ return 0;
5882+ }
5883+
5884+ if(sinfo->flags & IPT_CONNTRACK_STATUS) {
5885+ if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, IPT_CONNTRACK_STATUS))
5886+ return 0;
5887+ }
5888+
5889+ if(sinfo->flags & IPT_CONNTRACK_EXPIRES) {
5890+ unsigned long expires;
5891+
5892+ if(!ct)
5893+ return 0;
5894+
5895+ expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0;
5896+
5897+ if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), IPT_CONNTRACK_EXPIRES))
5898+ return 0;
5899+ }
5900+
5901+ return 1;
5902+}
5903+
5904+static int check(const char *tablename,
5905+ const struct ipt_ip *ip,
5906+ void *matchinfo,
5907+ unsigned int matchsize,
5908+ unsigned int hook_mask)
5909+{
5910+ if (matchsize != IPT_ALIGN(sizeof(struct ipt_conntrack_info)))
5911+ return 0;
5912+
5913+ return 1;
5914+}
5915+
5916+static struct ipt_match conntrack_match = {
5917+ .name = "conntrack",
5918+ .match = &match,
5919+ .checkentry = &check,
5920+ .me = THIS_MODULE,
5921+};
5922+
5923+static int __init init(void)
5924+{
5925+ need_ip_conntrack();
5926+ return ipt_register_match(&conntrack_match);
5927+}
5928+
5929+static void __exit fini(void)
5930+{
5931+ ipt_unregister_match(&conntrack_match);
5932+}
5933+
5934+module_init(init);
5935+module_exit(fini);
5936diff -Nur linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_dstlimit.c linux-2.6.4-rc2/net/ipv4/netfilter/ipt_dstlimit.c
5937--- linux-2.6.4-rc2.org/net/ipv4/netfilter/ipt_dstlimit.c 1970-01-01 00:00:00.000000000 +0000
5938+++ linux-2.6.4-rc2/net/ipv4/netfilter/ipt_dstlimit.c 2004-03-08 08:48:46.000000000 +0000
5939@@ -0,0 +1,690 @@
5940+/* iptables match extension to limit the number of packets per second
5941+ * seperately for each destination.
5942+ *
5943+ * (C) 2003 by Harald Welte <laforge@netfilter.org>
5944+ *
5945+ * $Id$
5946+ *
5947+ * Development of this code was funded by Astaro AG, http://www.astaro.com/
5948+ *
5949+ * based on ipt_limit.c by:
5950