]>
Commit | Line | Data |
---|---|---|
7f651772 | 1 | diff --unified --recursive --new-file linux-2.6.21.4/include/linux/ring.h linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h |
2 | --- linux-2.6.21.4/include/linux/ring.h 1970-01-01 00:00:00.000000000 +0000 | |
3 | +++ linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h 2007-06-10 16:43:04.346421348 +0000 | |
4 | @@ -0,0 +1,240 @@ | |
5 | +/* | |
6 | + * Definitions for packet ring | |
7 | + * | |
8 | + * 2004-07 Luca Deri <deri@ntop.org> | |
9 | + */ | |
10 | +#ifndef __RING_H | |
11 | +#define __RING_H | |
12 | + | |
13 | +#define INCLUDE_MAC_INFO | |
14 | + | |
15 | +#ifdef INCLUDE_MAC_INFO | |
16 | +#define SKB_DISPLACEMENT 14 /* Include MAC address information */ | |
17 | +#else | |
18 | +#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */ | |
19 | +#endif | |
20 | + | |
21 | +#define RING_MAGIC | |
22 | +#define RING_MAGIC_VALUE 0x88 | |
23 | +#define RING_FLOWSLOT_VERSION 6 | |
24 | +#define RING_VERSION "3.4.1" | |
25 | + | |
26 | +#define SO_ADD_TO_CLUSTER 99 | |
27 | +#define SO_REMOVE_FROM_CLUSTER 100 | |
28 | +#define SO_SET_REFLECTOR 101 | |
29 | +#define SO_SET_BLOOM 102 | |
30 | +#define SO_SET_STRING 103 | |
31 | +#define SO_TOGGLE_BLOOM_STATE 104 | |
32 | +#define SO_RESET_BLOOM_FILTERS 105 | |
33 | + | |
34 | +#define BITMASK_SET(n, p) (((char*)p->bits_memory)[n/8] |= (1<<(n % 8))) | |
35 | +#define BITMASK_CLR(n, p) (((char*)p->bits_memory)[n/8] &= ~(1<<(n % 8))) | |
36 | +#define BITMASK_ISSET(n, p) (((char*)p->bits_memory)[n/8] & (1<<(n % 8))) | |
37 | + | |
38 | +/* *********************************** */ | |
39 | + | |
40 | +/* | |
41 | + Aho-Corasick code taken from Snort | |
42 | + under GPL license | |
43 | +*/ | |
44 | +/* | |
45 | + * DEFINES and Typedef's | |
46 | + */ | |
47 | +#define MAX_ALPHABET_SIZE 256 | |
48 | + | |
49 | +/* | |
50 | + FAIL STATE for 1,2,or 4 bytes for state transitions | |
51 | + | |
52 | + Uncomment this define to use 32 bit state values | |
53 | + #define AC32 | |
54 | +*/ | |
55 | + | |
56 | +typedef unsigned short acstate_t; | |
57 | +#define ACSM_FAIL_STATE2 0xffff | |
58 | + | |
59 | +/* | |
60 | + * | |
61 | + */ | |
62 | +typedef | |
63 | +struct _acsm_pattern2 | |
64 | +{ | |
65 | + struct _acsm_pattern2 *next; | |
66 | + | |
67 | + unsigned char *patrn; | |
68 | + unsigned char *casepatrn; | |
69 | + int n; | |
70 | + int nocase; | |
71 | + int offset; | |
72 | + int depth; | |
73 | + void * id; | |
74 | + int iid; | |
75 | + | |
76 | +} ACSM_PATTERN2; | |
77 | + | |
78 | +/* | |
79 | + * transition nodes - either 8 or 12 bytes | |
80 | + */ | |
81 | +typedef | |
82 | +struct trans_node_s { | |
83 | + | |
84 | + acstate_t key; /* The character that got us here - sized to keep structure aligned on 4 bytes */ | |
85 | + /* to better the caching opportunities. A value that crosses the cache line */ | |
86 | + /* forces an expensive reconstruction, typing this as acstate_t stops that. */ | |
87 | + acstate_t next_state; /* */ | |
88 | + struct trans_node_s * next; /* next transition for this state */ | |
89 | + | |
90 | +} trans_node_t; | |
91 | + | |
92 | + | |
93 | +/* | |
94 | + * User specified final storage type for the state transitions | |
95 | + */ | |
96 | +enum { | |
97 | + ACF_FULL, | |
98 | + ACF_SPARSE, | |
99 | + ACF_BANDED, | |
100 | + ACF_SPARSEBANDS, | |
101 | +}; | |
102 | + | |
103 | +/* | |
104 | + * User specified machine types | |
105 | + * | |
106 | + * TRIE : Keyword trie | |
107 | + * NFA : | |
108 | + * DFA : | |
109 | + */ | |
110 | +enum { | |
111 | + FSA_TRIE, | |
112 | + FSA_NFA, | |
113 | + FSA_DFA, | |
114 | +}; | |
115 | + | |
116 | +/* | |
117 | + * Aho-Corasick State Machine Struct - one per group of patterns | |
118 | + */ | |
119 | +typedef struct { | |
120 | + int acsmMaxStates; | |
121 | + int acsmNumStates; | |
122 | + | |
123 | + ACSM_PATTERN2 * acsmPatterns; | |
124 | + acstate_t * acsmFailState; | |
125 | + ACSM_PATTERN2 ** acsmMatchList; | |
126 | + | |
127 | + /* list of transitions in each state, this is used to build the nfa & dfa */ | |
128 | + /* after construction we convert to sparse or full format matrix and free */ | |
129 | + /* the transition lists */ | |
130 | + trans_node_t ** acsmTransTable; | |
131 | + | |
132 | + acstate_t ** acsmNextState; | |
133 | + int acsmFormat; | |
134 | + int acsmSparseMaxRowNodes; | |
135 | + int acsmSparseMaxZcnt; | |
136 | + | |
137 | + int acsmNumTrans; | |
138 | + int acsmAlphabetSize; | |
139 | + int acsmFSA; | |
140 | + | |
141 | +} ACSM_STRUCT2; | |
142 | + | |
143 | +/* *********************************** */ | |
144 | + | |
145 | +#ifndef HAVE_PCAP | |
146 | +struct pcap_pkthdr { | |
147 | + struct timeval ts; /* time stamp */ | |
148 | + u_int32_t caplen; /* length of portion present */ | |
149 | + u_int32_t len; /* length this packet (off wire) */ | |
150 | + /* packet parsing info */ | |
151 | + u_int16_t eth_type; /* Ethernet type */ | |
152 | + u_int16_t vlan_id; /* VLAN Id or -1 for no vlan */ | |
153 | + u_int8_t l3_proto; /* Layer 3 protocol */ | |
154 | + u_int16_t l3_offset, l4_offset, payload_offset; /* Offsets of L3/L4/payload elements */ | |
155 | + u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */ | |
156 | + u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */ | |
157 | +}; | |
158 | +#endif | |
159 | + | |
160 | +/* *********************************** */ | |
161 | + | |
162 | +typedef struct _counter_list { | |
163 | + u_int32_t bit_id; | |
164 | + u_int32_t bit_counter; | |
165 | + struct _counter_list *next; | |
166 | +} bitmask_counter_list; | |
167 | + | |
168 | +typedef struct { | |
169 | + u_int32_t num_bits, order, num_pages; | |
170 | + unsigned long bits_memory; | |
171 | + bitmask_counter_list *clashes; | |
172 | +} bitmask_selector; | |
173 | + | |
174 | +/* *********************************** */ | |
175 | + | |
176 | +enum cluster_type { | |
177 | + cluster_per_flow = 0, | |
178 | + cluster_round_robin | |
179 | +}; | |
180 | + | |
181 | +/* *********************************** */ | |
182 | + | |
183 | +#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr)) | |
184 | +#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr)) | |
185 | + | |
186 | +/* *********************************** */ | |
187 | + | |
188 | +typedef struct flowSlotInfo { | |
189 | + u_int16_t version, sample_rate; | |
190 | + u_int32_t tot_slots, slot_len, data_len, tot_mem; | |
191 | + | |
192 | + u_int64_t tot_pkts, tot_lost; | |
193 | + u_int64_t tot_insert, tot_read; | |
194 | + u_int32_t insert_idx, remove_idx; | |
195 | +} FlowSlotInfo; | |
196 | + | |
197 | +/* *********************************** */ | |
198 | + | |
199 | +typedef struct flowSlot { | |
200 | +#ifdef RING_MAGIC | |
201 | + u_char magic; /* It must always be zero */ | |
202 | +#endif | |
203 | + u_char slot_state; /* 0=empty, 1=full */ | |
204 | + u_char bucket; /* bucket[bucketLen] */ | |
205 | +} FlowSlot; | |
206 | + | |
207 | +/* *********************************** */ | |
208 | + | |
209 | +#ifdef __KERNEL__ | |
210 | + | |
211 | +FlowSlotInfo* getRingPtr(void); | |
212 | +int allocateRing(char *deviceName, u_int numSlots, | |
213 | + u_int bucketLen, u_int sampleRate); | |
214 | +unsigned int pollRing(struct file *fp, struct poll_table_struct * wait); | |
215 | +void deallocateRing(void); | |
216 | + | |
217 | +/* ************************* */ | |
218 | + | |
219 | +typedef int (*handle_ring_skb)(struct sk_buff *skb, | |
220 | + u_char recv_packet, u_char real_skb); | |
221 | +extern handle_ring_skb get_skb_ring_handler(void); | |
222 | +extern void set_skb_ring_handler(handle_ring_skb the_handler); | |
223 | +extern void do_skb_ring_handler(struct sk_buff *skb, | |
224 | + u_char recv_packet, u_char real_skb); | |
225 | + | |
226 | +typedef int (*handle_ring_buffer)(struct net_device *dev, | |
227 | + char *data, int len); | |
228 | +extern handle_ring_buffer get_buffer_ring_handler(void); | |
229 | +extern void set_buffer_ring_handler(handle_ring_buffer the_handler); | |
230 | +extern int do_buffer_ring_handler(struct net_device *dev, | |
231 | + char *data, int len); | |
232 | +#endif /* __KERNEL__ */ | |
233 | + | |
234 | +/* *********************************** */ | |
235 | + | |
236 | +#define PF_RING 27 /* Packet Ring */ | |
237 | +#define SOCK_RING PF_RING | |
238 | + | |
239 | +/* ioctl() */ | |
240 | +#define SIORINGPOLL 0x8888 | |
241 | + | |
242 | +/* *********************************** */ | |
243 | + | |
244 | +#endif /* __RING_H */ | |
245 | diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/Kconfig | |
246 | --- linux-2.6.21.4/net/Kconfig 2007-06-07 21:27:31.000000000 +0000 | |
247 | +++ linux-2.6.21.4-1-686-smp-ring3/net/Kconfig 2007-06-10 16:43:04.402423771 +0000 | |
248 | @@ -39,6 +39,7 @@ | |
249 | source "net/xfrm/Kconfig" | |
250 | source "net/iucv/Kconfig" | |
251 | ||
252 | +source "net/ring/Kconfig" | |
253 | config INET | |
254 | bool "TCP/IP networking" | |
255 | ---help--- | |
256 | diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile | |
257 | --- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000 | |
258 | +++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000 | |
259 | @@ -42,6 +42,7 @@ | |
260 | obj-$(CONFIG_DECNET) += decnet/ | |
261 | obj-$(CONFIG_ECONET) += econet/ | |
262 | obj-$(CONFIG_VLAN_8021Q) += 8021q/ | |
263 | +obj-$(CONFIG_RING) += ring/ | |
264 | obj-$(CONFIG_IP_DCCP) += dccp/ | |
265 | obj-$(CONFIG_IP_SCTP) += sctp/ | |
266 | obj-$(CONFIG_IEEE80211) += ieee80211/ | |
267 | diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile.ORG linux-2.6.21.4-1-686-smp-ring3/net/Makefile.ORG | |
268 | --- linux-2.6.21.4/net/Makefile.ORG 1970-01-01 00:00:00.000000000 +0000 | |
269 | +++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile.ORG 2007-06-10 16:43:04.386423079 +0000 | |
270 | @@ -0,0 +1,54 @@ | |
271 | +# | |
272 | +# Makefile for the linux networking. | |
273 | +# | |
274 | +# 2 Sep 2000, Christoph Hellwig <hch@infradead.org> | |
275 | +# Rewritten to use lists instead of if-statements. | |
276 | +# | |
277 | + | |
278 | +obj-y := nonet.o | |
279 | + | |
280 | +obj-$(CONFIG_NET) := socket.o core/ | |
281 | + | |
282 | +tmp-$(CONFIG_COMPAT) := compat.o | |
283 | +obj-$(CONFIG_NET) += $(tmp-y) | |
284 | + | |
285 | +# LLC has to be linked before the files in net/802/ | |
286 | +obj-$(CONFIG_LLC) += llc/ | |
287 | +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/ | |
288 | +obj-$(CONFIG_NETFILTER) += netfilter/ | |
289 | +obj-$(CONFIG_INET) += ipv4/ | |
290 | +obj-$(CONFIG_XFRM) += xfrm/ | |
291 | +obj-$(CONFIG_UNIX) += unix/ | |
292 | +ifneq ($(CONFIG_IPV6),) | |
293 | +obj-y += ipv6/ | |
294 | +endif | |
295 | +obj-$(CONFIG_PACKET) += packet/ | |
296 | +obj-$(CONFIG_NET_KEY) += key/ | |
297 | +obj-$(CONFIG_NET_SCHED) += sched/ | |
298 | +obj-$(CONFIG_BRIDGE) += bridge/ | |
299 | +obj-$(CONFIG_IPX) += ipx/ | |
300 | +obj-$(CONFIG_ATALK) += appletalk/ | |
301 | +obj-$(CONFIG_WAN_ROUTER) += wanrouter/ | |
302 | +obj-$(CONFIG_X25) += x25/ | |
303 | +obj-$(CONFIG_LAPB) += lapb/ | |
304 | +obj-$(CONFIG_NETROM) += netrom/ | |
305 | +obj-$(CONFIG_ROSE) += rose/ | |
306 | +obj-$(CONFIG_AX25) += ax25/ | |
307 | +obj-$(CONFIG_IRDA) += irda/ | |
308 | +obj-$(CONFIG_BT) += bluetooth/ | |
309 | +obj-$(CONFIG_SUNRPC) += sunrpc/ | |
310 | +obj-$(CONFIG_RXRPC) += rxrpc/ | |
311 | +obj-$(CONFIG_ATM) += atm/ | |
312 | +obj-$(CONFIG_DECNET) += decnet/ | |
313 | +obj-$(CONFIG_ECONET) += econet/ | |
314 | +obj-$(CONFIG_VLAN_8021Q) += 8021q/ | |
315 | +obj-$(CONFIG_IP_DCCP) += dccp/ | |
316 | +obj-$(CONFIG_IP_SCTP) += sctp/ | |
317 | +obj-$(CONFIG_IEEE80211) += ieee80211/ | |
318 | +obj-$(CONFIG_TIPC) += tipc/ | |
319 | +obj-$(CONFIG_NETLABEL) += netlabel/ | |
320 | +obj-$(CONFIG_IUCV) += iucv/ | |
321 | + | |
322 | +ifeq ($(CONFIG_NET),y) | |
323 | +obj-$(CONFIG_SYSCTL) += sysctl_net.o | |
324 | +endif | |
325 | diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c | |
326 | --- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000 | |
327 | +++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000 | |
328 | @@ -117,6 +117,56 @@ | |
329 | #include <linux/err.h> | |
330 | #include <linux/ctype.h> | |
331 | ||
332 | +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) | |
333 | + | |
334 | +/* #define RING_DEBUG */ | |
335 | + | |
336 | +#include <linux/ring.h> | |
337 | +#include <linux/version.h> | |
338 | + | |
339 | +static handle_ring_skb ring_handler = NULL; | |
340 | + | |
341 | +handle_ring_skb get_skb_ring_handler() { return(ring_handler); } | |
342 | + | |
343 | +void set_skb_ring_handler(handle_ring_skb the_handler) { | |
344 | + ring_handler = the_handler; | |
345 | +} | |
346 | + | |
347 | +void do_skb_ring_handler(struct sk_buff *skb, | |
348 | + u_char recv_packet, u_char real_skb) { | |
349 | + if(ring_handler) | |
350 | + ring_handler(skb, recv_packet, real_skb); | |
351 | +} | |
352 | + | |
353 | +/* ******************* */ | |
354 | + | |
355 | +static handle_ring_buffer buffer_ring_handler = NULL; | |
356 | + | |
357 | +handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); } | |
358 | + | |
359 | +void set_buffer_ring_handler(handle_ring_buffer the_handler) { | |
360 | + buffer_ring_handler = the_handler; | |
361 | +} | |
362 | + | |
363 | +int do_buffer_ring_handler(struct net_device *dev, char *data, int len) { | |
364 | + if(buffer_ring_handler) { | |
365 | + buffer_ring_handler(dev, data, len); | |
366 | + return(1); | |
367 | + } else | |
368 | + return(0); | |
369 | +} | |
370 | + | |
371 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
372 | +EXPORT_SYMBOL(get_skb_ring_handler); | |
373 | +EXPORT_SYMBOL(set_skb_ring_handler); | |
374 | +EXPORT_SYMBOL(do_skb_ring_handler); | |
375 | + | |
376 | +EXPORT_SYMBOL(get_buffer_ring_handler); | |
377 | +EXPORT_SYMBOL(set_buffer_ring_handler); | |
378 | +EXPORT_SYMBOL(do_buffer_ring_handler); | |
379 | +#endif | |
380 | + | |
381 | +#endif | |
382 | /* | |
383 | * The list of packet types we will receive (as opposed to discard) | |
384 | * and the routines to invoke. | |
385 | @@ -1474,6 +1524,10 @@ | |
386 | skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); | |
387 | #endif | |
388 | if (q->enqueue) { | |
389 | +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) | |
390 | + if(ring_handler) ring_handler(skb, 0, 1); | |
391 | +#endif /* CONFIG_RING */ | |
392 | + | |
393 | /* Grab device queue */ | |
394 | spin_lock(&dev->queue_lock); | |
395 | q = dev->qdisc; | |
396 | @@ -1574,6 +1628,13 @@ | |
397 | unsigned long flags; | |
398 | ||
399 | /* if netpoll wants it, pretend we never saw it */ | |
400 | +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) | |
401 | + if(ring_handler && ring_handler(skb, 1, 1)) { | |
402 | + /* The packet has been copied into a ring */ | |
403 | + return(NET_RX_SUCCESS); | |
404 | + } | |
405 | +#endif /* CONFIG_RING */ | |
406 | + | |
407 | if (netpoll_rx(skb)) | |
408 | return NET_RX_DROP; | |
409 | ||
410 | @@ -1764,6 +1825,13 @@ | |
411 | struct net_device *orig_dev; | |
412 | int ret = NET_RX_DROP; | |
413 | __be16 type; | |
414 | +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE) | |
415 | + if(ring_handler && ring_handler(skb, 1, 1)) { | |
416 | + /* The packet has been copied into a ring */ | |
417 | + return(NET_RX_SUCCESS); | |
418 | + } | |
419 | +#endif /* CONFIG_RING */ | |
420 | + | |
421 | ||
422 | /* if we've gotten here through NAPI, check netpoll */ | |
423 | if (skb->dev->poll && netpoll_rx(skb)) | |
424 | diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c.ORG linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c.ORG | |
425 | --- linux-2.6.21.4/net/core/dev.c.ORG 1970-01-01 00:00:00.000000000 +0000 | |
426 | +++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c.ORG 2007-06-10 16:43:04.354421694 +0000 | |
427 | @@ -0,0 +1,3571 @@ | |
428 | +/* | |
429 | + * NET3 Protocol independent device support routines. | |
430 | + * | |
431 | + * This program is free software; you can redistribute it and/or | |
432 | + * modify it under the terms of the GNU General Public License | |
433 | + * as published by the Free Software Foundation; either version | |
434 | + * 2 of the License, or (at your option) any later version. | |
435 | + * | |
436 | + * Derived from the non IP parts of dev.c 1.0.19 | |
437 | + * Authors: Ross Biro | |
438 | + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> | |
439 | + * Mark Evans, <evansmp@uhura.aston.ac.uk> | |
440 | + * | |
441 | + * Additional Authors: | |
442 | + * Florian la Roche <rzsfl@rz.uni-sb.de> | |
443 | + * Alan Cox <gw4pts@gw4pts.ampr.org> | |
444 | + * David Hinds <dahinds@users.sourceforge.net> | |
445 | + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru> | |
446 | + * Adam Sulmicki <adam@cfar.umd.edu> | |
447 | + * Pekka Riikonen <priikone@poesidon.pspt.fi> | |
448 | + * | |
449 | + * Changes: | |
450 | + * D.J. Barrow : Fixed bug where dev->refcnt gets set | |
451 | + * to 2 if register_netdev gets called | |
452 | + * before net_dev_init & also removed a | |
453 | + * few lines of code in the process. | |
454 | + * Alan Cox : device private ioctl copies fields back. | |
455 | + * Alan Cox : Transmit queue code does relevant | |
456 | + * stunts to keep the queue safe. | |
457 | + * Alan Cox : Fixed double lock. | |
458 | + * Alan Cox : Fixed promisc NULL pointer trap | |
459 | + * ???????? : Support the full private ioctl range | |
460 | + * Alan Cox : Moved ioctl permission check into | |
461 | + * drivers | |
462 | + * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI | |
463 | + * Alan Cox : 100 backlog just doesn't cut it when | |
464 | + * you start doing multicast video 8) | |
465 | + * Alan Cox : Rewrote net_bh and list manager. | |
466 | + * Alan Cox : Fix ETH_P_ALL echoback lengths. | |
467 | + * Alan Cox : Took out transmit every packet pass | |
468 | + * Saved a few bytes in the ioctl handler | |
469 | + * Alan Cox : Network driver sets packet type before | |
470 | + * calling netif_rx. Saves a function | |
471 | + * call a packet. | |
472 | + * Alan Cox : Hashed net_bh() | |
473 | + * Richard Kooijman: Timestamp fixes. | |
474 | + * Alan Cox : Wrong field in SIOCGIFDSTADDR | |
475 | + * Alan Cox : Device lock protection. | |
476 | + * Alan Cox : Fixed nasty side effect of device close | |
477 | + * changes. | |
478 | + * Rudi Cilibrasi : Pass the right thing to | |
479 | + * set_mac_address() | |
480 | + * Dave Miller : 32bit quantity for the device lock to | |
481 | + * make it work out on a Sparc. | |
482 | + * Bjorn Ekwall : Added KERNELD hack. | |
483 | + * Alan Cox : Cleaned up the backlog initialise. | |
484 | + * Craig Metz : SIOCGIFCONF fix if space for under | |
485 | + * 1 device. | |
486 | + * Thomas Bogendoerfer : Return ENODEV for dev_open, if there | |
487 | + * is no device open function. | |
488 | + * Andi Kleen : Fix error reporting for SIOCGIFCONF | |
489 | + * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF | |
490 | + * Cyrus Durgin : Cleaned for KMOD | |
491 | + * Adam Sulmicki : Bug Fix : Network Device Unload | |
492 | + * A network device unload needs to purge | |
493 | + * the backlog queue. | |
494 | + * Paul Rusty Russell : SIOCSIFNAME | |
495 | + * Pekka Riikonen : Netdev boot-time settings code | |
496 | + * Andrew Morton : Make unregister_netdevice wait | |
497 | + * indefinitely on dev->refcnt | |
498 | + * J Hadi Salim : - Backlog queue sampling | |
499 | + * - netif_rx() feedback | |
500 | + */ | |
501 | + | |
502 | +#include <asm/uaccess.h> | |
503 | +#include <asm/system.h> | |
504 | +#include <linux/bitops.h> | |
505 | +#include <linux/capability.h> | |
506 | +#include <linux/cpu.h> | |
507 | +#include <linux/types.h> | |
508 | +#include <linux/kernel.h> | |
509 | +#include <linux/sched.h> | |
510 | +#include <linux/mutex.h> | |
511 | +#include <linux/string.h> | |
512 | +#include <linux/mm.h> | |
513 | +#include <linux/socket.h> | |
514 | +#include <linux/sockios.h> | |
515 | +#include <linux/errno.h> | |
516 | +#include <linux/interrupt.h> | |
517 | +#include <linux/if_ether.h> | |
518 | +#include <linux/netdevice.h> | |
519 | +#include <linux/etherdevice.h> | |
520 | +#include <linux/notifier.h> | |
521 | +#include <linux/skbuff.h> | |
522 | +#include <net/sock.h> | |
523 | +#include <linux/rtnetlink.h> | |
524 | +#include <linux/proc_fs.h> | |
525 | +#include <linux/seq_file.h> | |
526 | +#include <linux/stat.h> | |
527 | +#include <linux/if_bridge.h> | |
528 | +#include <net/dst.h> | |
529 | +#include <net/pkt_sched.h> | |
530 | +#include <net/checksum.h> | |
531 | +#include <linux/highmem.h> | |
532 | +#include <linux/init.h> | |
533 | +#include <linux/kmod.h> | |
534 | +#include <linux/module.h> | |
535 | +#include <linux/kallsyms.h> | |
536 | +#include <linux/netpoll.h> | |
537 | +#include <linux/rcupdate.h> | |
538 | +#include <linux/delay.h> | |
539 | +#include <linux/wireless.h> | |
540 | +#include <net/iw_handler.h> | |
541 | +#include <asm/current.h> | |
542 | +#include <linux/audit.h> | |
543 | +#include <linux/dmaengine.h> | |
544 | +#include <linux/err.h> | |
545 | +#include <linux/ctype.h> | |
546 | + | |
547 | +/* | |
548 | + * The list of packet types we will receive (as opposed to discard) | |
549 | + * and the routines to invoke. | |
550 | + * | |
551 | + * Why 16. Because with 16 the only overlap we get on a hash of the | |
552 | + * low nibble of the protocol value is RARP/SNAP/X.25. | |
553 | + * | |
554 | + * NOTE: That is no longer true with the addition of VLAN tags. Not | |
555 | + * sure which should go first, but I bet it won't make much | |
556 | + * difference if we are running VLANs. The good news is that | |
557 | + * this protocol won't be in the list unless compiled in, so | |
558 | + * the average user (w/out VLANs) will not be adversely affected. | |
559 | + * --BLG | |
560 | + * | |
561 | + * 0800 IP | |
562 | + * 8100 802.1Q VLAN | |
563 | + * 0001 802.3 | |
564 | + * 0002 AX.25 | |
565 | + * 0004 802.2 | |
566 | + * 8035 RARP | |
567 | + * 0005 SNAP | |
568 | + * 0805 X.25 | |
569 | + * 0806 ARP | |
570 | + * 8137 IPX | |
571 | + * 0009 Localtalk | |
572 | + * 86DD IPv6 | |
573 | + */ | |
574 | + | |
575 | +static DEFINE_SPINLOCK(ptype_lock); | |
576 | +static struct list_head ptype_base[16]; /* 16 way hashed list */ | |
577 | +static struct list_head ptype_all; /* Taps */ | |
578 | + | |
579 | +#ifdef CONFIG_NET_DMA | |
580 | +static struct dma_client *net_dma_client; | |
581 | +static unsigned int net_dma_count; | |
582 | +static spinlock_t net_dma_event_lock; | |
583 | +#endif | |
584 | + | |
585 | +/* | |
586 | + * The @dev_base list is protected by @dev_base_lock and the rtnl | |
587 | + * semaphore. | |
588 | + * | |
589 | + * Pure readers hold dev_base_lock for reading. | |
590 | + * | |
591 | + * Writers must hold the rtnl semaphore while they loop through the | |
592 | + * dev_base list, and hold dev_base_lock for writing when they do the | |
593 | + * actual updates. This allows pure readers to access the list even | |
594 | + * while a writer is preparing to update it. | |
595 | + * | |
596 | + * To put it another way, dev_base_lock is held for writing only to | |
597 | + * protect against pure readers; the rtnl semaphore provides the | |
598 | + * protection against other writers. | |
599 | + * | |
600 | + * See, for example usages, register_netdevice() and | |
601 | + * unregister_netdevice(), which must be called with the rtnl | |
602 | + * semaphore held. | |
603 | + */ | |
604 | +struct net_device *dev_base; | |
605 | +static struct net_device **dev_tail = &dev_base; | |
606 | +DEFINE_RWLOCK(dev_base_lock); | |
607 | + | |
608 | +EXPORT_SYMBOL(dev_base); | |
609 | +EXPORT_SYMBOL(dev_base_lock); | |
610 | + | |
611 | +#define NETDEV_HASHBITS 8 | |
612 | +static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS]; | |
613 | +static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS]; | |
614 | + | |
615 | +static inline struct hlist_head *dev_name_hash(const char *name) | |
616 | +{ | |
617 | + unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ)); | |
618 | + return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)]; | |
619 | +} | |
620 | + | |
621 | +static inline struct hlist_head *dev_index_hash(int ifindex) | |
622 | +{ | |
623 | + return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)]; | |
624 | +} | |
625 | + | |
626 | +/* | |
627 | + * Our notifier list | |
628 | + */ | |
629 | + | |
630 | +static RAW_NOTIFIER_HEAD(netdev_chain); | |
631 | + | |
632 | +/* | |
633 | + * Device drivers call our routines to queue packets here. We empty the | |
634 | + * queue in the local softnet handler. | |
635 | + */ | |
636 | +DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL }; | |
637 | + | |
638 | +#ifdef CONFIG_SYSFS | |
639 | +extern int netdev_sysfs_init(void); | |
640 | +extern int netdev_register_sysfs(struct net_device *); | |
641 | +extern void netdev_unregister_sysfs(struct net_device *); | |
642 | +#else | |
643 | +#define netdev_sysfs_init() (0) | |
644 | +#define netdev_register_sysfs(dev) (0) | |
645 | +#define netdev_unregister_sysfs(dev) do { } while(0) | |
646 | +#endif | |
647 | + | |
648 | + | |
649 | +/******************************************************************************* | |
650 | + | |
651 | + Protocol management and registration routines | |
652 | + | |
653 | +*******************************************************************************/ | |
654 | + | |
655 | +/* | |
656 | + * For efficiency | |
657 | + */ | |
658 | + | |
659 | +static int netdev_nit; | |
660 | + | |
661 | +/* | |
662 | + * Add a protocol ID to the list. Now that the input handler is | |
663 | + * smarter we can dispense with all the messy stuff that used to be | |
664 | + * here. | |
665 | + * | |
666 | + * BEWARE!!! Protocol handlers, mangling input packets, | |
667 | + * MUST BE last in hash buckets and checking protocol handlers | |
668 | + * MUST start from promiscuous ptype_all chain in net_bh. | |
669 | + * It is true now, do not change it. | |
670 | + * Explanation follows: if protocol handler, mangling packet, will | |
671 | + * be the first on list, it is not able to sense, that packet | |
672 | + * is cloned and should be copied-on-write, so that it will | |
673 | + * change it and subsequent readers will get broken packet. | |
674 | + * --ANK (980803) | |
675 | + */ | |
676 | + | |
677 | +/** | |
678 | + * dev_add_pack - add packet handler | |
679 | + * @pt: packet type declaration | |
680 | + * | |
681 | + * Add a protocol handler to the networking stack. The passed &packet_type | |
682 | + * is linked into kernel lists and may not be freed until it has been | |
683 | + * removed from the kernel lists. | |
684 | + * | |
685 | + * This call does not sleep therefore it can not | |
686 | + * guarantee all CPU's that are in middle of receiving packets | |
687 | + * will see the new packet type (until the next received packet). | |
688 | + */ | |
689 | + | |
690 | +void dev_add_pack(struct packet_type *pt) | |
691 | +{ | |
692 | + int hash; | |
693 | + | |
694 | + spin_lock_bh(&ptype_lock); | |
695 | + if (pt->type == htons(ETH_P_ALL)) { | |
696 | + netdev_nit++; | |
697 | + list_add_rcu(&pt->list, &ptype_all); | |
698 | + } else { | |
699 | + hash = ntohs(pt->type) & 15; | |
700 | + list_add_rcu(&pt->list, &ptype_base[hash]); | |
701 | + } | |
702 | + spin_unlock_bh(&ptype_lock); | |
703 | +} | |
704 | + | |
705 | +/** | |
706 | + * __dev_remove_pack - remove packet handler | |
707 | + * @pt: packet type declaration | |
708 | + * | |
709 | + * Remove a protocol handler that was previously added to the kernel | |
710 | + * protocol handlers by dev_add_pack(). The passed &packet_type is removed | |
711 | + * from the kernel lists and can be freed or reused once this function | |
712 | + * returns. | |
713 | + * | |
714 | + * The packet type might still be in use by receivers | |
715 | + * and must not be freed until after all the CPU's have gone | |
716 | + * through a quiescent state. | |
717 | + */ | |
718 | +void __dev_remove_pack(struct packet_type *pt) | |
719 | +{ | |
720 | + struct list_head *head; | |
721 | + struct packet_type *pt1; | |
722 | + | |
723 | + spin_lock_bh(&ptype_lock); | |
724 | + | |
725 | + if (pt->type == htons(ETH_P_ALL)) { | |
726 | + netdev_nit--; | |
727 | + head = &ptype_all; | |
728 | + } else | |
729 | + head = &ptype_base[ntohs(pt->type) & 15]; | |
730 | + | |
731 | + list_for_each_entry(pt1, head, list) { | |
732 | + if (pt == pt1) { | |
733 | + list_del_rcu(&pt->list); | |
734 | + goto out; | |
735 | + } | |
736 | + } | |
737 | + | |
738 | + printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt); | |
739 | +out: | |
740 | + spin_unlock_bh(&ptype_lock); | |
741 | +} | |
742 | +/** | |
743 | + * dev_remove_pack - remove packet handler | |
744 | + * @pt: packet type declaration | |
745 | + * | |
746 | + * Remove a protocol handler that was previously added to the kernel | |
747 | + * protocol handlers by dev_add_pack(). The passed &packet_type is removed | |
748 | + * from the kernel lists and can be freed or reused once this function | |
749 | + * returns. | |
750 | + * | |
751 | + * This call sleeps to guarantee that no CPU is looking at the packet | |
752 | + * type after return. | |
753 | + */ | |
754 | +void dev_remove_pack(struct packet_type *pt) | |
755 | +{ | |
756 | + __dev_remove_pack(pt); | |
757 | + | |
758 | + synchronize_net(); | |
759 | +} | |
760 | + | |
761 | +/****************************************************************************** | |
762 | + | |
763 | + Device Boot-time Settings Routines | |
764 | + | |
765 | +*******************************************************************************/ | |
766 | + | |
767 | +/* Boot time configuration table */ | |
768 | +static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX]; | |
769 | + | |
770 | +/** | |
771 | + * netdev_boot_setup_add - add new setup entry | |
772 | + * @name: name of the device | |
773 | + * @map: configured settings for the device | |
774 | + * | |
775 | + * Adds new setup entry to the dev_boot_setup list. The function | |
776 | + * returns 0 on error and 1 on success. This is a generic routine to | |
777 | + * all netdevices. | |
778 | + */ | |
779 | +static int netdev_boot_setup_add(char *name, struct ifmap *map) | |
780 | +{ | |
781 | + struct netdev_boot_setup *s; | |
782 | + int i; | |
783 | + | |
784 | + s = dev_boot_setup; | |
785 | + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { | |
786 | + if (s[i].name[0] == '\0' || s[i].name[0] == ' ') { | |
787 | + memset(s[i].name, 0, sizeof(s[i].name)); | |
788 | + strcpy(s[i].name, name); | |
789 | + memcpy(&s[i].map, map, sizeof(s[i].map)); | |
790 | + break; | |
791 | + } | |
792 | + } | |
793 | + | |
794 | + return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1; | |
795 | +} | |
796 | + | |
797 | +/** | |
798 | + * netdev_boot_setup_check - check boot time settings | |
799 | + * @dev: the netdevice | |
800 | + * | |
801 | + * Check boot time settings for the device. | |
802 | + * The found settings are set for the device to be used | |
803 | + * later in the device probing. | |
804 | + * Returns 0 if no settings found, 1 if they are. | |
805 | + */ | |
806 | +int netdev_boot_setup_check(struct net_device *dev) | |
807 | +{ | |
808 | + struct netdev_boot_setup *s = dev_boot_setup; | |
809 | + int i; | |
810 | + | |
811 | + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) { | |
812 | + if (s[i].name[0] != '\0' && s[i].name[0] != ' ' && | |
813 | + !strncmp(dev->name, s[i].name, strlen(s[i].name))) { | |
814 | + dev->irq = s[i].map.irq; | |
815 | + dev->base_addr = s[i].map.base_addr; | |
816 | + dev->mem_start = s[i].map.mem_start; | |
817 | + dev->mem_end = s[i].map.mem_end; | |
818 | + return 1; | |
819 | + } | |
820 | + } | |
821 | + return 0; | |
822 | +} | |
823 | + | |
824 | + | |
825 | +/** | |
826 | + * netdev_boot_base - get address from boot time settings | |
827 | + * @prefix: prefix for network device | |
828 | + * @unit: id for network device | |
829 | + * | |
830 | + * Check boot time settings for the base address of device. | |
831 | + * The found settings are set for the device to be used | |
832 | + * later in the device probing. | |
833 | + * Returns 0 if no settings found. | |
834 | + */ | |
835 | +unsigned long netdev_boot_base(const char *prefix, int unit) | |
836 | +{ | |
837 | + const struct netdev_boot_setup *s = dev_boot_setup; | |
838 | + char name[IFNAMSIZ]; | |
839 | + int i; | |
840 | + | |
841 | + sprintf(name, "%s%d", prefix, unit); | |
842 | + | |
843 | + /* | |
844 | + * If device already registered then return base of 1 | |
845 | + * to indicate not to probe for this interface | |
846 | + */ | |
847 | + if (__dev_get_by_name(name)) | |
848 | + return 1; | |
849 | + | |
850 | + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) | |
851 | + if (!strcmp(name, s[i].name)) | |
852 | + return s[i].map.base_addr; | |
853 | + return 0; | |
854 | +} | |
855 | + | |
856 | +/* | |
857 | + * Saves at boot time configured settings for any netdevice. | |
858 | + */ | |
859 | +int __init netdev_boot_setup(char *str) | |
860 | +{ | |
861 | + int ints[5]; | |
862 | + struct ifmap map; | |
863 | + | |
864 | + str = get_options(str, ARRAY_SIZE(ints), ints); | |
865 | + if (!str || !*str) | |
866 | + return 0; | |
867 | + | |
868 | + /* Save settings */ | |
869 | + memset(&map, 0, sizeof(map)); | |
870 | + if (ints[0] > 0) | |
871 | + map.irq = ints[1]; | |
872 | + if (ints[0] > 1) | |
873 | + map.base_addr = ints[2]; | |
874 | + if (ints[0] > 2) | |
875 | + map.mem_start = ints[3]; | |
876 | + if (ints[0] > 3) | |
877 | + map.mem_end = ints[4]; | |
878 | + | |
879 | + /* Add new entry to the list */ | |
880 | + return netdev_boot_setup_add(str, &map); | |
881 | +} | |
882 | + | |
883 | +__setup("netdev=", netdev_boot_setup); | |
884 | + | |
885 | +/******************************************************************************* | |
886 | + | |
887 | + Device Interface Subroutines | |
888 | + | |
889 | +*******************************************************************************/ | |
890 | + | |
891 | +/** | |
892 | + * __dev_get_by_name - find a device by its name | |
893 | + * @name: name to find | |
894 | + * | |
895 | + * Find an interface by name. Must be called under RTNL semaphore | |
896 | + * or @dev_base_lock. If the name is found a pointer to the device | |
897 | + * is returned. If the name is not found then %NULL is returned. The | |
898 | + * reference counters are not incremented so the caller must be | |
899 | + * careful with locks. | |
900 | + */ | |
901 | + | |
902 | +struct net_device *__dev_get_by_name(const char *name) | |
903 | +{ | |
904 | + struct hlist_node *p; | |
905 | + | |
906 | + hlist_for_each(p, dev_name_hash(name)) { | |
907 | + struct net_device *dev | |
908 | + = hlist_entry(p, struct net_device, name_hlist); | |
909 | + if (!strncmp(dev->name, name, IFNAMSIZ)) | |
910 | + return dev; | |
911 | + } | |
912 | + return NULL; | |
913 | +} | |
914 | + | |
915 | +/** | |
916 | + * dev_get_by_name - find a device by its name | |
917 | + * @name: name to find | |
918 | + * | |
919 | + * Find an interface by name. This can be called from any | |
920 | + * context and does its own locking. The returned handle has | |
921 | + * the usage count incremented and the caller must use dev_put() to | |
922 | + * release it when it is no longer needed. %NULL is returned if no | |
923 | + * matching device is found. | |
924 | + */ | |
925 | + | |
926 | +struct net_device *dev_get_by_name(const char *name) | |
927 | +{ | |
928 | + struct net_device *dev; | |
929 | + | |
930 | + read_lock(&dev_base_lock); | |
931 | + dev = __dev_get_by_name(name); | |
932 | + if (dev) | |
933 | + dev_hold(dev); | |
934 | + read_unlock(&dev_base_lock); | |
935 | + return dev; | |
936 | +} | |
937 | + | |
938 | +/** | |
939 | + * __dev_get_by_index - find a device by its ifindex | |
940 | + * @ifindex: index of device | |
941 | + * | |
942 | + * Search for an interface by index. Returns %NULL if the device | |
943 | + * is not found or a pointer to the device. The device has not | |
944 | + * had its reference counter increased so the caller must be careful | |
945 | + * about locking. The caller must hold either the RTNL semaphore | |
946 | + * or @dev_base_lock. | |
947 | + */ | |
948 | + | |
949 | +struct net_device *__dev_get_by_index(int ifindex) | |
950 | +{ | |
951 | + struct hlist_node *p; | |
952 | + | |
953 | + hlist_for_each(p, dev_index_hash(ifindex)) { | |
954 | + struct net_device *dev | |
955 | + = hlist_entry(p, struct net_device, index_hlist); | |
956 | + if (dev->ifindex == ifindex) | |
957 | + return dev; | |
958 | + } | |
959 | + return NULL; | |
960 | +} | |
961 | + | |
962 | + | |
963 | +/** | |
964 | + * dev_get_by_index - find a device by its ifindex | |
965 | + * @ifindex: index of device | |
966 | + * | |
967 | + * Search for an interface by index. Returns NULL if the device | |
968 | + * is not found or a pointer to the device. The device returned has | |
969 | + * had a reference added and the pointer is safe until the user calls | |
970 | + * dev_put to indicate they have finished with it. | |
971 | + */ | |
972 | + | |
973 | +struct net_device *dev_get_by_index(int ifindex) | |
974 | +{ | |
975 | + struct net_device *dev; | |
976 | + | |
977 | + read_lock(&dev_base_lock); | |
978 | + dev = __dev_get_by_index(ifindex); | |
979 | + if (dev) | |
980 | + dev_hold(dev); | |
981 | + read_unlock(&dev_base_lock); | |
982 | + return dev; | |
983 | +} | |
984 | + | |
985 | +/** | |
986 | + * dev_getbyhwaddr - find a device by its hardware address | |
987 | + * @type: media type of device | |
988 | + * @ha: hardware address | |
989 | + * | |
990 | + * Search for an interface by MAC address. Returns NULL if the device | |
991 | + * is not found or a pointer to the device. The caller must hold the | |
992 | + * rtnl semaphore. The returned device has not had its ref count increased | |
993 | + * and the caller must therefore be careful about locking | |
994 | + * | |
995 | + * BUGS: | |
996 | + * If the API was consistent this would be __dev_get_by_hwaddr | |
997 | + */ | |
998 | + | |
999 | +struct net_device *dev_getbyhwaddr(unsigned short type, char *ha) | |
1000 | +{ | |
1001 | + struct net_device *dev; | |
1002 | + | |
1003 | + ASSERT_RTNL(); | |
1004 | + | |
1005 | + for (dev = dev_base; dev; dev = dev->next) | |
1006 | + if (dev->type == type && | |
1007 | + !memcmp(dev->dev_addr, ha, dev->addr_len)) | |
1008 | + break; | |
1009 | + return dev; | |
1010 | +} | |
1011 | + | |
1012 | +EXPORT_SYMBOL(dev_getbyhwaddr); | |
1013 | + | |
1014 | +struct net_device *dev_getfirstbyhwtype(unsigned short type) | |
1015 | +{ | |
1016 | + struct net_device *dev; | |
1017 | + | |
1018 | + rtnl_lock(); | |
1019 | + for (dev = dev_base; dev; dev = dev->next) { | |
1020 | + if (dev->type == type) { | |
1021 | + dev_hold(dev); | |
1022 | + break; | |
1023 | + } | |
1024 | + } | |
1025 | + rtnl_unlock(); | |
1026 | + return dev; | |
1027 | +} | |
1028 | + | |
1029 | +EXPORT_SYMBOL(dev_getfirstbyhwtype); | |
1030 | + | |
1031 | +/** | |
1032 | + * dev_get_by_flags - find any device with given flags | |
1033 | + * @if_flags: IFF_* values | |
1034 | + * @mask: bitmask of bits in if_flags to check | |
1035 | + * | |
1036 | + * Search for any interface with the given flags. Returns NULL if a device | |
1037 | + * is not found or a pointer to the device. The device returned has | |
1038 | + * had a reference added and the pointer is safe until the user calls | |
1039 | + * dev_put to indicate they have finished with it. | |
1040 | + */ | |
1041 | + | |
1042 | +struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask) | |
1043 | +{ | |
1044 | + struct net_device *dev; | |
1045 | + | |
1046 | + read_lock(&dev_base_lock); | |
1047 | + for (dev = dev_base; dev != NULL; dev = dev->next) { | |
1048 | + if (((dev->flags ^ if_flags) & mask) == 0) { | |
1049 | + dev_hold(dev); | |
1050 | + break; | |
1051 | + } | |
1052 | + } | |
1053 | + read_unlock(&dev_base_lock); | |
1054 | + return dev; | |
1055 | +} | |
1056 | + | |
1057 | +/** | |
1058 | + * dev_valid_name - check if name is okay for network device | |
1059 | + * @name: name string | |
1060 | + * | |
1061 | + * Network device names need to be valid file names to | |
1062 | + * to allow sysfs to work. We also disallow any kind of | |
1063 | + * whitespace. | |
1064 | + */ | |
1065 | +int dev_valid_name(const char *name) | |
1066 | +{ | |
1067 | + if (*name == '\0') | |
1068 | + return 0; | |
1069 | + if (strlen(name) >= IFNAMSIZ) | |
1070 | + return 0; | |
1071 | + if (!strcmp(name, ".") || !strcmp(name, "..")) | |
1072 | + return 0; | |
1073 | + | |
1074 | + while (*name) { | |
1075 | + if (*name == '/' || isspace(*name)) | |
1076 | + return 0; | |
1077 | + name++; | |
1078 | + } | |
1079 | + return 1; | |
1080 | +} | |
1081 | + | |
1082 | +/** | |
1083 | + * dev_alloc_name - allocate a name for a device | |
1084 | + * @dev: device | |
1085 | + * @name: name format string | |
1086 | + * | |
1087 | + * Passed a format string - eg "lt%d" it will try and find a suitable | |
1088 | + * id. It scans list of devices to build up a free map, then chooses | |
1089 | + * the first empty slot. The caller must hold the dev_base or rtnl lock | |
1090 | + * while allocating the name and adding the device in order to avoid | |
1091 | + * duplicates. | |
1092 | + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). | |
1093 | + * Returns the number of the unit assigned or a negative errno code. | |
1094 | + */ | |
1095 | + | |
1096 | +int dev_alloc_name(struct net_device *dev, const char *name) | |
1097 | +{ | |
1098 | + int i = 0; | |
1099 | + char buf[IFNAMSIZ]; | |
1100 | + const char *p; | |
1101 | + const int max_netdevices = 8*PAGE_SIZE; | |
1102 | + long *inuse; | |
1103 | + struct net_device *d; | |
1104 | + | |
1105 | + p = strnchr(name, IFNAMSIZ-1, '%'); | |
1106 | + if (p) { | |
1107 | + /* | |
1108 | + * Verify the string as this thing may have come from | |
1109 | + * the user. There must be either one "%d" and no other "%" | |
1110 | + * characters. | |
1111 | + */ | |
1112 | + if (p[1] != 'd' || strchr(p + 2, '%')) | |
1113 | + return -EINVAL; | |
1114 | + | |
1115 | + /* Use one page as a bit array of possible slots */ | |
1116 | + inuse = (long *) get_zeroed_page(GFP_ATOMIC); | |
1117 | + if (!inuse) | |
1118 | + return -ENOMEM; | |
1119 | + | |
1120 | + for (d = dev_base; d; d = d->next) { | |
1121 | + if (!sscanf(d->name, name, &i)) | |
1122 | + continue; | |
1123 | + if (i < 0 || i >= max_netdevices) | |
1124 | + continue; | |
1125 | + | |
1126 | + /* avoid cases where sscanf is not exact inverse of printf */ | |
1127 | + snprintf(buf, sizeof(buf), name, i); | |
1128 | + if (!strncmp(buf, d->name, IFNAMSIZ)) | |
1129 | + set_bit(i, inuse); | |
1130 | + } | |
1131 | + | |
1132 | + i = find_first_zero_bit(inuse, max_netdevices); | |
1133 | + free_page((unsigned long) inuse); | |
1134 | + } | |
1135 | + | |
1136 | + snprintf(buf, sizeof(buf), name, i); | |
1137 | + if (!__dev_get_by_name(buf)) { | |
1138 | + strlcpy(dev->name, buf, IFNAMSIZ); | |
1139 | + return i; | |
1140 | + } | |
1141 | + | |
1142 | + /* It is possible to run out of possible slots | |
1143 | + * when the name is long and there isn't enough space left | |
1144 | + * for the digits, or if all bits are used. | |
1145 | + */ | |
1146 | + return -ENFILE; | |
1147 | +} | |
1148 | + | |
1149 | + | |
1150 | +/** | |
1151 | + * dev_change_name - change name of a device | |
1152 | + * @dev: device | |
1153 | + * @newname: name (or format string) must be at least IFNAMSIZ | |
1154 | + * | |
1155 | + * Change name of a device, can pass format strings "eth%d". | |
1156 | + * for wildcarding. | |
1157 | + */ | |
1158 | +int dev_change_name(struct net_device *dev, char *newname) | |
1159 | +{ | |
1160 | + int err = 0; | |
1161 | + | |
1162 | + ASSERT_RTNL(); | |
1163 | + | |
1164 | + if (dev->flags & IFF_UP) | |
1165 | + return -EBUSY; | |
1166 | + | |
1167 | + if (!dev_valid_name(newname)) | |
1168 | + return -EINVAL; | |
1169 | + | |
1170 | + if (strchr(newname, '%')) { | |
1171 | + err = dev_alloc_name(dev, newname); | |
1172 | + if (err < 0) | |
1173 | + return err; | |
1174 | + strcpy(newname, dev->name); | |
1175 | + } | |
1176 | + else if (__dev_get_by_name(newname)) | |
1177 | + return -EEXIST; | |
1178 | + else | |
1179 | + strlcpy(dev->name, newname, IFNAMSIZ); | |
1180 | + | |
1181 | + device_rename(&dev->dev, dev->name); | |
1182 | + hlist_del(&dev->name_hlist); | |
1183 | + hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); | |
1184 | + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); | |
1185 | + | |
1186 | + return err; | |
1187 | +} | |
1188 | + | |
1189 | +/** | |
1190 | + * netdev_features_change - device changes features | |
1191 | + * @dev: device to cause notification | |
1192 | + * | |
1193 | + * Called to indicate a device has changed features. | |
1194 | + */ | |
1195 | +void netdev_features_change(struct net_device *dev) | |
1196 | +{ | |
1197 | + raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); | |
1198 | +} | |
1199 | +EXPORT_SYMBOL(netdev_features_change); | |
1200 | + | |
1201 | +/** | |
1202 | + * netdev_state_change - device changes state | |
1203 | + * @dev: device to cause notification | |
1204 | + * | |
1205 | + * Called to indicate a device has changed state. This function calls | |
1206 | + * the notifier chains for netdev_chain and sends a NEWLINK message | |
1207 | + * to the routing socket. | |
1208 | + */ | |
1209 | +void netdev_state_change(struct net_device *dev) | |
1210 | +{ | |
1211 | + if (dev->flags & IFF_UP) { | |
1212 | + raw_notifier_call_chain(&netdev_chain, | |
1213 | + NETDEV_CHANGE, dev); | |
1214 | + rtmsg_ifinfo(RTM_NEWLINK, dev, 0); | |
1215 | + } | |
1216 | +} | |
1217 | + | |
1218 | +/** | |
1219 | + * dev_load - load a network module | |
1220 | + * @name: name of interface | |
1221 | + * | |
1222 | + * If a network interface is not present and the process has suitable | |
1223 | + * privileges this function loads the module. If module loading is not | |
1224 | + * available in this kernel then it becomes a nop. | |
1225 | + */ | |
1226 | + | |
1227 | +void dev_load(const char *name) | |
1228 | +{ | |
1229 | + struct net_device *dev; | |
1230 | + | |
1231 | + read_lock(&dev_base_lock); | |
1232 | + dev = __dev_get_by_name(name); | |
1233 | + read_unlock(&dev_base_lock); | |
1234 | + | |
1235 | + if (!dev && capable(CAP_SYS_MODULE)) | |
1236 | + request_module("%s", name); | |
1237 | +} | |
1238 | + | |
1239 | +static int default_rebuild_header(struct sk_buff *skb) | |
1240 | +{ | |
1241 | + printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n", | |
1242 | + skb->dev ? skb->dev->name : "NULL!!!"); | |
1243 | + kfree_skb(skb); | |
1244 | + return 1; | |
1245 | +} | |
1246 | + | |
1247 | + | |
1248 | +/** | |
1249 | + * dev_open - prepare an interface for use. | |
1250 | + * @dev: device to open | |
1251 | + * | |
1252 | + * Takes a device from down to up state. The device's private open | |
1253 | + * function is invoked and then the multicast lists are loaded. Finally | |
1254 | + * the device is moved into the up state and a %NETDEV_UP message is | |
1255 | + * sent to the netdev notifier chain. | |
1256 | + * | |
1257 | + * Calling this function on an active interface is a nop. On a failure | |
1258 | + * a negative errno code is returned. | |
1259 | + */ | |
1260 | +int dev_open(struct net_device *dev) | |
1261 | +{ | |
1262 | + int ret = 0; | |
1263 | + | |
1264 | + /* | |
1265 | + * Is it already up? | |
1266 | + */ | |
1267 | + | |
1268 | + if (dev->flags & IFF_UP) | |
1269 | + return 0; | |
1270 | + | |
1271 | + /* | |
1272 | + * Is it even present? | |
1273 | + */ | |
1274 | + if (!netif_device_present(dev)) | |
1275 | + return -ENODEV; | |
1276 | + | |
1277 | + /* | |
1278 | + * Call device private open method | |
1279 | + */ | |
1280 | + set_bit(__LINK_STATE_START, &dev->state); | |
1281 | + if (dev->open) { | |
1282 | + ret = dev->open(dev); | |
1283 | + if (ret) | |
1284 | + clear_bit(__LINK_STATE_START, &dev->state); | |
1285 | + } | |
1286 | + | |
1287 | + /* | |
1288 | + * If it went open OK then: | |
1289 | + */ | |
1290 | + | |
1291 | + if (!ret) { | |
1292 | + /* | |
1293 | + * Set the flags. | |
1294 | + */ | |
1295 | + dev->flags |= IFF_UP; | |
1296 | + | |
1297 | + /* | |
1298 | + * Initialize multicasting status | |
1299 | + */ | |
1300 | + dev_mc_upload(dev); | |
1301 | + | |
1302 | + /* | |
1303 | + * Wakeup transmit queue engine | |
1304 | + */ | |
1305 | + dev_activate(dev); | |
1306 | + | |
1307 | + /* | |
1308 | + * ... and announce new interface. | |
1309 | + */ | |
1310 | + raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); | |
1311 | + } | |
1312 | + return ret; | |
1313 | +} | |
1314 | + | |
1315 | +/** | |
1316 | + * dev_close - shutdown an interface. | |
1317 | + * @dev: device to shutdown | |
1318 | + * | |
1319 | + * This function moves an active device into down state. A | |
1320 | + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device | |
1321 | + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier | |
1322 | + * chain. | |
1323 | + */ | |
1324 | +int dev_close(struct net_device *dev) | |
1325 | +{ | |
1326 | + if (!(dev->flags & IFF_UP)) | |
1327 | + return 0; | |
1328 | + | |
1329 | + /* | |
1330 | + * Tell people we are going down, so that they can | |
1331 | + * prepare to death, when device is still operating. | |
1332 | + */ | |
1333 | + raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); | |
1334 | + | |
1335 | + dev_deactivate(dev); | |
1336 | + | |
1337 | + clear_bit(__LINK_STATE_START, &dev->state); | |
1338 | + | |
1339 | + /* Synchronize to scheduled poll. We cannot touch poll list, | |
1340 | + * it can be even on different cpu. So just clear netif_running(), | |
1341 | + * and wait when poll really will happen. Actually, the best place | |
1342 | + * for this is inside dev->stop() after device stopped its irq | |
1343 | + * engine, but this requires more changes in devices. */ | |
1344 | + | |
1345 | + smp_mb__after_clear_bit(); /* Commit netif_running(). */ | |
1346 | + while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { | |
1347 | + /* No hurry. */ | |
1348 | + msleep(1); | |
1349 | + } | |
1350 | + | |
1351 | + /* | |
1352 | + * Call the device specific close. This cannot fail. | |
1353 | + * Only if device is UP | |
1354 | + * | |
1355 | + * We allow it to be called even after a DETACH hot-plug | |
1356 | + * event. | |
1357 | + */ | |
1358 | + if (dev->stop) | |
1359 | + dev->stop(dev); | |
1360 | + | |
1361 | + /* | |
1362 | + * Device is now down. | |
1363 | + */ | |
1364 | + | |
1365 | + dev->flags &= ~IFF_UP; | |
1366 | + | |
1367 | + /* | |
1368 | + * Tell people we are down | |
1369 | + */ | |
1370 | + raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); | |
1371 | + | |
1372 | + return 0; | |
1373 | +} | |
1374 | + | |
1375 | + | |
1376 | +/* | |
1377 | + * Device change register/unregister. These are not inline or static | |
1378 | + * as we export them to the world. | |
1379 | + */ | |
1380 | + | |
1381 | +/** | |
1382 | + * register_netdevice_notifier - register a network notifier block | |
1383 | + * @nb: notifier | |
1384 | + * | |
1385 | + * Register a notifier to be called when network device events occur. | |
1386 | + * The notifier passed is linked into the kernel structures and must | |
1387 | + * not be reused until it has been unregistered. A negative errno code | |
1388 | + * is returned on a failure. | |
1389 | + * | |
1390 | + * When registered all registration and up events are replayed | |
1391 | + * to the new notifier to allow device to have a race free | |
1392 | + * view of the network device list. | |
1393 | + */ | |
1394 | + | |
1395 | +int register_netdevice_notifier(struct notifier_block *nb) | |
1396 | +{ | |
1397 | + struct net_device *dev; | |
1398 | + int err; | |
1399 | + | |
1400 | + rtnl_lock(); | |
1401 | + err = raw_notifier_chain_register(&netdev_chain, nb); | |
1402 | + if (!err) { | |
1403 | + for (dev = dev_base; dev; dev = dev->next) { | |
1404 | + nb->notifier_call(nb, NETDEV_REGISTER, dev); | |
1405 | + | |
1406 | + if (dev->flags & IFF_UP) | |
1407 | + nb->notifier_call(nb, NETDEV_UP, dev); | |
1408 | + } | |
1409 | + } | |
1410 | + rtnl_unlock(); | |
1411 | + return err; | |
1412 | +} | |
1413 | + | |
1414 | +/** | |
1415 | + * unregister_netdevice_notifier - unregister a network notifier block | |
1416 | + * @nb: notifier | |
1417 | + * | |
1418 | + * Unregister a notifier previously registered by | |
1419 | + * register_netdevice_notifier(). The notifier is unlinked into the | |
1420 | + * kernel structures and may then be reused. A negative errno code | |
1421 | + * is returned on a failure. | |
1422 | + */ | |
1423 | + | |
1424 | +int unregister_netdevice_notifier(struct notifier_block *nb) | |
1425 | +{ | |
1426 | + int err; | |
1427 | + | |
1428 | + rtnl_lock(); | |
1429 | + err = raw_notifier_chain_unregister(&netdev_chain, nb); | |
1430 | + rtnl_unlock(); | |
1431 | + return err; | |
1432 | +} | |
1433 | + | |
1434 | +/** | |
1435 | + * call_netdevice_notifiers - call all network notifier blocks | |
1436 | + * @val: value passed unmodified to notifier function | |
1437 | + * @v: pointer passed unmodified to notifier function | |
1438 | + * | |
1439 | + * Call all network notifier blocks. Parameters and return value | |
1440 | + * are as for raw_notifier_call_chain(). | |
1441 | + */ | |
1442 | + | |
1443 | +int call_netdevice_notifiers(unsigned long val, void *v) | |
1444 | +{ | |
1445 | + return raw_notifier_call_chain(&netdev_chain, val, v); | |
1446 | +} | |
1447 | + | |
1448 | +/* When > 0 there are consumers of rx skb time stamps */ | |
1449 | +static atomic_t netstamp_needed = ATOMIC_INIT(0); | |
1450 | + | |
1451 | +void net_enable_timestamp(void) | |
1452 | +{ | |
1453 | + atomic_inc(&netstamp_needed); | |
1454 | +} | |
1455 | + | |
1456 | +void net_disable_timestamp(void) | |
1457 | +{ | |
1458 | + atomic_dec(&netstamp_needed); | |
1459 | +} | |
1460 | + | |
1461 | +void __net_timestamp(struct sk_buff *skb) | |
1462 | +{ | |
1463 | + struct timeval tv; | |
1464 | + | |
1465 | + do_gettimeofday(&tv); | |
1466 | + skb_set_timestamp(skb, &tv); | |
1467 | +} | |
1468 | +EXPORT_SYMBOL(__net_timestamp); | |
1469 | + | |
1470 | +static inline void net_timestamp(struct sk_buff *skb) | |
1471 | +{ | |
1472 | + if (atomic_read(&netstamp_needed)) | |
1473 | + __net_timestamp(skb); | |
1474 | + else { | |
1475 | + skb->tstamp.off_sec = 0; | |
1476 | + skb->tstamp.off_usec = 0; | |
1477 | + } | |
1478 | +} | |
1479 | + | |
1480 | +/* | |
1481 | + * Support routine. Sends outgoing frames to any network | |
1482 | + * taps currently in use. | |
1483 | + */ | |
1484 | + | |
1485 | +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) | |
1486 | +{ | |
1487 | + struct packet_type *ptype; | |
1488 | + | |
1489 | + net_timestamp(skb); | |
1490 | + | |
1491 | + rcu_read_lock(); | |
1492 | + list_for_each_entry_rcu(ptype, &ptype_all, list) { | |
1493 | + /* Never send packets back to the socket | |
1494 | + * they originated from - MvS (miquels@drinkel.ow.org) | |
1495 | + */ | |
1496 | + if ((ptype->dev == dev || !ptype->dev) && | |
1497 | + (ptype->af_packet_priv == NULL || | |
1498 | + (struct sock *)ptype->af_packet_priv != skb->sk)) { | |
1499 | + struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC); | |
1500 | + if (!skb2) | |
1501 | + break; | |
1502 | + | |
1503 | + /* skb->nh should be correctly | |
1504 | + set by sender, so that the second statement is | |
1505 | + just protection against buggy protocols. | |
1506 | + */ | |
1507 | + skb2->mac.raw = skb2->data; | |
1508 | + | |
1509 | + if (skb2->nh.raw < skb2->data || | |
1510 | + skb2->nh.raw > skb2->tail) { | |
1511 | + if (net_ratelimit()) | |
1512 | + printk(KERN_CRIT "protocol %04x is " | |
1513 | + "buggy, dev %s\n", | |
1514 | + skb2->protocol, dev->name); | |
1515 | + skb2->nh.raw = skb2->data; | |
1516 | + } | |
1517 | + | |
1518 | + skb2->h.raw = skb2->nh.raw; | |
1519 | + skb2->pkt_type = PACKET_OUTGOING; | |
1520 | + ptype->func(skb2, skb->dev, ptype, skb->dev); | |
1521 | + } | |
1522 | + } | |
1523 | + rcu_read_unlock(); | |
1524 | +} | |
1525 | + | |
1526 | + | |
1527 | +void __netif_schedule(struct net_device *dev) | |
1528 | +{ | |
1529 | + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) { | |
1530 | + unsigned long flags; | |
1531 | + struct softnet_data *sd; | |
1532 | + | |
1533 | + local_irq_save(flags); | |
1534 | + sd = &__get_cpu_var(softnet_data); | |
1535 | + dev->next_sched = sd->output_queue; | |
1536 | + sd->output_queue = dev; | |
1537 | + raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
1538 | + local_irq_restore(flags); | |
1539 | + } | |
1540 | +} | |
1541 | +EXPORT_SYMBOL(__netif_schedule); | |
1542 | + | |
1543 | +void __netif_rx_schedule(struct net_device *dev) | |
1544 | +{ | |
1545 | + unsigned long flags; | |
1546 | + | |
1547 | + local_irq_save(flags); | |
1548 | + dev_hold(dev); | |
1549 | + list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list); | |
1550 | + if (dev->quota < 0) | |
1551 | + dev->quota += dev->weight; | |
1552 | + else | |
1553 | + dev->quota = dev->weight; | |
1554 | + __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
1555 | + local_irq_restore(flags); | |
1556 | +} | |
1557 | +EXPORT_SYMBOL(__netif_rx_schedule); | |
1558 | + | |
1559 | +void dev_kfree_skb_any(struct sk_buff *skb) | |
1560 | +{ | |
1561 | + if (in_irq() || irqs_disabled()) | |
1562 | + dev_kfree_skb_irq(skb); | |
1563 | + else | |
1564 | + dev_kfree_skb(skb); | |
1565 | +} | |
1566 | +EXPORT_SYMBOL(dev_kfree_skb_any); | |
1567 | + | |
1568 | + | |
1569 | +/* Hot-plugging. */ | |
1570 | +void netif_device_detach(struct net_device *dev) | |
1571 | +{ | |
1572 | + if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) && | |
1573 | + netif_running(dev)) { | |
1574 | + netif_stop_queue(dev); | |
1575 | + } | |
1576 | +} | |
1577 | +EXPORT_SYMBOL(netif_device_detach); | |
1578 | + | |
1579 | +void netif_device_attach(struct net_device *dev) | |
1580 | +{ | |
1581 | + if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) && | |
1582 | + netif_running(dev)) { | |
1583 | + netif_wake_queue(dev); | |
1584 | + __netdev_watchdog_up(dev); | |
1585 | + } | |
1586 | +} | |
1587 | +EXPORT_SYMBOL(netif_device_attach); | |
1588 | + | |
1589 | + | |
1590 | +/* | |
1591 | + * Invalidate hardware checksum when packet is to be mangled, and | |
1592 | + * complete checksum manually on outgoing path. | |
1593 | + */ | |
1594 | +int skb_checksum_help(struct sk_buff *skb) | |
1595 | +{ | |
1596 | + __wsum csum; | |
1597 | + int ret = 0, offset = skb->h.raw - skb->data; | |
1598 | + | |
1599 | + if (skb->ip_summed == CHECKSUM_COMPLETE) | |
1600 | + goto out_set_summed; | |
1601 | + | |
1602 | + if (unlikely(skb_shinfo(skb)->gso_size)) { | |
1603 | + /* Let GSO fix up the checksum. */ | |
1604 | + goto out_set_summed; | |
1605 | + } | |
1606 | + | |
1607 | + if (skb_cloned(skb)) { | |
1608 | + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); | |
1609 | + if (ret) | |
1610 | + goto out; | |
1611 | + } | |
1612 | + | |
1613 | + BUG_ON(offset > (int)skb->len); | |
1614 | + csum = skb_checksum(skb, offset, skb->len-offset, 0); | |
1615 | + | |
1616 | + offset = skb->tail - skb->h.raw; | |
1617 | + BUG_ON(offset <= 0); | |
1618 | + BUG_ON(skb->csum_offset + 2 > offset); | |
1619 | + | |
1620 | + *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum); | |
1621 | + | |
1622 | +out_set_summed: | |
1623 | + skb->ip_summed = CHECKSUM_NONE; | |
1624 | +out: | |
1625 | + return ret; | |
1626 | +} | |
1627 | + | |
1628 | +/** | |
1629 | + * skb_gso_segment - Perform segmentation on skb. | |
1630 | + * @skb: buffer to segment | |
1631 | + * @features: features for the output path (see dev->features) | |
1632 | + * | |
1633 | + * This function segments the given skb and returns a list of segments. | |
1634 | + * | |
1635 | + * It may return NULL if the skb requires no segmentation. This is | |
1636 | + * only possible when GSO is used for verifying header integrity. | |
1637 | + */ | |
1638 | +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) | |
1639 | +{ | |
1640 | + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT); | |
1641 | + struct packet_type *ptype; | |
1642 | + __be16 type = skb->protocol; | |
1643 | + int err; | |
1644 | + | |
1645 | + BUG_ON(skb_shinfo(skb)->frag_list); | |
1646 | + | |
1647 | + skb->mac.raw = skb->data; | |
1648 | + skb->mac_len = skb->nh.raw - skb->data; | |
1649 | + __skb_pull(skb, skb->mac_len); | |
1650 | + | |
1651 | + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { | |
1652 | + if (skb_header_cloned(skb) && | |
1653 | + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) | |
1654 | + return ERR_PTR(err); | |
1655 | + } | |
1656 | + | |
1657 | + rcu_read_lock(); | |
1658 | + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { | |
1659 | + if (ptype->type == type && !ptype->dev && ptype->gso_segment) { | |
1660 | + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { | |
1661 | + err = ptype->gso_send_check(skb); | |
1662 | + segs = ERR_PTR(err); | |
1663 | + if (err || skb_gso_ok(skb, features)) | |
1664 | + break; | |
1665 | + __skb_push(skb, skb->data - skb->nh.raw); | |
1666 | + } | |
1667 | + segs = ptype->gso_segment(skb, features); | |
1668 | + break; | |
1669 | + } | |
1670 | + } | |
1671 | + rcu_read_unlock(); | |
1672 | + | |
1673 | + __skb_push(skb, skb->data - skb->mac.raw); | |
1674 | + | |
1675 | + return segs; | |
1676 | +} | |
1677 | + | |
1678 | +EXPORT_SYMBOL(skb_gso_segment); | |
1679 | + | |
1680 | +/* Take action when hardware reception checksum errors are detected. */ | |
1681 | +#ifdef CONFIG_BUG | |
1682 | +void netdev_rx_csum_fault(struct net_device *dev) | |
1683 | +{ | |
1684 | + if (net_ratelimit()) { | |
1685 | + printk(KERN_ERR "%s: hw csum failure.\n", | |
1686 | + dev ? dev->name : "<unknown>"); | |
1687 | + dump_stack(); | |
1688 | + } | |
1689 | +} | |
1690 | +EXPORT_SYMBOL(netdev_rx_csum_fault); | |
1691 | +#endif | |
1692 | + | |
1693 | +/* Actually, we should eliminate this check as soon as we know, that: | |
1694 | + * 1. IOMMU is present and allows to map all the memory. | |
1695 | + * 2. No high memory really exists on this machine. | |
1696 | + */ | |
1697 | + | |
1698 | +static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) | |
1699 | +{ | |
1700 | +#ifdef CONFIG_HIGHMEM | |
1701 | + int i; | |
1702 | + | |
1703 | + if (dev->features & NETIF_F_HIGHDMA) | |
1704 | + return 0; | |
1705 | + | |
1706 | + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) | |
1707 | + if (PageHighMem(skb_shinfo(skb)->frags[i].page)) | |
1708 | + return 1; | |
1709 | + | |
1710 | +#endif | |
1711 | + return 0; | |
1712 | +} | |
1713 | + | |
1714 | +struct dev_gso_cb { | |
1715 | + void (*destructor)(struct sk_buff *skb); | |
1716 | +}; | |
1717 | + | |
1718 | +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) | |
1719 | + | |
1720 | +static void dev_gso_skb_destructor(struct sk_buff *skb) | |
1721 | +{ | |
1722 | + struct dev_gso_cb *cb; | |
1723 | + | |
1724 | + do { | |
1725 | + struct sk_buff *nskb = skb->next; | |
1726 | + | |
1727 | + skb->next = nskb->next; | |
1728 | + nskb->next = NULL; | |
1729 | + kfree_skb(nskb); | |
1730 | + } while (skb->next); | |
1731 | + | |
1732 | + cb = DEV_GSO_CB(skb); | |
1733 | + if (cb->destructor) | |
1734 | + cb->destructor(skb); | |
1735 | +} | |
1736 | + | |
1737 | +/** | |
1738 | + * dev_gso_segment - Perform emulated hardware segmentation on skb. | |
1739 | + * @skb: buffer to segment | |
1740 | + * | |
1741 | + * This function segments the given skb and stores the list of segments | |
1742 | + * in skb->next. | |
1743 | + */ | |
1744 | +static int dev_gso_segment(struct sk_buff *skb) | |
1745 | +{ | |
1746 | + struct net_device *dev = skb->dev; | |
1747 | + struct sk_buff *segs; | |
1748 | + int features = dev->features & ~(illegal_highdma(dev, skb) ? | |
1749 | + NETIF_F_SG : 0); | |
1750 | + | |
1751 | + segs = skb_gso_segment(skb, features); | |
1752 | + | |
1753 | + /* Verifying header integrity only. */ | |
1754 | + if (!segs) | |
1755 | + return 0; | |
1756 | + | |
1757 | + if (unlikely(IS_ERR(segs))) | |
1758 | + return PTR_ERR(segs); | |
1759 | + | |
1760 | + skb->next = segs; | |
1761 | + DEV_GSO_CB(skb)->destructor = skb->destructor; | |
1762 | + skb->destructor = dev_gso_skb_destructor; | |
1763 | + | |
1764 | + return 0; | |
1765 | +} | |
1766 | + | |
1767 | +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) | |
1768 | +{ | |
1769 | + if (likely(!skb->next)) { | |
1770 | + if (netdev_nit) | |
1771 | + dev_queue_xmit_nit(skb, dev); | |
1772 | + | |
1773 | + if (netif_needs_gso(dev, skb)) { | |
1774 | + if (unlikely(dev_gso_segment(skb))) | |
1775 | + goto out_kfree_skb; | |
1776 | + if (skb->next) | |
1777 | + goto gso; | |
1778 | + } | |
1779 | + | |
1780 | + return dev->hard_start_xmit(skb, dev); | |
1781 | + } | |
1782 | + | |
1783 | +gso: | |
1784 | + do { | |
1785 | + struct sk_buff *nskb = skb->next; | |
1786 | + int rc; | |
1787 | + | |
1788 | + skb->next = nskb->next; | |
1789 | + nskb->next = NULL; | |
1790 | + rc = dev->hard_start_xmit(nskb, dev); | |
1791 | + if (unlikely(rc)) { | |
1792 | + nskb->next = skb->next; | |
1793 | + skb->next = nskb; | |
1794 | + return rc; | |
1795 | + } | |
1796 | + if (unlikely(netif_queue_stopped(dev) && skb->next)) | |
1797 | + return NETDEV_TX_BUSY; | |
1798 | + } while (skb->next); | |
1799 | + | |
1800 | + skb->destructor = DEV_GSO_CB(skb)->destructor; | |
1801 | + | |
1802 | +out_kfree_skb: | |
1803 | + kfree_skb(skb); | |
1804 | + return 0; | |
1805 | +} | |
1806 | + | |
1807 | +#define HARD_TX_LOCK(dev, cpu) { \ | |
1808 | + if ((dev->features & NETIF_F_LLTX) == 0) { \ | |
1809 | + netif_tx_lock(dev); \ | |
1810 | + } \ | |
1811 | +} | |
1812 | + | |
1813 | +#define HARD_TX_UNLOCK(dev) { \ | |
1814 | + if ((dev->features & NETIF_F_LLTX) == 0) { \ | |
1815 | + netif_tx_unlock(dev); \ | |
1816 | + } \ | |
1817 | +} | |
1818 | + | |
1819 | +/** | |
1820 | + * dev_queue_xmit - transmit a buffer | |
1821 | + * @skb: buffer to transmit | |
1822 | + * | |
1823 | + * Queue a buffer for transmission to a network device. The caller must | |
1824 | + * have set the device and priority and built the buffer before calling | |
1825 | + * this function. The function can be called from an interrupt. | |
1826 | + * | |
1827 | + * A negative errno code is returned on a failure. A success does not | |
1828 | + * guarantee the frame will be transmitted as it may be dropped due | |
1829 | + * to congestion or traffic shaping. | |
1830 | + * | |
1831 | + * ----------------------------------------------------------------------------------- | |
1832 | + * I notice this method can also return errors from the queue disciplines, | |
1833 | + * including NET_XMIT_DROP, which is a positive value. So, errors can also | |
1834 | + * be positive. | |
1835 | + * | |
1836 | + * Regardless of the return value, the skb is consumed, so it is currently | |
1837 | + * difficult to retry a send to this method. (You can bump the ref count | |
1838 | + * before sending to hold a reference for retry if you are careful.) | |
1839 | + * | |
1840 | + * When calling this method, interrupts MUST be enabled. This is because | |
1841 | + * the BH enable code must have IRQs enabled so that it will not deadlock. | |
1842 | + * --BLG | |
1843 | + */ | |
1844 | + | |
1845 | +int dev_queue_xmit(struct sk_buff *skb) | |
1846 | +{ | |
1847 | + struct net_device *dev = skb->dev; | |
1848 | + struct Qdisc *q; | |
1849 | + int rc = -ENOMEM; | |
1850 | + | |
1851 | + /* GSO will handle the following emulations directly. */ | |
1852 | + if (netif_needs_gso(dev, skb)) | |
1853 | + goto gso; | |
1854 | + | |
1855 | + if (skb_shinfo(skb)->frag_list && | |
1856 | + !(dev->features & NETIF_F_FRAGLIST) && | |
1857 | + __skb_linearize(skb)) | |
1858 | + goto out_kfree_skb; | |
1859 | + | |
1860 | + /* Fragmented skb is linearized if device does not support SG, | |
1861 | + * or if at least one of fragments is in highmem and device | |
1862 | + * does not support DMA from it. | |
1863 | + */ | |
1864 | + if (skb_shinfo(skb)->nr_frags && | |
1865 | + (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && | |
1866 | + __skb_linearize(skb)) | |
1867 | + goto out_kfree_skb; | |
1868 | + | |
1869 | + /* If packet is not checksummed and device does not support | |
1870 | + * checksumming for this protocol, complete checksumming here. | |
1871 | + */ | |
1872 | + if (skb->ip_summed == CHECKSUM_PARTIAL && | |
1873 | + (!(dev->features & NETIF_F_GEN_CSUM) && | |
1874 | + (!(dev->features & NETIF_F_IP_CSUM) || | |
1875 | + skb->protocol != htons(ETH_P_IP)))) | |
1876 | + if (skb_checksum_help(skb)) | |
1877 | + goto out_kfree_skb; | |
1878 | + | |
1879 | +gso: | |
1880 | + spin_lock_prefetch(&dev->queue_lock); | |
1881 | + | |
1882 | + /* Disable soft irqs for various locks below. Also | |
1883 | + * stops preemption for RCU. | |
1884 | + */ | |
1885 | + rcu_read_lock_bh(); | |
1886 | + | |
1887 | + /* Updates of qdisc are serialized by queue_lock. | |
1888 | + * The struct Qdisc which is pointed to by qdisc is now a | |
1889 | + * rcu structure - it may be accessed without acquiring | |
1890 | + * a lock (but the structure may be stale.) The freeing of the | |
1891 | + * qdisc will be deferred until it's known that there are no | |
1892 | + * more references to it. | |
1893 | + * | |
1894 | + * If the qdisc has an enqueue function, we still need to | |
1895 | + * hold the queue_lock before calling it, since queue_lock | |
1896 | + * also serializes access to the device queue. | |
1897 | + */ | |
1898 | + | |
1899 | + q = rcu_dereference(dev->qdisc); | |
1900 | +#ifdef CONFIG_NET_CLS_ACT | |
1901 | + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS); | |
1902 | +#endif | |
1903 | + if (q->enqueue) { | |
1904 | + /* Grab device queue */ | |
1905 | + spin_lock(&dev->queue_lock); | |
1906 | + q = dev->qdisc; | |
1907 | + if (q->enqueue) { | |
1908 | + rc = q->enqueue(skb, q); | |
1909 | + qdisc_run(dev); | |
1910 | + spin_unlock(&dev->queue_lock); | |
1911 | + | |
1912 | + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc; | |
1913 | + goto out; | |
1914 | + } | |
1915 | + spin_unlock(&dev->queue_lock); | |
1916 | + } | |
1917 | + | |
1918 | + /* The device has no queue. Common case for software devices: | |
1919 | + loopback, all the sorts of tunnels... | |
1920 | + | |
1921 | + Really, it is unlikely that netif_tx_lock protection is necessary | |
1922 | + here. (f.e. loopback and IP tunnels are clean ignoring statistics | |
1923 | + counters.) | |
1924 | + However, it is possible, that they rely on protection | |
1925 | + made by us here. | |
1926 | + | |
1927 | + Check this and shot the lock. It is not prone from deadlocks. | |
1928 | + Either shot noqueue qdisc, it is even simpler 8) | |
1929 | + */ | |
1930 | + if (dev->flags & IFF_UP) { | |
1931 | + int cpu = smp_processor_id(); /* ok because BHs are off */ | |
1932 | + | |
1933 | + if (dev->xmit_lock_owner != cpu) { | |
1934 | + | |
1935 | + HARD_TX_LOCK(dev, cpu); | |
1936 | + | |
1937 | + if (!netif_queue_stopped(dev)) { | |
1938 | + rc = 0; | |
1939 | + if (!dev_hard_start_xmit(skb, dev)) { | |
1940 | + HARD_TX_UNLOCK(dev); | |
1941 | + goto out; | |
1942 | + } | |
1943 | + } | |
1944 | + HARD_TX_UNLOCK(dev); | |
1945 | + if (net_ratelimit()) | |
1946 | + printk(KERN_CRIT "Virtual device %s asks to " | |
1947 | + "queue packet!\n", dev->name); | |
1948 | + } else { | |
1949 | + /* Recursion is detected! It is possible, | |
1950 | + * unfortunately */ | |
1951 | + if (net_ratelimit()) | |
1952 | + printk(KERN_CRIT "Dead loop on virtual device " | |
1953 | + "%s, fix it urgently!\n", dev->name); | |
1954 | + } | |
1955 | + } | |
1956 | + | |
1957 | + rc = -ENETDOWN; | |
1958 | + rcu_read_unlock_bh(); | |
1959 | + | |
1960 | +out_kfree_skb: | |
1961 | + kfree_skb(skb); | |
1962 | + return rc; | |
1963 | +out: | |
1964 | + rcu_read_unlock_bh(); | |
1965 | + return rc; | |
1966 | +} | |
1967 | + | |
1968 | + | |
1969 | +/*======================================================================= | |
1970 | + Receiver routines | |
1971 | + =======================================================================*/ | |
1972 | + | |
1973 | +int netdev_max_backlog = 1000; | |
1974 | +int netdev_budget = 300; | |
1975 | +int weight_p = 64; /* old backlog weight */ | |
1976 | + | |
1977 | +DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, }; | |
1978 | + | |
1979 | + | |
1980 | +/** | |
1981 | + * netif_rx - post buffer to the network code | |
1982 | + * @skb: buffer to post | |
1983 | + * | |
1984 | + * This function receives a packet from a device driver and queues it for | |
1985 | + * the upper (protocol) levels to process. It always succeeds. The buffer | |
1986 | + * may be dropped during processing for congestion control or by the | |
1987 | + * protocol layers. | |
1988 | + * | |
1989 | + * return values: | |
1990 | + * NET_RX_SUCCESS (no congestion) | |
1991 | + * NET_RX_CN_LOW (low congestion) | |
1992 | + * NET_RX_CN_MOD (moderate congestion) | |
1993 | + * NET_RX_CN_HIGH (high congestion) | |
1994 | + * NET_RX_DROP (packet was dropped) | |
1995 | + * | |
1996 | + */ | |
1997 | + | |
1998 | +int netif_rx(struct sk_buff *skb) | |
1999 | +{ | |
2000 | + struct softnet_data *queue; | |
2001 | + unsigned long flags; | |
2002 | + | |
2003 | + /* if netpoll wants it, pretend we never saw it */ | |
2004 | + if (netpoll_rx(skb)) | |
2005 | + return NET_RX_DROP; | |
2006 | + | |
2007 | + if (!skb->tstamp.off_sec) | |
2008 | + net_timestamp(skb); | |
2009 | + | |
2010 | + /* | |
2011 | + * The code is rearranged so that the path is the most | |
2012 | + * short when CPU is congested, but is still operating. | |
2013 | + */ | |
2014 | + local_irq_save(flags); | |
2015 | + queue = &__get_cpu_var(softnet_data); | |
2016 | + | |
2017 | + __get_cpu_var(netdev_rx_stat).total++; | |
2018 | + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) { | |
2019 | + if (queue->input_pkt_queue.qlen) { | |
2020 | +enqueue: | |
2021 | + dev_hold(skb->dev); | |
2022 | + __skb_queue_tail(&queue->input_pkt_queue, skb); | |
2023 | + local_irq_restore(flags); | |
2024 | + return NET_RX_SUCCESS; | |
2025 | + } | |
2026 | + | |
2027 | + netif_rx_schedule(&queue->backlog_dev); | |
2028 | + goto enqueue; | |
2029 | + } | |
2030 | + | |
2031 | + __get_cpu_var(netdev_rx_stat).dropped++; | |
2032 | + local_irq_restore(flags); | |
2033 | + | |
2034 | + kfree_skb(skb); | |
2035 | + return NET_RX_DROP; | |
2036 | +} | |
2037 | + | |
2038 | +int netif_rx_ni(struct sk_buff *skb) | |
2039 | +{ | |
2040 | + int err; | |
2041 | + | |
2042 | + preempt_disable(); | |
2043 | + err = netif_rx(skb); | |
2044 | + if (local_softirq_pending()) | |
2045 | + do_softirq(); | |
2046 | + preempt_enable(); | |
2047 | + | |
2048 | + return err; | |
2049 | +} | |
2050 | + | |
2051 | +EXPORT_SYMBOL(netif_rx_ni); | |
2052 | + | |
2053 | +static inline struct net_device *skb_bond(struct sk_buff *skb) | |
2054 | +{ | |
2055 | + struct net_device *dev = skb->dev; | |
2056 | + | |
2057 | + if (dev->master) { | |
2058 | + if (skb_bond_should_drop(skb)) { | |
2059 | + kfree_skb(skb); | |
2060 | + return NULL; | |
2061 | + } | |
2062 | + skb->dev = dev->master; | |
2063 | + } | |
2064 | + | |
2065 | + return dev; | |
2066 | +} | |
2067 | + | |
2068 | +static void net_tx_action(struct softirq_action *h) | |
2069 | +{ | |
2070 | + struct softnet_data *sd = &__get_cpu_var(softnet_data); | |
2071 | + | |
2072 | + if (sd->completion_queue) { | |
2073 | + struct sk_buff *clist; | |
2074 | + | |
2075 | + local_irq_disable(); | |
2076 | + clist = sd->completion_queue; | |
2077 | + sd->completion_queue = NULL; | |
2078 | + local_irq_enable(); | |
2079 | + | |
2080 | + while (clist) { | |
2081 | + struct sk_buff *skb = clist; | |
2082 | + clist = clist->next; | |
2083 | + | |
2084 | + BUG_TRAP(!atomic_read(&skb->users)); | |
2085 | + __kfree_skb(skb); | |
2086 | + } | |
2087 | + } | |
2088 | + | |
2089 | + if (sd->output_queue) { | |
2090 | + struct net_device *head; | |
2091 | + | |
2092 | + local_irq_disable(); | |
2093 | + head = sd->output_queue; | |
2094 | + sd->output_queue = NULL; | |
2095 | + local_irq_enable(); | |
2096 | + | |
2097 | + while (head) { | |
2098 | + struct net_device *dev = head; | |
2099 | + head = head->next_sched; | |
2100 | + | |
2101 | + smp_mb__before_clear_bit(); | |
2102 | + clear_bit(__LINK_STATE_SCHED, &dev->state); | |
2103 | + | |
2104 | + if (spin_trylock(&dev->queue_lock)) { | |
2105 | + qdisc_run(dev); | |
2106 | + spin_unlock(&dev->queue_lock); | |
2107 | + } else { | |
2108 | + netif_schedule(dev); | |
2109 | + } | |
2110 | + } | |
2111 | + } | |
2112 | +} | |
2113 | + | |
2114 | +static __inline__ int deliver_skb(struct sk_buff *skb, | |
2115 | + struct packet_type *pt_prev, | |
2116 | + struct net_device *orig_dev) | |
2117 | +{ | |
2118 | + atomic_inc(&skb->users); | |
2119 | + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | |
2120 | +} | |
2121 | + | |
2122 | +#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE) | |
2123 | +int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb); | |
2124 | +struct net_bridge; | |
2125 | +struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br, | |
2126 | + unsigned char *addr); | |
2127 | +void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent); | |
2128 | + | |
2129 | +static __inline__ int handle_bridge(struct sk_buff **pskb, | |
2130 | + struct packet_type **pt_prev, int *ret, | |
2131 | + struct net_device *orig_dev) | |
2132 | +{ | |
2133 | + struct net_bridge_port *port; | |
2134 | + | |
2135 | + if ((*pskb)->pkt_type == PACKET_LOOPBACK || | |
2136 | + (port = rcu_dereference((*pskb)->dev->br_port)) == NULL) | |
2137 | + return 0; | |
2138 | + | |
2139 | + if (*pt_prev) { | |
2140 | + *ret = deliver_skb(*pskb, *pt_prev, orig_dev); | |
2141 | + *pt_prev = NULL; | |
2142 | + } | |
2143 | + | |
2144 | + return br_handle_frame_hook(port, pskb); | |
2145 | +} | |
2146 | +#else | |
2147 | +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0) | |
2148 | +#endif | |
2149 | + | |
2150 | +#ifdef CONFIG_NET_CLS_ACT | |
2151 | +/* TODO: Maybe we should just force sch_ingress to be compiled in | |
2152 | + * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions | |
2153 | + * a compare and 2 stores extra right now if we dont have it on | |
2154 | + * but have CONFIG_NET_CLS_ACT | |
2155 | + * NOTE: This doesnt stop any functionality; if you dont have | |
2156 | + * the ingress scheduler, you just cant add policies on ingress. | |
2157 | + * | |
2158 | + */ | |
2159 | +static int ing_filter(struct sk_buff *skb) | |
2160 | +{ | |
2161 | + struct Qdisc *q; | |
2162 | + struct net_device *dev = skb->dev; | |
2163 | + int result = TC_ACT_OK; | |
2164 | + | |
2165 | + if (dev->qdisc_ingress) { | |
2166 | + __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd); | |
2167 | + if (MAX_RED_LOOP < ttl++) { | |
2168 | + printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n", | |
2169 | + skb->iif, skb->dev->ifindex); | |
2170 | + return TC_ACT_SHOT; | |
2171 | + } | |
2172 | + | |
2173 | + skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl); | |
2174 | + | |
2175 | + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS); | |
2176 | + | |
2177 | + spin_lock(&dev->queue_lock); | |
2178 | + if ((q = dev->qdisc_ingress) != NULL) | |
2179 | + result = q->enqueue(skb, q); | |
2180 | + spin_unlock(&dev->queue_lock); | |
2181 | + | |
2182 | + } | |
2183 | + | |
2184 | + return result; | |
2185 | +} | |
2186 | +#endif | |
2187 | + | |
2188 | +int netif_receive_skb(struct sk_buff *skb) | |
2189 | +{ | |
2190 | + struct packet_type *ptype, *pt_prev; | |
2191 | + struct net_device *orig_dev; | |
2192 | + int ret = NET_RX_DROP; | |
2193 | + __be16 type; | |
2194 | + | |
2195 | + /* if we've gotten here through NAPI, check netpoll */ | |
2196 | + if (skb->dev->poll && netpoll_rx(skb)) | |
2197 | + return NET_RX_DROP; | |
2198 | + | |
2199 | + if (!skb->tstamp.off_sec) | |
2200 | + net_timestamp(skb); | |
2201 | + | |
2202 | + if (!skb->iif) | |
2203 | + skb->iif = skb->dev->ifindex; | |
2204 | + | |
2205 | + orig_dev = skb_bond(skb); | |
2206 | + | |
2207 | + if (!orig_dev) | |
2208 | + return NET_RX_DROP; | |
2209 | + | |
2210 | + __get_cpu_var(netdev_rx_stat).total++; | |
2211 | + | |
2212 | + skb->h.raw = skb->nh.raw = skb->data; | |
2213 | + skb->mac_len = skb->nh.raw - skb->mac.raw; | |
2214 | + | |
2215 | + pt_prev = NULL; | |
2216 | + | |
2217 | + rcu_read_lock(); | |
2218 | + | |
2219 | +#ifdef CONFIG_NET_CLS_ACT | |
2220 | + if (skb->tc_verd & TC_NCLS) { | |
2221 | + skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); | |
2222 | + goto ncls; | |
2223 | + } | |
2224 | +#endif | |
2225 | + | |
2226 | + list_for_each_entry_rcu(ptype, &ptype_all, list) { | |
2227 | + if (!ptype->dev || ptype->dev == skb->dev) { | |
2228 | + if (pt_prev) | |
2229 | + ret = deliver_skb(skb, pt_prev, orig_dev); | |
2230 | + pt_prev = ptype; | |
2231 | + } | |
2232 | + } | |
2233 | + | |
2234 | +#ifdef CONFIG_NET_CLS_ACT | |
2235 | + if (pt_prev) { | |
2236 | + ret = deliver_skb(skb, pt_prev, orig_dev); | |
2237 | + pt_prev = NULL; /* noone else should process this after*/ | |
2238 | + } else { | |
2239 | + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd); | |
2240 | + } | |
2241 | + | |
2242 | + ret = ing_filter(skb); | |
2243 | + | |
2244 | + if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) { | |
2245 | + kfree_skb(skb); | |
2246 | + goto out; | |
2247 | + } | |
2248 | + | |
2249 | + skb->tc_verd = 0; | |
2250 | +ncls: | |
2251 | +#endif | |
2252 | + | |
2253 | + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev)) | |
2254 | + goto out; | |
2255 | + | |
2256 | + type = skb->protocol; | |
2257 | + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) { | |
2258 | + if (ptype->type == type && | |
2259 | + (!ptype->dev || ptype->dev == skb->dev)) { | |
2260 | + if (pt_prev) | |
2261 | + ret = deliver_skb(skb, pt_prev, orig_dev); | |
2262 | + pt_prev = ptype; | |
2263 | + } | |
2264 | + } | |
2265 | + | |
2266 | + if (pt_prev) { | |
2267 | + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); | |
2268 | + } else { | |
2269 | + kfree_skb(skb); | |
2270 | + /* Jamal, now you will not able to escape explaining | |
2271 | + * me how you were going to use this. :-) | |
2272 | + */ | |
2273 | + ret = NET_RX_DROP; | |
2274 | + } | |
2275 | + | |
2276 | +out: | |
2277 | + rcu_read_unlock(); | |
2278 | + return ret; | |
2279 | +} | |
2280 | + | |
2281 | +static int process_backlog(struct net_device *backlog_dev, int *budget) | |
2282 | +{ | |
2283 | + int work = 0; | |
2284 | + int quota = min(backlog_dev->quota, *budget); | |
2285 | + struct softnet_data *queue = &__get_cpu_var(softnet_data); | |
2286 | + unsigned long start_time = jiffies; | |
2287 | + | |
2288 | + backlog_dev->weight = weight_p; | |
2289 | + for (;;) { | |
2290 | + struct sk_buff *skb; | |
2291 | + struct net_device *dev; | |
2292 | + | |
2293 | + local_irq_disable(); | |
2294 | + skb = __skb_dequeue(&queue->input_pkt_queue); | |
2295 | + if (!skb) | |
2296 | + goto job_done; | |
2297 | + local_irq_enable(); | |
2298 | + | |
2299 | + dev = skb->dev; | |
2300 | + | |
2301 | + netif_receive_skb(skb); | |
2302 | + | |
2303 | + dev_put(dev); | |
2304 | + | |
2305 | + work++; | |
2306 | + | |
2307 | + if (work >= quota || jiffies - start_time > 1) | |
2308 | + break; | |
2309 | + | |
2310 | + } | |
2311 | + | |
2312 | + backlog_dev->quota -= work; | |
2313 | + *budget -= work; | |
2314 | + return -1; | |
2315 | + | |
2316 | +job_done: | |
2317 | + backlog_dev->quota -= work; | |
2318 | + *budget -= work; | |
2319 | + | |
2320 | + list_del(&backlog_dev->poll_list); | |
2321 | + smp_mb__before_clear_bit(); | |
2322 | + netif_poll_enable(backlog_dev); | |
2323 | + | |
2324 | + local_irq_enable(); | |
2325 | + return 0; | |
2326 | +} | |
2327 | + | |
2328 | +static void net_rx_action(struct softirq_action *h) | |
2329 | +{ | |
2330 | + struct softnet_data *queue = &__get_cpu_var(softnet_data); | |
2331 | + unsigned long start_time = jiffies; | |
2332 | + int budget = netdev_budget; | |
2333 | + void *have; | |
2334 | + | |
2335 | + local_irq_disable(); | |
2336 | + | |
2337 | + while (!list_empty(&queue->poll_list)) { | |
2338 | + struct net_device *dev; | |
2339 | + | |
2340 | + if (budget <= 0 || jiffies - start_time > 1) | |
2341 | + goto softnet_break; | |
2342 | + | |
2343 | + local_irq_enable(); | |
2344 | + | |
2345 | + dev = list_entry(queue->poll_list.next, | |
2346 | + struct net_device, poll_list); | |
2347 | + have = netpoll_poll_lock(dev); | |
2348 | + | |
2349 | + if (dev->quota <= 0 || dev->poll(dev, &budget)) { | |
2350 | + netpoll_poll_unlock(have); | |
2351 | + local_irq_disable(); | |
2352 | + list_move_tail(&dev->poll_list, &queue->poll_list); | |
2353 | + if (dev->quota < 0) | |
2354 | + dev->quota += dev->weight; | |
2355 | + else | |
2356 | + dev->quota = dev->weight; | |
2357 | + } else { | |
2358 | + netpoll_poll_unlock(have); | |
2359 | + dev_put(dev); | |
2360 | + local_irq_disable(); | |
2361 | + } | |
2362 | + } | |
2363 | +out: | |
2364 | +#ifdef CONFIG_NET_DMA | |
2365 | + /* | |
2366 | + * There may not be any more sk_buffs coming right now, so push | |
2367 | + * any pending DMA copies to hardware | |
2368 | + */ | |
2369 | + if (net_dma_client) { | |
2370 | + struct dma_chan *chan; | |
2371 | + rcu_read_lock(); | |
2372 | + list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) | |
2373 | + dma_async_memcpy_issue_pending(chan); | |
2374 | + rcu_read_unlock(); | |
2375 | + } | |
2376 | +#endif | |
2377 | + local_irq_enable(); | |
2378 | + return; | |
2379 | + | |
2380 | +softnet_break: | |
2381 | + __get_cpu_var(netdev_rx_stat).time_squeeze++; | |
2382 | + __raise_softirq_irqoff(NET_RX_SOFTIRQ); | |
2383 | + goto out; | |
2384 | +} | |
2385 | + | |
2386 | +static gifconf_func_t * gifconf_list [NPROTO]; | |
2387 | + | |
2388 | +/** | |
2389 | + * register_gifconf - register a SIOCGIF handler | |
2390 | + * @family: Address family | |
2391 | + * @gifconf: Function handler | |
2392 | + * | |
2393 | + * Register protocol dependent address dumping routines. The handler | |
2394 | + * that is passed must not be freed or reused until it has been replaced | |
2395 | + * by another handler. | |
2396 | + */ | |
2397 | +int register_gifconf(unsigned int family, gifconf_func_t * gifconf) | |
2398 | +{ | |
2399 | + if (family >= NPROTO) | |
2400 | + return -EINVAL; | |
2401 | + gifconf_list[family] = gifconf; | |
2402 | + return 0; | |
2403 | +} | |
2404 | + | |
2405 | + | |
2406 | +/* | |
2407 | + * Map an interface index to its name (SIOCGIFNAME) | |
2408 | + */ | |
2409 | + | |
2410 | +/* | |
2411 | + * We need this ioctl for efficient implementation of the | |
2412 | + * if_indextoname() function required by the IPv6 API. Without | |
2413 | + * it, we would have to search all the interfaces to find a | |
2414 | + * match. --pb | |
2415 | + */ | |
2416 | + | |
2417 | +static int dev_ifname(struct ifreq __user *arg) | |
2418 | +{ | |
2419 | + struct net_device *dev; | |
2420 | + struct ifreq ifr; | |
2421 | + | |
2422 | + /* | |
2423 | + * Fetch the caller's info block. | |
2424 | + */ | |
2425 | + | |
2426 | + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | |
2427 | + return -EFAULT; | |
2428 | + | |
2429 | + read_lock(&dev_base_lock); | |
2430 | + dev = __dev_get_by_index(ifr.ifr_ifindex); | |
2431 | + if (!dev) { | |
2432 | + read_unlock(&dev_base_lock); | |
2433 | + return -ENODEV; | |
2434 | + } | |
2435 | + | |
2436 | + strcpy(ifr.ifr_name, dev->name); | |
2437 | + read_unlock(&dev_base_lock); | |
2438 | + | |
2439 | + if (copy_to_user(arg, &ifr, sizeof(struct ifreq))) | |
2440 | + return -EFAULT; | |
2441 | + return 0; | |
2442 | +} | |
2443 | + | |
2444 | +/* | |
2445 | + * Perform a SIOCGIFCONF call. This structure will change | |
2446 | + * size eventually, and there is nothing I can do about it. | |
2447 | + * Thus we will need a 'compatibility mode'. | |
2448 | + */ | |
2449 | + | |
2450 | +static int dev_ifconf(char __user *arg) | |
2451 | +{ | |
2452 | + struct ifconf ifc; | |
2453 | + struct net_device *dev; | |
2454 | + char __user *pos; | |
2455 | + int len; | |
2456 | + int total; | |
2457 | + int i; | |
2458 | + | |
2459 | + /* | |
2460 | + * Fetch the caller's info block. | |
2461 | + */ | |
2462 | + | |
2463 | + if (copy_from_user(&ifc, arg, sizeof(struct ifconf))) | |
2464 | + return -EFAULT; | |
2465 | + | |
2466 | + pos = ifc.ifc_buf; | |
2467 | + len = ifc.ifc_len; | |
2468 | + | |
2469 | + /* | |
2470 | + * Loop over the interfaces, and write an info block for each. | |
2471 | + */ | |
2472 | + | |
2473 | + total = 0; | |
2474 | + for (dev = dev_base; dev; dev = dev->next) { | |
2475 | + for (i = 0; i < NPROTO; i++) { | |
2476 | + if (gifconf_list[i]) { | |
2477 | + int done; | |
2478 | + if (!pos) | |
2479 | + done = gifconf_list[i](dev, NULL, 0); | |
2480 | + else | |
2481 | + done = gifconf_list[i](dev, pos + total, | |
2482 | + len - total); | |
2483 | + if (done < 0) | |
2484 | + return -EFAULT; | |
2485 | + total += done; | |
2486 | + } | |
2487 | + } | |
2488 | + } | |
2489 | + | |
2490 | + /* | |
2491 | + * All done. Write the updated control block back to the caller. | |
2492 | + */ | |
2493 | + ifc.ifc_len = total; | |
2494 | + | |
2495 | + /* | |
2496 | + * Both BSD and Solaris return 0 here, so we do too. | |
2497 | + */ | |
2498 | + return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0; | |
2499 | +} | |
2500 | + | |
2501 | +#ifdef CONFIG_PROC_FS | |
2502 | +/* | |
2503 | + * This is invoked by the /proc filesystem handler to display a device | |
2504 | + * in detail. | |
2505 | + */ | |
2506 | +static __inline__ struct net_device *dev_get_idx(loff_t pos) | |
2507 | +{ | |
2508 | + struct net_device *dev; | |
2509 | + loff_t i; | |
2510 | + | |
2511 | + for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next); | |
2512 | + | |
2513 | + return i == pos ? dev : NULL; | |
2514 | +} | |
2515 | + | |
2516 | +void *dev_seq_start(struct seq_file *seq, loff_t *pos) | |
2517 | +{ | |
2518 | + read_lock(&dev_base_lock); | |
2519 | + return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN; | |
2520 | +} | |
2521 | + | |
2522 | +void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |
2523 | +{ | |
2524 | + ++*pos; | |
2525 | + return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next; | |
2526 | +} | |
2527 | + | |
2528 | +void dev_seq_stop(struct seq_file *seq, void *v) | |
2529 | +{ | |
2530 | + read_unlock(&dev_base_lock); | |
2531 | +} | |
2532 | + | |
2533 | +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev) | |
2534 | +{ | |
2535 | + if (dev->get_stats) { | |
2536 | + struct net_device_stats *stats = dev->get_stats(dev); | |
2537 | + | |
2538 | + seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu " | |
2539 | + "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n", | |
2540 | + dev->name, stats->rx_bytes, stats->rx_packets, | |
2541 | + stats->rx_errors, | |
2542 | + stats->rx_dropped + stats->rx_missed_errors, | |
2543 | + stats->rx_fifo_errors, | |
2544 | + stats->rx_length_errors + stats->rx_over_errors + | |
2545 | + stats->rx_crc_errors + stats->rx_frame_errors, | |
2546 | + stats->rx_compressed, stats->multicast, | |
2547 | + stats->tx_bytes, stats->tx_packets, | |
2548 | + stats->tx_errors, stats->tx_dropped, | |
2549 | + stats->tx_fifo_errors, stats->collisions, | |
2550 | + stats->tx_carrier_errors + | |
2551 | + stats->tx_aborted_errors + | |
2552 | + stats->tx_window_errors + | |
2553 | + stats->tx_heartbeat_errors, | |
2554 | + stats->tx_compressed); | |
2555 | + } else | |
2556 | + seq_printf(seq, "%6s: No statistics available.\n", dev->name); | |
2557 | +} | |
2558 | + | |
2559 | +/* | |
2560 | + * Called from the PROCfs module. This now uses the new arbitrary sized | |
2561 | + * /proc/net interface to create /proc/net/dev | |
2562 | + */ | |
2563 | +static int dev_seq_show(struct seq_file *seq, void *v) | |
2564 | +{ | |
2565 | + if (v == SEQ_START_TOKEN) | |
2566 | + seq_puts(seq, "Inter-| Receive " | |
2567 | + " | Transmit\n" | |
2568 | + " face |bytes packets errs drop fifo frame " | |
2569 | + "compressed multicast|bytes packets errs " | |
2570 | + "drop fifo colls carrier compressed\n"); | |
2571 | + else | |
2572 | + dev_seq_printf_stats(seq, v); | |
2573 | + return 0; | |
2574 | +} | |
2575 | + | |
2576 | +static struct netif_rx_stats *softnet_get_online(loff_t *pos) | |
2577 | +{ | |
2578 | + struct netif_rx_stats *rc = NULL; | |
2579 | + | |
2580 | + while (*pos < NR_CPUS) | |
2581 | + if (cpu_online(*pos)) { | |
2582 | + rc = &per_cpu(netdev_rx_stat, *pos); | |
2583 | + break; | |
2584 | + } else | |
2585 | + ++*pos; | |
2586 | + return rc; | |
2587 | +} | |
2588 | + | |
2589 | +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos) | |
2590 | +{ | |
2591 | + return softnet_get_online(pos); | |
2592 | +} | |
2593 | + | |
2594 | +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos) | |
2595 | +{ | |
2596 | + ++*pos; | |
2597 | + return softnet_get_online(pos); | |
2598 | +} | |
2599 | + | |
2600 | +static void softnet_seq_stop(struct seq_file *seq, void *v) | |
2601 | +{ | |
2602 | +} | |
2603 | + | |
2604 | +static int softnet_seq_show(struct seq_file *seq, void *v) | |
2605 | +{ | |
2606 | + struct netif_rx_stats *s = v; | |
2607 | + | |
2608 | + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", | |
2609 | + s->total, s->dropped, s->time_squeeze, 0, | |
2610 | + 0, 0, 0, 0, /* was fastroute */ | |
2611 | + s->cpu_collision ); | |
2612 | + return 0; | |
2613 | +} | |
2614 | + | |
2615 | +static struct seq_operations dev_seq_ops = { | |
2616 | + .start = dev_seq_start, | |
2617 | + .next = dev_seq_next, | |
2618 | + .stop = dev_seq_stop, | |
2619 | + .show = dev_seq_show, | |
2620 | +}; | |
2621 | + | |
2622 | +static int dev_seq_open(struct inode *inode, struct file *file) | |
2623 | +{ | |
2624 | + return seq_open(file, &dev_seq_ops); | |
2625 | +} | |
2626 | + | |
2627 | +static const struct file_operations dev_seq_fops = { | |
2628 | + .owner = THIS_MODULE, | |
2629 | + .open = dev_seq_open, | |
2630 | + .read = seq_read, | |
2631 | + .llseek = seq_lseek, | |
2632 | + .release = seq_release, | |
2633 | +}; | |
2634 | + | |
2635 | +static struct seq_operations softnet_seq_ops = { | |
2636 | + .start = softnet_seq_start, | |
2637 | + .next = softnet_seq_next, | |
2638 | + .stop = softnet_seq_stop, | |
2639 | + .show = softnet_seq_show, | |
2640 | +}; | |
2641 | + | |
2642 | +static int softnet_seq_open(struct inode *inode, struct file *file) | |
2643 | +{ | |
2644 | + return seq_open(file, &softnet_seq_ops); | |
2645 | +} | |
2646 | + | |
2647 | +static const struct file_operations softnet_seq_fops = { | |
2648 | + .owner = THIS_MODULE, | |
2649 | + .open = softnet_seq_open, | |
2650 | + .read = seq_read, | |
2651 | + .llseek = seq_lseek, | |
2652 | + .release = seq_release, | |
2653 | +}; | |
2654 | + | |
2655 | +#ifdef CONFIG_WIRELESS_EXT | |
2656 | +extern int wireless_proc_init(void); | |
2657 | +#else | |
2658 | +#define wireless_proc_init() 0 | |
2659 | +#endif | |
2660 | + | |
2661 | +static int __init dev_proc_init(void) | |
2662 | +{ | |
2663 | + int rc = -ENOMEM; | |
2664 | + | |
2665 | + if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops)) | |
2666 | + goto out; | |
2667 | + if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops)) | |
2668 | + goto out_dev; | |
2669 | + if (wireless_proc_init()) | |
2670 | + goto out_softnet; | |
2671 | + rc = 0; | |
2672 | +out: | |
2673 | + return rc; | |
2674 | +out_softnet: | |
2675 | + proc_net_remove("softnet_stat"); | |
2676 | +out_dev: | |
2677 | + proc_net_remove("dev"); | |
2678 | + goto out; | |
2679 | +} | |
2680 | +#else | |
2681 | +#define dev_proc_init() 0 | |
2682 | +#endif /* CONFIG_PROC_FS */ | |
2683 | + | |
2684 | + | |
2685 | +/** | |
2686 | + * netdev_set_master - set up master/slave pair | |
2687 | + * @slave: slave device | |
2688 | + * @master: new master device | |
2689 | + * | |
2690 | + * Changes the master device of the slave. Pass %NULL to break the | |
2691 | + * bonding. The caller must hold the RTNL semaphore. On a failure | |
2692 | + * a negative errno code is returned. On success the reference counts | |
2693 | + * are adjusted, %RTM_NEWLINK is sent to the routing socket and the | |
2694 | + * function returns zero. | |
2695 | + */ | |
2696 | +int netdev_set_master(struct net_device *slave, struct net_device *master) | |
2697 | +{ | |
2698 | + struct net_device *old = slave->master; | |
2699 | + | |
2700 | + ASSERT_RTNL(); | |
2701 | + | |
2702 | + if (master) { | |
2703 | + if (old) | |
2704 | + return -EBUSY; | |
2705 | + dev_hold(master); | |
2706 | + } | |
2707 | + | |
2708 | + slave->master = master; | |
2709 | + | |
2710 | + synchronize_net(); | |
2711 | + | |
2712 | + if (old) | |
2713 | + dev_put(old); | |
2714 | + | |
2715 | + if (master) | |
2716 | + slave->flags |= IFF_SLAVE; | |
2717 | + else | |
2718 | + slave->flags &= ~IFF_SLAVE; | |
2719 | + | |
2720 | + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE); | |
2721 | + return 0; | |
2722 | +} | |
2723 | + | |
2724 | +/** | |
2725 | + * dev_set_promiscuity - update promiscuity count on a device | |
2726 | + * @dev: device | |
2727 | + * @inc: modifier | |
2728 | + * | |
2729 | + * Add or remove promiscuity from a device. While the count in the device | |
2730 | + * remains above zero the interface remains promiscuous. Once it hits zero | |
2731 | + * the device reverts back to normal filtering operation. A negative inc | |
2732 | + * value is used to drop promiscuity on the device. | |
2733 | + */ | |
2734 | +void dev_set_promiscuity(struct net_device *dev, int inc) | |
2735 | +{ | |
2736 | + unsigned short old_flags = dev->flags; | |
2737 | + | |
2738 | + if ((dev->promiscuity += inc) == 0) | |
2739 | + dev->flags &= ~IFF_PROMISC; | |
2740 | + else | |
2741 | + dev->flags |= IFF_PROMISC; | |
2742 | + if (dev->flags != old_flags) { | |
2743 | + dev_mc_upload(dev); | |
2744 | + printk(KERN_INFO "device %s %s promiscuous mode\n", | |
2745 | + dev->name, (dev->flags & IFF_PROMISC) ? "entered" : | |
2746 | + "left"); | |
2747 | + audit_log(current->audit_context, GFP_ATOMIC, | |
2748 | + AUDIT_ANOM_PROMISCUOUS, | |
2749 | + "dev=%s prom=%d old_prom=%d auid=%u", | |
2750 | + dev->name, (dev->flags & IFF_PROMISC), | |
2751 | + (old_flags & IFF_PROMISC), | |
2752 | + audit_get_loginuid(current->audit_context)); | |
2753 | + } | |
2754 | +} | |
2755 | + | |
2756 | +/** | |
2757 | + * dev_set_allmulti - update allmulti count on a device | |
2758 | + * @dev: device | |
2759 | + * @inc: modifier | |
2760 | + * | |
2761 | + * Add or remove reception of all multicast frames to a device. While the | |
2762 | + * count in the device remains above zero the interface remains listening | |
2763 | + * to all interfaces. Once it hits zero the device reverts back to normal | |
2764 | + * filtering operation. A negative @inc value is used to drop the counter | |
2765 | + * when releasing a resource needing all multicasts. | |
2766 | + */ | |
2767 | + | |
2768 | +void dev_set_allmulti(struct net_device *dev, int inc) | |
2769 | +{ | |
2770 | + unsigned short old_flags = dev->flags; | |
2771 | + | |
2772 | + dev->flags |= IFF_ALLMULTI; | |
2773 | + if ((dev->allmulti += inc) == 0) | |
2774 | + dev->flags &= ~IFF_ALLMULTI; | |
2775 | + if (dev->flags ^ old_flags) | |
2776 | + dev_mc_upload(dev); | |
2777 | +} | |
2778 | + | |
2779 | +unsigned dev_get_flags(const struct net_device *dev) | |
2780 | +{ | |
2781 | + unsigned flags; | |
2782 | + | |
2783 | + flags = (dev->flags & ~(IFF_PROMISC | | |
2784 | + IFF_ALLMULTI | | |
2785 | + IFF_RUNNING | | |
2786 | + IFF_LOWER_UP | | |
2787 | + IFF_DORMANT)) | | |
2788 | + (dev->gflags & (IFF_PROMISC | | |
2789 | + IFF_ALLMULTI)); | |
2790 | + | |
2791 | + if (netif_running(dev)) { | |
2792 | + if (netif_oper_up(dev)) | |
2793 | + flags |= IFF_RUNNING; | |
2794 | + if (netif_carrier_ok(dev)) | |
2795 | + flags |= IFF_LOWER_UP; | |
2796 | + if (netif_dormant(dev)) | |
2797 | + flags |= IFF_DORMANT; | |
2798 | + } | |
2799 | + | |
2800 | + return flags; | |
2801 | +} | |
2802 | + | |
2803 | +int dev_change_flags(struct net_device *dev, unsigned flags) | |
2804 | +{ | |
2805 | + int ret; | |
2806 | + int old_flags = dev->flags; | |
2807 | + | |
2808 | + /* | |
2809 | + * Set the flags on our device. | |
2810 | + */ | |
2811 | + | |
2812 | + dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP | | |
2813 | + IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL | | |
2814 | + IFF_AUTOMEDIA)) | | |
2815 | + (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC | | |
2816 | + IFF_ALLMULTI)); | |
2817 | + | |
2818 | + /* | |
2819 | + * Load in the correct multicast list now the flags have changed. | |
2820 | + */ | |
2821 | + | |
2822 | + dev_mc_upload(dev); | |
2823 | + | |
2824 | + /* | |
2825 | + * Have we downed the interface. We handle IFF_UP ourselves | |
2826 | + * according to user attempts to set it, rather than blindly | |
2827 | + * setting it. | |
2828 | + */ | |
2829 | + | |
2830 | + ret = 0; | |
2831 | + if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */ | |
2832 | + ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev); | |
2833 | + | |
2834 | + if (!ret) | |
2835 | + dev_mc_upload(dev); | |
2836 | + } | |
2837 | + | |
2838 | + if (dev->flags & IFF_UP && | |
2839 | + ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | | |
2840 | + IFF_VOLATILE))) | |
2841 | + raw_notifier_call_chain(&netdev_chain, | |
2842 | + NETDEV_CHANGE, dev); | |
2843 | + | |
2844 | + if ((flags ^ dev->gflags) & IFF_PROMISC) { | |
2845 | + int inc = (flags & IFF_PROMISC) ? +1 : -1; | |
2846 | + dev->gflags ^= IFF_PROMISC; | |
2847 | + dev_set_promiscuity(dev, inc); | |
2848 | + } | |
2849 | + | |
2850 | + /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI | |
2851 | + is important. Some (broken) drivers set IFF_PROMISC, when | |
2852 | + IFF_ALLMULTI is requested not asking us and not reporting. | |
2853 | + */ | |
2854 | + if ((flags ^ dev->gflags) & IFF_ALLMULTI) { | |
2855 | + int inc = (flags & IFF_ALLMULTI) ? +1 : -1; | |
2856 | + dev->gflags ^= IFF_ALLMULTI; | |
2857 | + dev_set_allmulti(dev, inc); | |
2858 | + } | |
2859 | + | |
2860 | + if (old_flags ^ dev->flags) | |
2861 | + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags); | |
2862 | + | |
2863 | + return ret; | |
2864 | +} | |
2865 | + | |
2866 | +int dev_set_mtu(struct net_device *dev, int new_mtu) | |
2867 | +{ | |
2868 | + int err; | |
2869 | + | |
2870 | + if (new_mtu == dev->mtu) | |
2871 | + return 0; | |
2872 | + | |
2873 | + /* MTU must be positive. */ | |
2874 | + if (new_mtu < 0) | |
2875 | + return -EINVAL; | |
2876 | + | |
2877 | + if (!netif_device_present(dev)) | |
2878 | + return -ENODEV; | |
2879 | + | |
2880 | + err = 0; | |
2881 | + if (dev->change_mtu) | |
2882 | + err = dev->change_mtu(dev, new_mtu); | |
2883 | + else | |
2884 | + dev->mtu = new_mtu; | |
2885 | + if (!err && dev->flags & IFF_UP) | |
2886 | + raw_notifier_call_chain(&netdev_chain, | |
2887 | + NETDEV_CHANGEMTU, dev); | |
2888 | + return err; | |
2889 | +} | |
2890 | + | |
2891 | +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) | |
2892 | +{ | |
2893 | + int err; | |
2894 | + | |
2895 | + if (!dev->set_mac_address) | |
2896 | + return -EOPNOTSUPP; | |
2897 | + if (sa->sa_family != dev->type) | |
2898 | + return -EINVAL; | |
2899 | + if (!netif_device_present(dev)) | |
2900 | + return -ENODEV; | |
2901 | + err = dev->set_mac_address(dev, sa); | |
2902 | + if (!err) | |
2903 | + raw_notifier_call_chain(&netdev_chain, | |
2904 | + NETDEV_CHANGEADDR, dev); | |
2905 | + return err; | |
2906 | +} | |
2907 | + | |
2908 | +/* | |
2909 | + * Perform the SIOCxIFxxx calls. | |
2910 | + */ | |
2911 | +static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) | |
2912 | +{ | |
2913 | + int err; | |
2914 | + struct net_device *dev = __dev_get_by_name(ifr->ifr_name); | |
2915 | + | |
2916 | + if (!dev) | |
2917 | + return -ENODEV; | |
2918 | + | |
2919 | + switch (cmd) { | |
2920 | + case SIOCGIFFLAGS: /* Get interface flags */ | |
2921 | + ifr->ifr_flags = dev_get_flags(dev); | |
2922 | + return 0; | |
2923 | + | |
2924 | + case SIOCSIFFLAGS: /* Set interface flags */ | |
2925 | + return dev_change_flags(dev, ifr->ifr_flags); | |
2926 | + | |
2927 | + case SIOCGIFMETRIC: /* Get the metric on the interface | |
2928 | + (currently unused) */ | |
2929 | + ifr->ifr_metric = 0; | |
2930 | + return 0; | |
2931 | + | |
2932 | + case SIOCSIFMETRIC: /* Set the metric on the interface | |
2933 | + (currently unused) */ | |
2934 | + return -EOPNOTSUPP; | |
2935 | + | |
2936 | + case SIOCGIFMTU: /* Get the MTU of a device */ | |
2937 | + ifr->ifr_mtu = dev->mtu; | |
2938 | + return 0; | |
2939 | + | |
2940 | + case SIOCSIFMTU: /* Set the MTU of a device */ | |
2941 | + return dev_set_mtu(dev, ifr->ifr_mtu); | |
2942 | + | |
2943 | + case SIOCGIFHWADDR: | |
2944 | + if (!dev->addr_len) | |
2945 | + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data); | |
2946 | + else | |
2947 | + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr, | |
2948 | + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | |
2949 | + ifr->ifr_hwaddr.sa_family = dev->type; | |
2950 | + return 0; | |
2951 | + | |
2952 | + case SIOCSIFHWADDR: | |
2953 | + return dev_set_mac_address(dev, &ifr->ifr_hwaddr); | |
2954 | + | |
2955 | + case SIOCSIFHWBROADCAST: | |
2956 | + if (ifr->ifr_hwaddr.sa_family != dev->type) | |
2957 | + return -EINVAL; | |
2958 | + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, | |
2959 | + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); | |
2960 | + raw_notifier_call_chain(&netdev_chain, | |
2961 | + NETDEV_CHANGEADDR, dev); | |
2962 | + return 0; | |
2963 | + | |
2964 | + case SIOCGIFMAP: | |
2965 | + ifr->ifr_map.mem_start = dev->mem_start; | |
2966 | + ifr->ifr_map.mem_end = dev->mem_end; | |
2967 | + ifr->ifr_map.base_addr = dev->base_addr; | |
2968 | + ifr->ifr_map.irq = dev->irq; | |
2969 | + ifr->ifr_map.dma = dev->dma; | |
2970 | + ifr->ifr_map.port = dev->if_port; | |
2971 | + return 0; | |
2972 | + | |
2973 | + case SIOCSIFMAP: | |
2974 | + if (dev->set_config) { | |
2975 | + if (!netif_device_present(dev)) | |
2976 | + return -ENODEV; | |
2977 | + return dev->set_config(dev, &ifr->ifr_map); | |
2978 | + } | |
2979 | + return -EOPNOTSUPP; | |
2980 | + | |
2981 | + case SIOCADDMULTI: | |
2982 | + if (!dev->set_multicast_list || | |
2983 | + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | |
2984 | + return -EINVAL; | |
2985 | + if (!netif_device_present(dev)) | |
2986 | + return -ENODEV; | |
2987 | + return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data, | |
2988 | + dev->addr_len, 1); | |
2989 | + | |
2990 | + case SIOCDELMULTI: | |
2991 | + if (!dev->set_multicast_list || | |
2992 | + ifr->ifr_hwaddr.sa_family != AF_UNSPEC) | |
2993 | + return -EINVAL; | |
2994 | + if (!netif_device_present(dev)) | |
2995 | + return -ENODEV; | |
2996 | + return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data, | |
2997 | + dev->addr_len, 1); | |
2998 | + | |
2999 | + case SIOCGIFINDEX: | |
3000 | + ifr->ifr_ifindex = dev->ifindex; | |
3001 | + return 0; | |
3002 | + | |
3003 | + case SIOCGIFTXQLEN: | |
3004 | + ifr->ifr_qlen = dev->tx_queue_len; | |
3005 | + return 0; | |
3006 | + | |
3007 | + case SIOCSIFTXQLEN: | |
3008 | + if (ifr->ifr_qlen < 0) | |
3009 | + return -EINVAL; | |
3010 | + dev->tx_queue_len = ifr->ifr_qlen; | |
3011 | + return 0; | |
3012 | + | |
3013 | + case SIOCSIFNAME: | |
3014 | + ifr->ifr_newname[IFNAMSIZ-1] = '\0'; | |
3015 | + return dev_change_name(dev, ifr->ifr_newname); | |
3016 | + | |
3017 | + /* | |
3018 | + * Unknown or private ioctl | |
3019 | + */ | |
3020 | + | |
3021 | + default: | |
3022 | + if ((cmd >= SIOCDEVPRIVATE && | |
3023 | + cmd <= SIOCDEVPRIVATE + 15) || | |
3024 | + cmd == SIOCBONDENSLAVE || | |
3025 | + cmd == SIOCBONDRELEASE || | |
3026 | + cmd == SIOCBONDSETHWADDR || | |
3027 | + cmd == SIOCBONDSLAVEINFOQUERY || | |
3028 | + cmd == SIOCBONDINFOQUERY || | |
3029 | + cmd == SIOCBONDCHANGEACTIVE || | |
3030 | + cmd == SIOCGMIIPHY || | |
3031 | + cmd == SIOCGMIIREG || | |
3032 | + cmd == SIOCSMIIREG || | |
3033 | + cmd == SIOCBRADDIF || | |
3034 | + cmd == SIOCBRDELIF || | |
3035 | + cmd == SIOCWANDEV) { | |
3036 | + err = -EOPNOTSUPP; | |
3037 | + if (dev->do_ioctl) { | |
3038 | + if (netif_device_present(dev)) | |
3039 | + err = dev->do_ioctl(dev, ifr, | |
3040 | + cmd); | |
3041 | + else | |
3042 | + err = -ENODEV; | |
3043 | + } | |
3044 | + } else | |
3045 | + err = -EINVAL; | |
3046 | + | |
3047 | + } | |
3048 | + return err; | |
3049 | +} | |
3050 | + | |
3051 | +/* | |
3052 | + * This function handles all "interface"-type I/O control requests. The actual | |
3053 | + * 'doing' part of this is dev_ifsioc above. | |
3054 | + */ | |
3055 | + | |
3056 | +/** | |
3057 | + * dev_ioctl - network device ioctl | |
3058 | + * @cmd: command to issue | |
3059 | + * @arg: pointer to a struct ifreq in user space | |
3060 | + * | |
3061 | + * Issue ioctl functions to devices. This is normally called by the | |
3062 | + * user space syscall interfaces but can sometimes be useful for | |
3063 | + * other purposes. The return value is the return from the syscall if | |
3064 | + * positive or a negative errno code on error. | |
3065 | + */ | |
3066 | + | |
3067 | +int dev_ioctl(unsigned int cmd, void __user *arg) | |
3068 | +{ | |
3069 | + struct ifreq ifr; | |
3070 | + int ret; | |
3071 | + char *colon; | |
3072 | + | |
3073 | + /* One special case: SIOCGIFCONF takes ifconf argument | |
3074 | + and requires shared lock, because it sleeps writing | |
3075 | + to user space. | |
3076 | + */ | |
3077 | + | |
3078 | + if (cmd == SIOCGIFCONF) { | |
3079 | + rtnl_lock(); | |
3080 | + ret = dev_ifconf((char __user *) arg); | |
3081 | + rtnl_unlock(); | |
3082 | + return ret; | |
3083 | + } | |
3084 | + if (cmd == SIOCGIFNAME) | |
3085 | + return dev_ifname((struct ifreq __user *)arg); | |
3086 | + | |
3087 | + if (copy_from_user(&ifr, arg, sizeof(struct ifreq))) | |
3088 | + return -EFAULT; | |
3089 | + | |
3090 | + ifr.ifr_name[IFNAMSIZ-1] = 0; | |
3091 | + | |
3092 | + colon = strchr(ifr.ifr_name, ':'); | |
3093 | + if (colon) | |
3094 | + *colon = 0; | |
3095 | + | |
3096 | + /* | |
3097 | + * See which interface the caller is talking about. | |
3098 | + */ | |
3099 | + | |
3100 | + switch (cmd) { | |
3101 | + /* | |
3102 | + * These ioctl calls: | |
3103 | + * - can be done by all. | |
3104 | + * - atomic and do not require locking. | |
3105 | + * - return a value | |
3106 | + */ | |
3107 | + case SIOCGIFFLAGS: | |
3108 | + case SIOCGIFMETRIC: | |
3109 | + case SIOCGIFMTU: | |
3110 | + case SIOCGIFHWADDR: | |
3111 | + case SIOCGIFSLAVE: | |
3112 | + case SIOCGIFMAP: | |
3113 | + case SIOCGIFINDEX: | |
3114 | + case SIOCGIFTXQLEN: | |
3115 | + dev_load(ifr.ifr_name); | |
3116 | + read_lock(&dev_base_lock); | |
3117 | + ret = dev_ifsioc(&ifr, cmd); | |
3118 | + read_unlock(&dev_base_lock); | |
3119 | + if (!ret) { | |
3120 | + if (colon) | |
3121 | + *colon = ':'; | |
3122 | + if (copy_to_user(arg, &ifr, | |
3123 | + sizeof(struct ifreq))) | |
3124 | + ret = -EFAULT; | |
3125 | + } | |
3126 | + return ret; | |
3127 | + | |
3128 | + case SIOCETHTOOL: | |
3129 | + dev_load(ifr.ifr_name); | |
3130 | + rtnl_lock(); | |
3131 | + ret = dev_ethtool(&ifr); | |
3132 | + rtnl_unlock(); | |
3133 | + if (!ret) { | |
3134 | + if (colon) | |
3135 | + *colon = ':'; | |
3136 | + if (copy_to_user(arg, &ifr, | |
3137 | + sizeof(struct ifreq))) | |
3138 | + ret = -EFAULT; | |
3139 | + } | |
3140 | + return ret; | |
3141 | + | |
3142 | + /* | |
3143 | + * These ioctl calls: | |
3144 | + * - require superuser power. | |
3145 | + * - require strict serialization. | |
3146 | + * - return a value | |
3147 | + */ | |
3148 | + case SIOCGMIIPHY: | |
3149 | + case SIOCGMIIREG: | |
3150 | + case SIOCSIFNAME: | |
3151 | + if (!capable(CAP_NET_ADMIN)) | |
3152 | + return -EPERM; | |
3153 | + dev_load(ifr.ifr_name); | |
3154 | + rtnl_lock(); | |
3155 | + ret = dev_ifsioc(&ifr, cmd); | |
3156 | + rtnl_unlock(); | |
3157 | + if (!ret) { | |
3158 | + if (colon) | |
3159 | + *colon = ':'; | |
3160 | + if (copy_to_user(arg, &ifr, | |
3161 | + sizeof(struct ifreq))) | |
3162 | + ret = -EFAULT; | |
3163 | + } | |
3164 | + return ret; | |
3165 | + | |
3166 | + /* | |
3167 | + * These ioctl calls: | |
3168 | + * - require superuser power. | |
3169 | + * - require strict serialization. | |
3170 | + * - do not return a value | |
3171 | + */ | |
3172 | + case SIOCSIFFLAGS: | |
3173 | + case SIOCSIFMETRIC: | |
3174 | + case SIOCSIFMTU: | |
3175 | + case SIOCSIFMAP: | |
3176 | + case SIOCSIFHWADDR: | |
3177 | + case SIOCSIFSLAVE: | |
3178 | + case SIOCADDMULTI: | |
3179 | + case SIOCDELMULTI: | |
3180 | + case SIOCSIFHWBROADCAST: | |
3181 | + case SIOCSIFTXQLEN: | |
3182 | + case SIOCSMIIREG: | |
3183 | + case SIOCBONDENSLAVE: | |
3184 | + case SIOCBONDRELEASE: | |
3185 | + case SIOCBONDSETHWADDR: | |
3186 | + case SIOCBONDCHANGEACTIVE: | |
3187 | + case SIOCBRADDIF: | |
3188 | + case SIOCBRDELIF: | |
3189 | + if (!capable(CAP_NET_ADMIN)) | |
3190 | + return -EPERM; | |
3191 | + /* fall through */ | |
3192 | + case SIOCBONDSLAVEINFOQUERY: | |
3193 | + case SIOCBONDINFOQUERY: | |
3194 | + dev_load(ifr.ifr_name); | |
3195 | + rtnl_lock(); | |
3196 | + ret = dev_ifsioc(&ifr, cmd); | |
3197 | + rtnl_unlock(); | |
3198 | + return ret; | |
3199 | + | |
3200 | + case SIOCGIFMEM: | |
3201 | + /* Get the per device memory space. We can add this but | |
3202 | + * currently do not support it */ | |
3203 | + case SIOCSIFMEM: | |
3204 | + /* Set the per device memory buffer space. | |
3205 | + * Not applicable in our case */ | |
3206 | + case SIOCSIFLINK: | |
3207 | + return -EINVAL; | |
3208 | + | |
3209 | + /* | |
3210 | + * Unknown or private ioctl. | |
3211 | + */ | |
3212 | + default: | |
3213 | + if (cmd == SIOCWANDEV || | |
3214 | + (cmd >= SIOCDEVPRIVATE && | |
3215 | + cmd <= SIOCDEVPRIVATE + 15)) { | |
3216 | + dev_load(ifr.ifr_name); | |
3217 | + rtnl_lock(); | |
3218 | + ret = dev_ifsioc(&ifr, cmd); | |
3219 | + rtnl_unlock(); | |
3220 | + if (!ret && copy_to_user(arg, &ifr, | |
3221 | + sizeof(struct ifreq))) | |
3222 | + ret = -EFAULT; | |
3223 | + return ret; | |
3224 | + } | |
3225 | +#ifdef CONFIG_WIRELESS_EXT | |
3226 | + /* Take care of Wireless Extensions */ | |
3227 | + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { | |
3228 | + /* If command is `set a parameter', or | |
3229 | + * `get the encoding parameters', check if | |
3230 | + * the user has the right to do it */ | |
3231 | + if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE | |
3232 | + || cmd == SIOCGIWENCODEEXT) { | |
3233 | + if (!capable(CAP_NET_ADMIN)) | |
3234 | + return -EPERM; | |
3235 | + } | |
3236 | + dev_load(ifr.ifr_name); | |
3237 | + rtnl_lock(); | |
3238 | + /* Follow me in net/core/wireless.c */ | |
3239 | + ret = wireless_process_ioctl(&ifr, cmd); | |
3240 | + rtnl_unlock(); | |
3241 | + if (IW_IS_GET(cmd) && | |
3242 | + copy_to_user(arg, &ifr, | |
3243 | + sizeof(struct ifreq))) | |
3244 | + ret = -EFAULT; | |
3245 | + return ret; | |
3246 | + } | |
3247 | +#endif /* CONFIG_WIRELESS_EXT */ | |
3248 | + return -EINVAL; | |
3249 | + } | |
3250 | +} | |
3251 | + | |
3252 | + | |
3253 | +/** | |
3254 | + * dev_new_index - allocate an ifindex | |
3255 | + * | |
3256 | + * Returns a suitable unique value for a new device interface | |
3257 | + * number. The caller must hold the rtnl semaphore or the | |
3258 | + * dev_base_lock to be sure it remains unique. | |
3259 | + */ | |
3260 | +static int dev_new_index(void) | |
3261 | +{ | |
3262 | + static int ifindex; | |
3263 | + for (;;) { | |
3264 | + if (++ifindex <= 0) | |
3265 | + ifindex = 1; | |
3266 | + if (!__dev_get_by_index(ifindex)) | |
3267 | + return ifindex; | |
3268 | + } | |
3269 | +} | |
3270 | + | |
3271 | +static int dev_boot_phase = 1; | |
3272 | + | |
3273 | +/* Delayed registration/unregisteration */ | |
3274 | +static DEFINE_SPINLOCK(net_todo_list_lock); | |
3275 | +static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list); | |
3276 | + | |
3277 | +static inline void net_set_todo(struct net_device *dev) | |
3278 | +{ | |
3279 | + spin_lock(&net_todo_list_lock); | |
3280 | + list_add_tail(&dev->todo_list, &net_todo_list); | |
3281 | + spin_unlock(&net_todo_list_lock); | |
3282 | +} | |
3283 | + | |
3284 | +/** | |
3285 | + * register_netdevice - register a network device | |
3286 | + * @dev: device to register | |
3287 | + * | |
3288 | + * Take a completed network device structure and add it to the kernel | |
3289 | + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier | |
3290 | + * chain. 0 is returned on success. A negative errno code is returned | |
3291 | + * on a failure to set up the device, or if the name is a duplicate. | |
3292 | + * | |
3293 | + * Callers must hold the rtnl semaphore. You may want | |
3294 | + * register_netdev() instead of this. | |
3295 | + * | |
3296 | + * BUGS: | |
3297 | + * The locking appears insufficient to guarantee two parallel registers | |
3298 | + * will not get the same name. | |
3299 | + */ | |
3300 | + | |
3301 | +int register_netdevice(struct net_device *dev) | |
3302 | +{ | |
3303 | + struct hlist_head *head; | |
3304 | + struct hlist_node *p; | |
3305 | + int ret; | |
3306 | + | |
3307 | + BUG_ON(dev_boot_phase); | |
3308 | + ASSERT_RTNL(); | |
3309 | + | |
3310 | + might_sleep(); | |
3311 | + | |
3312 | + /* When net_device's are persistent, this will be fatal. */ | |
3313 | + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); | |
3314 | + | |
3315 | + spin_lock_init(&dev->queue_lock); | |
3316 | + spin_lock_init(&dev->_xmit_lock); | |
3317 | + dev->xmit_lock_owner = -1; | |
3318 | +#ifdef CONFIG_NET_CLS_ACT | |
3319 | + spin_lock_init(&dev->ingress_lock); | |
3320 | +#endif | |
3321 | + | |
3322 | + dev->iflink = -1; | |
3323 | + | |
3324 | + /* Init, if this function is available */ | |
3325 | + if (dev->init) { | |
3326 | + ret = dev->init(dev); | |
3327 | + if (ret) { | |
3328 | + if (ret > 0) | |
3329 | + ret = -EIO; | |
3330 | + goto out; | |
3331 | + } | |
3332 | + } | |
3333 | + | |
3334 | + if (!dev_valid_name(dev->name)) { | |
3335 | + ret = -EINVAL; | |
3336 | + goto out; | |
3337 | + } | |
3338 | + | |
3339 | + dev->ifindex = dev_new_index(); | |
3340 | + if (dev->iflink == -1) | |
3341 | + dev->iflink = dev->ifindex; | |
3342 | + | |
3343 | + /* Check for existence of name */ | |
3344 | + head = dev_name_hash(dev->name); | |
3345 | + hlist_for_each(p, head) { | |
3346 | + struct net_device *d | |
3347 | + = hlist_entry(p, struct net_device, name_hlist); | |
3348 | + if (!strncmp(d->name, dev->name, IFNAMSIZ)) { | |
3349 | + ret = -EEXIST; | |
3350 | + goto out; | |
3351 | + } | |
3352 | + } | |
3353 | + | |
3354 | + /* Fix illegal SG+CSUM combinations. */ | |
3355 | + if ((dev->features & NETIF_F_SG) && | |
3356 | + !(dev->features & NETIF_F_ALL_CSUM)) { | |
3357 | + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n", | |
3358 | + dev->name); | |
3359 | + dev->features &= ~NETIF_F_SG; | |
3360 | + } | |
3361 | + | |
3362 | + /* TSO requires that SG is present as well. */ | |
3363 | + if ((dev->features & NETIF_F_TSO) && | |
3364 | + !(dev->features & NETIF_F_SG)) { | |
3365 | + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n", | |
3366 | + dev->name); | |
3367 | + dev->features &= ~NETIF_F_TSO; | |
3368 | + } | |
3369 | + if (dev->features & NETIF_F_UFO) { | |
3370 | + if (!(dev->features & NETIF_F_HW_CSUM)) { | |
3371 | + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " | |
3372 | + "NETIF_F_HW_CSUM feature.\n", | |
3373 | + dev->name); | |
3374 | + dev->features &= ~NETIF_F_UFO; | |
3375 | + } | |
3376 | + if (!(dev->features & NETIF_F_SG)) { | |
3377 | + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no " | |
3378 | + "NETIF_F_SG feature.\n", | |
3379 | + dev->name); | |
3380 | + dev->features &= ~NETIF_F_UFO; | |
3381 | + } | |
3382 | + } | |
3383 | + | |
3384 | + /* | |
3385 | + * nil rebuild_header routine, | |
3386 | + * that should be never called and used as just bug trap. | |
3387 | + */ | |
3388 | + | |
3389 | + if (!dev->rebuild_header) | |
3390 | + dev->rebuild_header = default_rebuild_header; | |
3391 | + | |
3392 | + ret = netdev_register_sysfs(dev); | |
3393 | + if (ret) | |
3394 | + goto out; | |
3395 | + dev->reg_state = NETREG_REGISTERED; | |
3396 | + | |
3397 | + /* | |
3398 | + * Default initial state at registry is that the | |
3399 | + * device is present. | |
3400 | + */ | |
3401 | + | |
3402 | + set_bit(__LINK_STATE_PRESENT, &dev->state); | |
3403 | + | |
3404 | + dev->next = NULL; | |
3405 | + dev_init_scheduler(dev); | |
3406 | + write_lock_bh(&dev_base_lock); | |
3407 | + *dev_tail = dev; | |
3408 | + dev_tail = &dev->next; | |
3409 | + hlist_add_head(&dev->name_hlist, head); | |
3410 | + hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); | |
3411 | + dev_hold(dev); | |
3412 | + write_unlock_bh(&dev_base_lock); | |
3413 | + | |
3414 | + /* Notify protocols, that a new device appeared. */ | |
3415 | + raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); | |
3416 | + | |
3417 | + ret = 0; | |
3418 | + | |
3419 | +out: | |
3420 | + return ret; | |
3421 | +} | |
3422 | + | |
3423 | +/** | |
3424 | + * register_netdev - register a network device | |
3425 | + * @dev: device to register | |
3426 | + * | |
3427 | + * Take a completed network device structure and add it to the kernel | |
3428 | + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier | |
3429 | + * chain. 0 is returned on success. A negative errno code is returned | |
3430 | + * on a failure to set up the device, or if the name is a duplicate. | |
3431 | + * | |
3432 | + * This is a wrapper around register_netdev that takes the rtnl semaphore | |
3433 | + * and expands the device name if you passed a format string to | |
3434 | + * alloc_netdev. | |
3435 | + */ | |
3436 | +int register_netdev(struct net_device *dev) | |
3437 | +{ | |
3438 | + int err; | |
3439 | + | |
3440 | + rtnl_lock(); | |
3441 | + | |
3442 | + /* | |
3443 | + * If the name is a format string the caller wants us to do a | |
3444 | + * name allocation. | |
3445 | + */ | |
3446 | + if (strchr(dev->name, '%')) { | |
3447 | + err = dev_alloc_name(dev, dev->name); | |
3448 | + if (err < 0) | |
3449 | + goto out; | |
3450 | + } | |
3451 | + | |
3452 | + err = register_netdevice(dev); | |
3453 | +out: | |
3454 | + rtnl_unlock(); | |
3455 | + return err; | |
3456 | +} | |
3457 | +EXPORT_SYMBOL(register_netdev); | |
3458 | + | |
3459 | +/* | |
3460 | + * netdev_wait_allrefs - wait until all references are gone. | |
3461 | + * | |
3462 | + * This is called when unregistering network devices. | |
3463 | + * | |
3464 | + * Any protocol or device that holds a reference should register | |
3465 | + * for netdevice notification, and cleanup and put back the | |
3466 | + * reference if they receive an UNREGISTER event. | |
3467 | + * We can get stuck here if buggy protocols don't correctly | |
3468 | + * call dev_put. | |
3469 | + */ | |
3470 | +static void netdev_wait_allrefs(struct net_device *dev) | |
3471 | +{ | |
3472 | + unsigned long rebroadcast_time, warning_time; | |
3473 | + | |
3474 | + rebroadcast_time = warning_time = jiffies; | |
3475 | + while (atomic_read(&dev->refcnt) != 0) { | |
3476 | + if (time_after(jiffies, rebroadcast_time + 1 * HZ)) { | |
3477 | + rtnl_lock(); | |
3478 | + | |
3479 | + /* Rebroadcast unregister notification */ | |
3480 | + raw_notifier_call_chain(&netdev_chain, | |
3481 | + NETDEV_UNREGISTER, dev); | |
3482 | + | |
3483 | + if (test_bit(__LINK_STATE_LINKWATCH_PENDING, | |
3484 | + &dev->state)) { | |
3485 | + /* We must not have linkwatch events | |
3486 | + * pending on unregister. If this | |
3487 | + * happens, we simply run the queue | |
3488 | + * unscheduled, resulting in a noop | |
3489 | + * for this device. | |
3490 | + */ | |
3491 | + linkwatch_run_queue(); | |
3492 | + } | |
3493 | + | |
3494 | + __rtnl_unlock(); | |
3495 | + | |
3496 | + rebroadcast_time = jiffies; | |
3497 | + } | |
3498 | + | |
3499 | + msleep(250); | |
3500 | + | |
3501 | + if (time_after(jiffies, warning_time + 10 * HZ)) { | |
3502 | + printk(KERN_EMERG "unregister_netdevice: " | |
3503 | + "waiting for %s to become free. Usage " | |
3504 | + "count = %d\n", | |
3505 | + dev->name, atomic_read(&dev->refcnt)); | |
3506 | + warning_time = jiffies; | |
3507 | + } | |
3508 | + } | |
3509 | +} | |
3510 | + | |
3511 | +/* The sequence is: | |
3512 | + * | |
3513 | + * rtnl_lock(); | |
3514 | + * ... | |
3515 | + * register_netdevice(x1); | |
3516 | + * register_netdevice(x2); | |
3517 | + * ... | |
3518 | + * unregister_netdevice(y1); | |
3519 | + * unregister_netdevice(y2); | |
3520 | + * ... | |
3521 | + * rtnl_unlock(); | |
3522 | + * free_netdev(y1); | |
3523 | + * free_netdev(y2); | |
3524 | + * | |
3525 | + * We are invoked by rtnl_unlock() after it drops the semaphore. | |
3526 | + * This allows us to deal with problems: | |
3527 | + * 1) We can delete sysfs objects which invoke hotplug | |
3528 | + * without deadlocking with linkwatch via keventd. | |
3529 | + * 2) Since we run with the RTNL semaphore not held, we can sleep | |
3530 | + * safely in order to wait for the netdev refcnt to drop to zero. | |
3531 | + */ | |
3532 | +static DEFINE_MUTEX(net_todo_run_mutex); | |
3533 | +void netdev_run_todo(void) | |
3534 | +{ | |
3535 | + struct list_head list; | |
3536 | + | |
3537 | + /* Need to guard against multiple cpu's getting out of order. */ | |
3538 | + mutex_lock(&net_todo_run_mutex); | |
3539 | + | |
3540 | + /* Not safe to do outside the semaphore. We must not return | |
3541 | + * until all unregister events invoked by the local processor | |
3542 | + * have been completed (either by this todo run, or one on | |
3543 | + * another cpu). | |
3544 | + */ | |
3545 | + if (list_empty(&net_todo_list)) | |
3546 | + goto out; | |
3547 | + | |
3548 | + /* Snapshot list, allow later requests */ | |
3549 | + spin_lock(&net_todo_list_lock); | |
3550 | + list_replace_init(&net_todo_list, &list); | |
3551 | + spin_unlock(&net_todo_list_lock); | |
3552 | + | |
3553 | + while (!list_empty(&list)) { | |
3554 | + struct net_device *dev | |
3555 | + = list_entry(list.next, struct net_device, todo_list); | |
3556 | + list_del(&dev->todo_list); | |
3557 | + | |
3558 | + if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { | |
3559 | + printk(KERN_ERR "network todo '%s' but state %d\n", | |
3560 | + dev->name, dev->reg_state); | |
3561 | + dump_stack(); | |
3562 | + continue; | |
3563 | + } | |
3564 | + | |
3565 | + netdev_unregister_sysfs(dev); | |
3566 | + dev->reg_state = NETREG_UNREGISTERED; | |
3567 | + | |
3568 | + netdev_wait_allrefs(dev); | |
3569 | + | |
3570 | + /* paranoia */ | |
3571 | + BUG_ON(atomic_read(&dev->refcnt)); | |
3572 | + BUG_TRAP(!dev->ip_ptr); | |
3573 | + BUG_TRAP(!dev->ip6_ptr); | |
3574 | + BUG_TRAP(!dev->dn_ptr); | |
3575 | + | |
3576 | + /* It must be the very last action, | |
3577 | + * after this 'dev' may point to freed up memory. | |
3578 | + */ | |
3579 | + if (dev->destructor) | |
3580 | + dev->destructor(dev); | |
3581 | + } | |
3582 | + | |
3583 | +out: | |
3584 | + mutex_unlock(&net_todo_run_mutex); | |
3585 | +} | |
3586 | + | |
3587 | +/** | |
3588 | + * alloc_netdev - allocate network device | |
3589 | + * @sizeof_priv: size of private data to allocate space for | |
3590 | + * @name: device name format string | |
3591 | + * @setup: callback to initialize device | |
3592 | + * | |
3593 | + * Allocates a struct net_device with private data area for driver use | |
3594 | + * and performs basic initialization. | |
3595 | + */ | |
3596 | +struct net_device *alloc_netdev(int sizeof_priv, const char *name, | |
3597 | + void (*setup)(struct net_device *)) | |
3598 | +{ | |
3599 | + void *p; | |
3600 | + struct net_device *dev; | |
3601 | + int alloc_size; | |
3602 | + | |
3603 | + BUG_ON(strlen(name) >= sizeof(dev->name)); | |
3604 | + | |
3605 | + /* ensure 32-byte alignment of both the device and private area */ | |
3606 | + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; | |
3607 | + alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; | |
3608 | + | |
3609 | + p = kzalloc(alloc_size, GFP_KERNEL); | |
3610 | + if (!p) { | |
3611 | + printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); | |
3612 | + return NULL; | |
3613 | + } | |
3614 | + | |
3615 | + dev = (struct net_device *) | |
3616 | + (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST); | |
3617 | + dev->padded = (char *)dev - (char *)p; | |
3618 | + | |
3619 | + if (sizeof_priv) | |
3620 | + dev->priv = netdev_priv(dev); | |
3621 | + | |
3622 | + setup(dev); | |
3623 | + strcpy(dev->name, name); | |
3624 | + return dev; | |
3625 | +} | |
3626 | +EXPORT_SYMBOL(alloc_netdev); | |
3627 | + | |
3628 | +/** | |
3629 | + * free_netdev - free network device | |
3630 | + * @dev: device | |
3631 | + * | |
3632 | + * This function does the last stage of destroying an allocated device | |
3633 | + * interface. The reference to the device object is released. | |
3634 | + * If this is the last reference then it will be freed. | |
3635 | + */ | |
3636 | +void free_netdev(struct net_device *dev) | |
3637 | +{ | |
3638 | +#ifdef CONFIG_SYSFS | |
3639 | + /* Compatibility with error handling in drivers */ | |
3640 | + if (dev->reg_state == NETREG_UNINITIALIZED) { | |
3641 | + kfree((char *)dev - dev->padded); | |
3642 | + return; | |
3643 | + } | |
3644 | + | |
3645 | + BUG_ON(dev->reg_state != NETREG_UNREGISTERED); | |
3646 | + dev->reg_state = NETREG_RELEASED; | |
3647 | + | |
3648 | + /* will free via device release */ | |
3649 | + put_device(&dev->dev); | |
3650 | +#else | |
3651 | + kfree((char *)dev - dev->padded); | |
3652 | +#endif | |
3653 | +} | |
3654 | + | |
3655 | +/* Synchronize with packet receive processing. */ | |
3656 | +void synchronize_net(void) | |
3657 | +{ | |
3658 | + might_sleep(); | |
3659 | + synchronize_rcu(); | |
3660 | +} | |
3661 | + | |
3662 | +/** | |
3663 | + * unregister_netdevice - remove device from the kernel | |
3664 | + * @dev: device | |
3665 | + * | |
3666 | + * This function shuts down a device interface and removes it | |
3667 | + * from the kernel tables. On success 0 is returned, on a failure | |
3668 | + * a negative errno code is returned. | |
3669 | + * | |
3670 | + * Callers must hold the rtnl semaphore. You may want | |
3671 | + * unregister_netdev() instead of this. | |
3672 | + */ | |
3673 | + | |
3674 | +void unregister_netdevice(struct net_device *dev) | |
3675 | +{ | |
3676 | + struct net_device *d, **dp; | |
3677 | + | |
3678 | + BUG_ON(dev_boot_phase); | |
3679 | + ASSERT_RTNL(); | |
3680 | + | |
3681 | + /* Some devices call without registering for initialization unwind. */ | |
3682 | + if (dev->reg_state == NETREG_UNINITIALIZED) { | |
3683 | + printk(KERN_DEBUG "unregister_netdevice: device %s/%p never " | |
3684 | + "was registered\n", dev->name, dev); | |
3685 | + | |
3686 | + WARN_ON(1); | |
3687 | + return; | |
3688 | + } | |
3689 | + | |
3690 | + BUG_ON(dev->reg_state != NETREG_REGISTERED); | |
3691 | + | |
3692 | + /* If device is running, close it first. */ | |
3693 | + if (dev->flags & IFF_UP) | |
3694 | + dev_close(dev); | |
3695 | + | |
3696 | + /* And unlink it from device chain. */ | |
3697 | + for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) { | |
3698 | + if (d == dev) { | |
3699 | + write_lock_bh(&dev_base_lock); | |
3700 | + hlist_del(&dev->name_hlist); | |
3701 | + hlist_del(&dev->index_hlist); | |
3702 | + if (dev_tail == &dev->next) | |
3703 | + dev_tail = dp; | |
3704 | + *dp = d->next; | |
3705 | + write_unlock_bh(&dev_base_lock); | |
3706 | + break; | |
3707 | + } | |
3708 | + } | |
3709 | + BUG_ON(!d); | |
3710 | + | |
3711 | + dev->reg_state = NETREG_UNREGISTERING; | |
3712 | + | |
3713 | + synchronize_net(); | |
3714 | + | |
3715 | + /* Shutdown queueing discipline. */ | |
3716 | + dev_shutdown(dev); | |
3717 | + | |
3718 | + | |
3719 | + /* Notify protocols, that we are about to destroy | |
3720 | + this device. They should clean all the things. | |
3721 | + */ | |
3722 | + raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); | |
3723 | + | |
3724 | + /* | |
3725 | + * Flush the multicast chain | |
3726 | + */ | |
3727 | + dev_mc_discard(dev); | |
3728 | + | |
3729 | + if (dev->uninit) | |
3730 | + dev->uninit(dev); | |
3731 | + | |
3732 | + /* Notifier chain MUST detach us from master device. */ | |
3733 | + BUG_TRAP(!dev->master); | |
3734 | + | |
3735 | + /* Finish processing unregister after unlock */ | |
3736 | + net_set_todo(dev); | |
3737 | + | |
3738 | + synchronize_net(); | |
3739 | + | |
3740 | + dev_put(dev); | |
3741 | +} | |
3742 | + | |
3743 | +/** | |
3744 | + * unregister_netdev - remove device from the kernel | |
3745 | + * @dev: device | |
3746 | + * | |
3747 | + * This function shuts down a device interface and removes it | |
3748 | + * from the kernel tables. On success 0 is returned, on a failure | |
3749 | + * a negative errno code is returned. | |
3750 | + * | |
3751 | + * This is just a wrapper for unregister_netdevice that takes | |
3752 | + * the rtnl semaphore. In general you want to use this and not | |
3753 | + * unregister_netdevice. | |
3754 | + */ | |
3755 | +void unregister_netdev(struct net_device *dev) | |
3756 | +{ | |
3757 | + rtnl_lock(); | |
3758 | + unregister_netdevice(dev); | |
3759 | + rtnl_unlock(); | |
3760 | +} | |
3761 | + | |
3762 | +EXPORT_SYMBOL(unregister_netdev); | |
3763 | + | |
3764 | +static int dev_cpu_callback(struct notifier_block *nfb, | |
3765 | + unsigned long action, | |
3766 | + void *ocpu) | |
3767 | +{ | |
3768 | + struct sk_buff **list_skb; | |
3769 | + struct net_device **list_net; | |
3770 | + struct sk_buff *skb; | |
3771 | + unsigned int cpu, oldcpu = (unsigned long)ocpu; | |
3772 | + struct softnet_data *sd, *oldsd; | |
3773 | + | |
3774 | + if (action != CPU_DEAD) | |
3775 | + return NOTIFY_OK; | |
3776 | + | |
3777 | + local_irq_disable(); | |
3778 | + cpu = smp_processor_id(); | |
3779 | + sd = &per_cpu(softnet_data, cpu); | |
3780 | + oldsd = &per_cpu(softnet_data, oldcpu); | |
3781 | + | |
3782 | + /* Find end of our completion_queue. */ | |
3783 | + list_skb = &sd->completion_queue; | |
3784 | + while (*list_skb) | |
3785 | + list_skb = &(*list_skb)->next; | |
3786 | + /* Append completion queue from offline CPU. */ | |
3787 | + *list_skb = oldsd->completion_queue; | |
3788 | + oldsd->completion_queue = NULL; | |
3789 | + | |
3790 | + /* Find end of our output_queue. */ | |
3791 | + list_net = &sd->output_queue; | |
3792 | + while (*list_net) | |
3793 | + list_net = &(*list_net)->next_sched; | |
3794 | + /* Append output queue from offline CPU. */ | |
3795 | + *list_net = oldsd->output_queue; | |
3796 | + oldsd->output_queue = NULL; | |
3797 | + | |
3798 | + raise_softirq_irqoff(NET_TX_SOFTIRQ); | |
3799 | + local_irq_enable(); | |
3800 | + | |
3801 | + /* Process offline CPU's input_pkt_queue */ | |
3802 | + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) | |
3803 | + netif_rx(skb); | |
3804 | + | |
3805 | + return NOTIFY_OK; | |
3806 | +} | |
3807 | + | |
3808 | +#ifdef CONFIG_NET_DMA | |
3809 | +/** | |
3810 | + * net_dma_rebalance - | |
3811 | + * This is called when the number of channels allocated to the net_dma_client | |
3812 | + * changes. The net_dma_client tries to have one DMA channel per CPU. | |
3813 | + */ | |
3814 | +static void net_dma_rebalance(void) | |
3815 | +{ | |
3816 | + unsigned int cpu, i, n; | |
3817 | + struct dma_chan *chan; | |
3818 | + | |
3819 | + if (net_dma_count == 0) { | |
3820 | + for_each_online_cpu(cpu) | |
3821 | + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); | |
3822 | + return; | |
3823 | + } | |
3824 | + | |
3825 | + i = 0; | |
3826 | + cpu = first_cpu(cpu_online_map); | |
3827 | + | |
3828 | + rcu_read_lock(); | |
3829 | + list_for_each_entry(chan, &net_dma_client->channels, client_node) { | |
3830 | + n = ((num_online_cpus() / net_dma_count) | |
3831 | + + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); | |
3832 | + | |
3833 | + while(n) { | |
3834 | + per_cpu(softnet_data, cpu).net_dma = chan; | |
3835 | + cpu = next_cpu(cpu, cpu_online_map); | |
3836 | + n--; | |
3837 | + } | |
3838 | + i++; | |
3839 | + } | |
3840 | + rcu_read_unlock(); | |
3841 | +} | |
3842 | + | |
3843 | +/** | |
3844 | + * netdev_dma_event - event callback for the net_dma_client | |
3845 | + * @client: should always be net_dma_client | |
3846 | + * @chan: DMA channel for the event | |
3847 | + * @event: event type | |
3848 | + */ | |
3849 | +static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, | |
3850 | + enum dma_event event) | |
3851 | +{ | |
3852 | + spin_lock(&net_dma_event_lock); | |
3853 | + switch (event) { | |
3854 | + case DMA_RESOURCE_ADDED: | |
3855 | + net_dma_count++; | |
3856 | + net_dma_rebalance(); | |
3857 | + break; | |
3858 | + case DMA_RESOURCE_REMOVED: | |
3859 | + net_dma_count--; | |
3860 | + net_dma_rebalance(); | |
3861 | + break; | |
3862 | + default: | |
3863 | + break; | |
3864 | + } | |
3865 | + spin_unlock(&net_dma_event_lock); | |
3866 | +} | |
3867 | + | |
3868 | +/** | |
3869 | + * netdev_dma_regiser - register the networking subsystem as a DMA client | |
3870 | + */ | |
3871 | +static int __init netdev_dma_register(void) | |
3872 | +{ | |
3873 | + spin_lock_init(&net_dma_event_lock); | |
3874 | + net_dma_client = dma_async_client_register(netdev_dma_event); | |
3875 | + if (net_dma_client == NULL) | |
3876 | + return -ENOMEM; | |
3877 | + | |
3878 | + dma_async_client_chan_request(net_dma_client, num_online_cpus()); | |
3879 | + return 0; | |
3880 | +} | |
3881 | + | |
3882 | +#else | |
3883 | +static int __init netdev_dma_register(void) { return -ENODEV; } | |
3884 | +#endif /* CONFIG_NET_DMA */ | |
3885 | + | |
3886 | +/* | |
3887 | + * Initialize the DEV module. At boot time this walks the device list and | |
3888 | + * unhooks any devices that fail to initialise (normally hardware not | |
3889 | + * present) and leaves us with a valid list of present and active devices. | |
3890 | + * | |
3891 | + */ | |
3892 | + | |
3893 | +/* | |
3894 | + * This is called single threaded during boot, so no need | |
3895 | + * to take the rtnl semaphore. | |
3896 | + */ | |
3897 | +static int __init net_dev_init(void) | |
3898 | +{ | |
3899 | + int i, rc = -ENOMEM; | |
3900 | + | |
3901 | + BUG_ON(!dev_boot_phase); | |
3902 | + | |
3903 | + if (dev_proc_init()) | |
3904 | + goto out; | |
3905 | + | |
3906 | + if (netdev_sysfs_init()) | |
3907 | + goto out; | |
3908 | + | |
3909 | + INIT_LIST_HEAD(&ptype_all); | |
3910 | + for (i = 0; i < 16; i++) | |
3911 | + INIT_LIST_HEAD(&ptype_base[i]); | |
3912 | + | |
3913 | + for (i = 0; i < ARRAY_SIZE(dev_name_head); i++) | |
3914 | + INIT_HLIST_HEAD(&dev_name_head[i]); | |
3915 | + | |
3916 | + for (i = 0; i < ARRAY_SIZE(dev_index_head); i++) | |
3917 | + INIT_HLIST_HEAD(&dev_index_head[i]); | |
3918 | + | |
3919 | + /* | |
3920 | + * Initialise the packet receive queues. | |
3921 | + */ | |
3922 | + | |
3923 | + for_each_possible_cpu(i) { | |
3924 | + struct softnet_data *queue; | |
3925 | + | |
3926 | + queue = &per_cpu(softnet_data, i); | |
3927 | + skb_queue_head_init(&queue->input_pkt_queue); | |
3928 | + queue->completion_queue = NULL; | |
3929 | + INIT_LIST_HEAD(&queue->poll_list); | |
3930 | + set_bit(__LINK_STATE_START, &queue->backlog_dev.state); | |
3931 | + queue->backlog_dev.weight = weight_p; | |
3932 | + queue->backlog_dev.poll = process_backlog; | |
3933 | + atomic_set(&queue->backlog_dev.refcnt, 1); | |
3934 | + } | |
3935 | + | |
3936 | + netdev_dma_register(); | |
3937 | + | |
3938 | + dev_boot_phase = 0; | |
3939 | + | |
3940 | + open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); | |
3941 | + open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL); | |
3942 | + | |
3943 | + hotcpu_notifier(dev_cpu_callback, 0); | |
3944 | + dst_init(); | |
3945 | + dev_mcast_init(); | |
3946 | + rc = 0; | |
3947 | +out: | |
3948 | + return rc; | |
3949 | +} | |
3950 | + | |
3951 | +subsys_initcall(net_dev_init); | |
3952 | + | |
3953 | +EXPORT_SYMBOL(__dev_get_by_index); | |
3954 | +EXPORT_SYMBOL(__dev_get_by_name); | |
3955 | +EXPORT_SYMBOL(__dev_remove_pack); | |
3956 | +EXPORT_SYMBOL(dev_valid_name); | |
3957 | +EXPORT_SYMBOL(dev_add_pack); | |
3958 | +EXPORT_SYMBOL(dev_alloc_name); | |
3959 | +EXPORT_SYMBOL(dev_close); | |
3960 | +EXPORT_SYMBOL(dev_get_by_flags); | |
3961 | +EXPORT_SYMBOL(dev_get_by_index); | |
3962 | +EXPORT_SYMBOL(dev_get_by_name); | |
3963 | +EXPORT_SYMBOL(dev_open); | |
3964 | +EXPORT_SYMBOL(dev_queue_xmit); | |
3965 | +EXPORT_SYMBOL(dev_remove_pack); | |
3966 | +EXPORT_SYMBOL(dev_set_allmulti); | |
3967 | +EXPORT_SYMBOL(dev_set_promiscuity); | |
3968 | +EXPORT_SYMBOL(dev_change_flags); | |
3969 | +EXPORT_SYMBOL(dev_set_mtu); | |
3970 | +EXPORT_SYMBOL(dev_set_mac_address); | |
3971 | +EXPORT_SYMBOL(free_netdev); | |
3972 | +EXPORT_SYMBOL(netdev_boot_setup_check); | |
3973 | +EXPORT_SYMBOL(netdev_set_master); | |
3974 | +EXPORT_SYMBOL(netdev_state_change); | |
3975 | +EXPORT_SYMBOL(netif_receive_skb); | |
3976 | +EXPORT_SYMBOL(netif_rx); | |
3977 | +EXPORT_SYMBOL(register_gifconf); | |
3978 | +EXPORT_SYMBOL(register_netdevice); | |
3979 | +EXPORT_SYMBOL(register_netdevice_notifier); | |
3980 | +EXPORT_SYMBOL(skb_checksum_help); | |
3981 | +EXPORT_SYMBOL(synchronize_net); | |
3982 | +EXPORT_SYMBOL(unregister_netdevice); | |
3983 | +EXPORT_SYMBOL(unregister_netdevice_notifier); | |
3984 | +EXPORT_SYMBOL(net_enable_timestamp); | |
3985 | +EXPORT_SYMBOL(net_disable_timestamp); | |
3986 | +EXPORT_SYMBOL(dev_get_flags); | |
3987 | + | |
3988 | +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) | |
3989 | +EXPORT_SYMBOL(br_handle_frame_hook); | |
3990 | +EXPORT_SYMBOL(br_fdb_get_hook); | |
3991 | +EXPORT_SYMBOL(br_fdb_put_hook); | |
3992 | +#endif | |
3993 | + | |
3994 | +#ifdef CONFIG_KMOD | |
3995 | +EXPORT_SYMBOL(dev_load); | |
3996 | +#endif | |
3997 | + | |
3998 | +EXPORT_PER_CPU_SYMBOL(softnet_data); | |
3999 | diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig | |
4000 | --- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000 | |
4001 | +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000 | |
4002 | @@ -0,0 +1,14 @@ | |
4003 | +config RING | |
4004 | + tristate "PF_RING sockets (EXPERIMENTAL)" | |
4005 | + depends on EXPERIMENTAL | |
4006 | + ---help--- | |
4007 | + PF_RING socket family, optimized for packet capture. | |
4008 | + If a PF_RING socket is bound to an adapter (via the bind() system | |
4009 | + call), such adapter will be used in read-only mode until the socket | |
4010 | + is destroyed. Whenever an incoming packet is received from the adapter | |
4011 | + it will not passed to upper layers, but instead it is copied to a ring | |
4012 | + buffer, which in turn is exported to user space applications via mmap. | |
4013 | + Please refer to http://luca.ntop.org/Ring.pdf for more. | |
4014 | + | |
4015 | + Say N unless you know what you are doing. | |
4016 | + | |
4017 | diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile | |
4018 | --- linux-2.6.21.4/net/ring/Makefile 1970-01-01 00:00:00.000000000 +0000 | |
4019 | +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile 2007-06-10 16:43:04.350421521 +0000 | |
4020 | @@ -0,0 +1,7 @@ | |
4021 | +# | |
4022 | +# Makefile for the ring driver. | |
4023 | +# | |
4024 | + | |
4025 | +obj-m += ring.o | |
4026 | + | |
4027 | +ring-objs := ring_packet.o | |
4028 | diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c | |
4029 | --- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000 | |
4030 | +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000 | |
4031 | @@ -0,0 +1,4258 @@ | |
4032 | +/* *************************************************************** | |
4033 | + * | |
4034 | + * (C) 2004-07 - Luca Deri <deri@ntop.org> | |
4035 | + * | |
4036 | + * This code includes contributions courtesy of | |
4037 | + * - Jeff Randall <jrandall@nexvu.com> | |
4038 | + * - Helmut Manck <helmut.manck@secunet.com> | |
4039 | + * - Brad Doctor <brad@stillsecure.com> | |
4040 | + * - Amit D. Chaudhary <amit_ml@rajgad.com> | |
4041 | + * - Francesco Fusco <fusco@ntop.org> | |
4042 | + * - Michael Stiller <ms@2scale.net> | |
4043 | + * | |
4044 | + * | |
4045 | + * This program is free software; you can redistribute it and/or modify | |
4046 | + * it under the terms of the GNU General Public License as published by | |
4047 | + * the Free Software Foundation; either version 2 of the License, or | |
4048 | + * (at your option) any later version. | |
4049 | + * | |
4050 | + * This program is distributed in the hope that it will be useful, | |
4051 | + * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
4052 | + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
4053 | + * GNU General Public License for more details. | |
4054 | + * | |
4055 | + * You should have received a copy of the GNU General Public License | |
4056 | + * along with this program; if not, write to the Free Software Foundation, | |
4057 | + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
4058 | + * | |
4059 | + */ | |
4060 | + | |
4061 | +#include <linux/version.h> | |
4062 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19)) | |
4063 | +#include <linux/autoconf.h> | |
4064 | +#else | |
4065 | +#include <linux/config.h> | |
4066 | +#endif | |
4067 | +#include <linux/module.h> | |
4068 | +#include <linux/kernel.h> | |
4069 | +#include <linux/socket.h> | |
4070 | +#include <linux/skbuff.h> | |
4071 | +#include <linux/rtnetlink.h> | |
4072 | +#include <linux/in.h> | |
4073 | +#include <linux/inet.h> | |
4074 | +#include <linux/in6.h> | |
4075 | +#include <linux/init.h> | |
4076 | +#include <linux/filter.h> | |
4077 | +#include <linux/ring.h> | |
4078 | +#include <linux/ip.h> | |
4079 | +#include <linux/tcp.h> | |
4080 | +#include <linux/udp.h> | |
4081 | +#include <linux/list.h> | |
4082 | +#include <linux/proc_fs.h> | |
4083 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
4084 | +#include <net/xfrm.h> | |
4085 | +#else | |
4086 | +#include <linux/poll.h> | |
4087 | +#endif | |
4088 | +#include <net/sock.h> | |
4089 | +#include <asm/io.h> /* needed for virt_to_phys() */ | |
4090 | +#ifdef CONFIG_INET | |
4091 | +#include <net/inet_common.h> | |
4092 | +#endif | |
4093 | + | |
4094 | +/* #define RING_DEBUG */ | |
4095 | + | |
4096 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11)) | |
4097 | +static inline int remap_page_range(struct vm_area_struct *vma, | |
4098 | + unsigned long uvaddr, | |
4099 | + unsigned long paddr, | |
4100 | + unsigned long size, | |
4101 | + pgprot_t prot) { | |
4102 | + return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT, | |
4103 | + size, prot)); | |
4104 | +} | |
4105 | +#endif | |
4106 | + | |
4107 | +/* ************************************************* */ | |
4108 | + | |
4109 | +#define CLUSTER_LEN 8 | |
4110 | + | |
4111 | +struct ring_cluster { | |
4112 | + u_short cluster_id; /* 0 = no cluster */ | |
4113 | + u_short num_cluster_elements; | |
4114 | + enum cluster_type hashing_mode; | |
4115 | + u_short hashing_id; | |
4116 | + struct sock *sk[CLUSTER_LEN]; | |
4117 | + struct ring_cluster *next; /* NULL = last element of the cluster */ | |
4118 | +}; | |
4119 | + | |
4120 | +/* ************************************************* */ | |
4121 | + | |
4122 | +struct ring_element { | |
4123 | + struct list_head list; | |
4124 | + struct sock *sk; | |
4125 | +}; | |
4126 | + | |
4127 | +/* ************************************************* */ | |
4128 | + | |
4129 | +struct ring_opt { | |
4130 | + struct net_device *ring_netdev; | |
4131 | + | |
4132 | + u_short ring_pid; | |
4133 | + | |
4134 | + /* Cluster */ | |
4135 | + u_short cluster_id; /* 0 = no cluster */ | |
4136 | + | |
4137 | + /* Reflector */ | |
4138 | + struct net_device *reflector_dev; | |
4139 | + | |
4140 | + /* Packet buffers */ | |
4141 | + unsigned long order; | |
4142 | + | |
4143 | + /* Ring Slots */ | |
4144 | + unsigned long ring_memory; | |
4145 | + FlowSlotInfo *slots_info; /* Basically it points to ring_memory */ | |
4146 | + char *ring_slots; /* Basically it points to ring_memory | |
4147 | + +sizeof(FlowSlotInfo) */ | |
4148 | + | |
4149 | + /* Packet Sampling */ | |
4150 | + u_int pktToSample, sample_rate; | |
4151 | + | |
4152 | + /* BPF Filter */ | |
4153 | + struct sk_filter *bpfFilter; | |
4154 | + | |
4155 | + /* Aho-Corasick */ | |
4156 | + ACSM_STRUCT2 * acsm; | |
4157 | + | |
4158 | + /* Locks */ | |
4159 | + atomic_t num_ring_slots_waiters; | |
4160 | + wait_queue_head_t ring_slots_waitqueue; | |
4161 | + rwlock_t ring_index_lock; | |
4162 | + | |
4163 | + /* Bloom Filters */ | |
4164 | + u_char bitmask_enabled; | |
4165 | + bitmask_selector mac_bitmask, vlan_bitmask, ip_bitmask, twin_ip_bitmask, | |
4166 | + port_bitmask, twin_port_bitmask, proto_bitmask; | |
4167 | + u_int32_t num_mac_bitmask_add, num_mac_bitmask_remove; | |
4168 | + u_int32_t num_vlan_bitmask_add, num_vlan_bitmask_remove; | |
4169 | + u_int32_t num_ip_bitmask_add, num_ip_bitmask_remove; | |
4170 | + u_int32_t num_port_bitmask_add, num_port_bitmask_remove; | |
4171 | + u_int32_t num_proto_bitmask_add, num_proto_bitmask_remove; | |
4172 | + | |
4173 | + /* Indexes (Internal) */ | |
4174 | + u_int insert_page_id, insert_slot_id; | |
4175 | +}; | |
4176 | + | |
4177 | +/* ************************************************* */ | |
4178 | + | |
4179 | +/* List of all ring sockets. */ | |
4180 | +static struct list_head ring_table; | |
4181 | +static u_int ring_table_size; | |
4182 | + | |
4183 | +/* List of all clusters */ | |
4184 | +static struct ring_cluster *ring_cluster_list; | |
4185 | + | |
4186 | +static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED; | |
4187 | + | |
4188 | +/* ********************************** */ | |
4189 | + | |
4190 | +/* /proc entry for ring module */ | |
4191 | +struct proc_dir_entry *ring_proc_dir = NULL; | |
4192 | +struct proc_dir_entry *ring_proc = NULL; | |
4193 | + | |
4194 | +static int ring_proc_get_info(char *, char **, off_t, int, int *, void *); | |
4195 | +static void ring_proc_add(struct ring_opt *pfr); | |
4196 | +static void ring_proc_remove(struct ring_opt *pfr); | |
4197 | +static void ring_proc_init(void); | |
4198 | +static void ring_proc_term(void); | |
4199 | + | |
4200 | +/* ********************************** */ | |
4201 | + | |
4202 | +/* Forward */ | |
4203 | +static struct proto_ops ring_ops; | |
4204 | + | |
4205 | +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11)) | |
4206 | +static struct proto ring_proto; | |
4207 | +#endif | |
4208 | + | |
4209 | +static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet, | |
4210 | + u_char real_skb); | |
4211 | +static int buffer_ring_handler(struct net_device *dev, char *data, int len); | |
4212 | +static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr); | |
4213 | + | |
4214 | +/* Extern */ | |
4215 | + | |
4216 | +/* ********************************** */ | |
4217 | + | |
4218 | +/* Defaults */ | |
4219 | +static unsigned int bucket_len = 128, num_slots = 4096, sample_rate = 1, | |
4220 | + transparent_mode = 1, enable_tx_capture = 1; | |
4221 | + | |
4222 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)) | |
4223 | +module_param(bucket_len, uint, 0644); | |
4224 | +module_param(num_slots, uint, 0644); | |
4225 | +module_param(sample_rate, uint, 0644); | |
4226 | +module_param(transparent_mode, uint, 0644); | |
4227 | +module_param(enable_tx_capture, uint, 0644); | |
4228 | +#else | |
4229 | +MODULE_PARM(bucket_len, "i"); | |
4230 | +MODULE_PARM(num_slots, "i"); | |
4231 | +MODULE_PARM(sample_rate, "i"); | |
4232 | +MODULE_PARM(transparent_mode, "i"); | |
4233 | +MODULE_PARM(enable_tx_capture, "i"); | |
4234 | +#endif | |
4235 | + | |
4236 | +MODULE_PARM_DESC(bucket_len, "Number of ring buckets"); | |
4237 | +MODULE_PARM_DESC(num_slots, "Number of ring slots"); | |
4238 | +MODULE_PARM_DESC(sample_rate, "Ring packet sample rate"); | |
4239 | +MODULE_PARM_DESC(transparent_mode, | |
4240 | + "Set to 1 to set transparent mode " | |
4241 | + "(slower but backwards compatible)"); | |
4242 | + | |
4243 | +MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets"); | |
4244 | + | |
4245 | +/* ********************************** */ | |
4246 | + | |
4247 | +#define MIN_QUEUED_PKTS 64 | |
4248 | +#define MAX_QUEUE_LOOPS 64 | |
4249 | + | |
4250 | + | |
4251 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
4252 | +#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk) | |
4253 | +#define ring_sk(__sk) ((__sk)->sk_protinfo) | |
4254 | +#else | |
4255 | +#define ring_sk_datatype(a) (a) | |
4256 | +#define ring_sk(__sk) ((__sk)->protinfo.pf_ring) | |
4257 | +#endif | |
4258 | + | |
4259 | +#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; }) | |
4260 | + | |
4261 | +/* | |
4262 | + int dev_queue_xmit(struct sk_buff *skb) | |
4263 | + skb->dev; | |
4264 | + struct net_device *dev_get_by_name(const char *name) | |
4265 | +*/ | |
4266 | + | |
4267 | +/* ********************************** */ | |
4268 | + | |
4269 | +/* | |
4270 | +** $Id$ | |
4271 | +** | |
4272 | +** acsmx2.c | |
4273 | +** | |
4274 | +** Multi-Pattern Search Engine | |
4275 | +** | |
4276 | +** Aho-Corasick State Machine - version 2.0 | |
4277 | +** | |
4278 | +** Supports both Non-Deterministic and Deterministic Finite Automata | |
4279 | +** | |
4280 | +** | |
4281 | +** Reference - Efficient String matching: An Aid to Bibliographic Search | |
4282 | +** Alfred V Aho and Margaret J Corasick | |
4283 | +** Bell Labratories | |
4284 | +** Copyright(C) 1975 Association for Computing Machinery,Inc | |
4285 | +** | |
4286 | +** +++ | |
4287 | +** +++ Version 1.0 notes - Marc Norton: | |
4288 | +** +++ | |
4289 | +** | |
4290 | +** Original implementation based on the 4 algorithms in the paper by Aho & Corasick, | |
4291 | +** some implementation ideas from 'Practical Algorithms in C', and some | |
4292 | +** of my own. | |
4293 | +** | |
4294 | +** 1) Finds all occurrences of all patterns within a text. | |
4295 | +** | |
4296 | +** +++ | |
4297 | +** +++ Version 2.0 Notes - Marc Norton/Dan Roelker: | |
4298 | +** +++ | |
4299 | +** | |
4300 | +** New implementation modifies the state table storage and access model to use | |
4301 | +** compacted sparse vector storage. Dan Roelker and I hammered this strategy out | |
4302 | +** amongst many others in order to reduce memory usage and improve caching performance. | |
4303 | +** The memory usage is greatly reduced, we only use 1/4 of what we use to. The caching | |
4304 | +** performance is better in pure benchmarking tests, but does not show overall improvement | |
4305 | +** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing | |
4306 | +** many other things before we get back to a patteren match test, so the cache is voided. | |
4307 | +** | |
4308 | +** This versions has better caching performance characteristics, reduced memory, | |
4309 | +** more state table storage options, and requires no a priori case conversions. | |
4310 | +** It does maintain the same public interface. (Snort only used banded storage). | |
4311 | +** | |
4312 | +** 1) Supports NFA and DFA state machines, and basic keyword state machines | |
4313 | +** 2) Initial transition table uses Linked Lists | |
4314 | +** 3) Improved state table memory options. NFA and DFA state | |
4315 | +** transition tables are converted to one of 4 formats during compilation. | |
4316 | +** a) Full matrix | |
4317 | +** b) Sparse matrix | |
4318 | +** c) Banded matrix (Default-this is the only one used in snort) | |
4319 | +** d) Sparse-Banded matrix | |
4320 | +** 4) Added support for acstate_t in .h file so we can compile states as | |
4321 | +** 16, or 32 bit state values for another reduction in memory consumption, | |
4322 | +** smaller states allows more of the state table to be cached, and improves | |
4323 | +** performance on x86-P4. Your mileage may vary, especially on risc systems. | |
4324 | +** 5) Added a bool to each state transition list to indicate if there is a matching | |
4325 | +** pattern in the state. This prevents us from accessing another data array | |
4326 | +** and can improve caching/performance. | |
4327 | +** 6) The search functions are very sensitive, don't change them without extensive testing, | |
4328 | +** or you'll just spoil the caching and prefetching opportunities. | |
4329 | +** | |
4330 | +** Extras for fellow pattern matchers: | |
4331 | +** The table below explains the storage format used at each step. | |
4332 | +** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly. | |
4333 | +** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly. | |
4334 | +** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster | |
4335 | +** than the previous verion, due to caching performance. This cannot be fully realized in Snort yet, | |
4336 | +** but other applications may have better caching opportunities. | |
4337 | +** Snort only needs to use the banded or full storage. | |
4338 | +** | |
4339 | +** Transition table format at each processing stage. | |
4340 | +** ------------------------------------------------- | |
4341 | +** Patterns -> Keyword State Table (List) | |
4342 | +** Keyword State Table -> NFA (List) | |
4343 | +** NFA -> DFA (List) | |
4344 | +** DFA (List)-> Sparse Rows O(m-avg # transitions per state) | |
4345 | +** -> Banded Rows O(1) | |
4346 | +** -> Sparse-Banded Rows O(nb-# bands) | |
4347 | +** -> Full Matrix O(1) | |
4348 | +** | |
4349 | +** Copyright(C) 2002,2003,2004 Marc Norton | |
4350 | +** Copyright(C) 2003,2004 Daniel Roelker | |
4351 | +** Copyright(C) 2002,2003,2004 Sourcefire,Inc. | |
4352 | +** | |
4353 | +** This program is free software; you can redistribute it and/or modify | |
4354 | +** it under the terms of the GNU General Public License as published by | |
4355 | +** the Free Software Foundation; either version 2 of the License, or | |
4356 | +** (at your option) any later version. | |
4357 | +** | |
4358 | +** This program is distributed in the hope that it will be useful, | |
4359 | +** but WITHOUT ANY WARRANTY; without even the implied warranty of | |
4360 | +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
4361 | +** GNU General Public License for more details. | |
4362 | +** | |
4363 | +** You should have received a copy of the GNU General Public License | |
4364 | +** along with this program; if not, write to the Free Software | |
4365 | +** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
4366 | +* | |
4367 | +*/ | |
4368 | + | |
4369 | +/* | |
4370 | + * | |
4371 | + */ | |
4372 | +#define MEMASSERT(p,s) if(!p){printk("ACSM-No Memory: %s!\n",s);} | |
4373 | + | |
4374 | +/* | |
4375 | + * | |
4376 | + */ | |
4377 | +static int max_memory = 0; | |
4378 | + | |
4379 | +/* | |
4380 | + * | |
4381 | + */ | |
4382 | +typedef struct acsm_summary_s | |
4383 | +{ | |
4384 | + unsigned num_states; | |
4385 | + unsigned num_transitions; | |
4386 | + ACSM_STRUCT2 acsm; | |
4387 | + | |
4388 | +}acsm_summary_t; | |
4389 | + | |
4390 | +/* | |
4391 | + * | |
4392 | + */ | |
4393 | +static acsm_summary_t summary={0,0}; | |
4394 | + | |
4395 | +/* | |
4396 | +** Case Translation Table | |
4397 | +*/ | |
4398 | +static unsigned char xlatcase[256]; | |
4399 | +/* | |
4400 | + * | |
4401 | + */ | |
4402 | + | |
4403 | +inline int toupper(int ch) { | |
4404 | + if ( (unsigned int)(ch - 'a') < 26u ) | |
4405 | + ch += 'A' - 'a'; | |
4406 | + return ch; | |
4407 | +} | |
4408 | + | |
4409 | +static void init_xlatcase(void) | |
4410 | +{ | |
4411 | + int i; | |
4412 | + for (i = 0; i < 256; i++) | |
4413 | + { | |
4414 | + xlatcase[i] = toupper(i); | |
4415 | + } | |
4416 | +} | |
4417 | + | |
4418 | +/* | |
4419 | + * Case Conversion | |
4420 | + */ | |
4421 | +static | |
4422 | +inline | |
4423 | +void | |
4424 | +ConvertCaseEx (unsigned char *d, unsigned char *s, int m) | |
4425 | +{ | |
4426 | + int i; | |
4427 | +#ifdef XXXX | |
4428 | + int n; | |
4429 | + n = m & 3; | |
4430 | + m >>= 2; | |
4431 | + | |
4432 | + for (i = 0; i < m; i++ ) | |
4433 | + { | |
4434 | + d[0] = xlatcase[ s[0] ]; | |
4435 | + d[2] = xlatcase[ s[2] ]; | |
4436 | + d[1] = xlatcase[ s[1] ]; | |
4437 | + d[3] = xlatcase[ s[3] ]; | |
4438 | + d+=4; | |
4439 | + s+=4; | |
4440 | + } | |
4441 | + | |
4442 | + for (i=0; i < n; i++) | |
4443 | + { | |
4444 | + d[i] = xlatcase[ s[i] ]; | |
4445 | + } | |
4446 | +#else | |
4447 | + for (i=0; i < m; i++) | |
4448 | + { | |
4449 | + d[i] = xlatcase[ s[i] ]; | |
4450 | + } | |
4451 | + | |
4452 | +#endif | |
4453 | +} | |
4454 | + | |
4455 | + | |
4456 | +/* | |
4457 | + * | |
4458 | + */ | |
4459 | +static void * | |
4460 | +AC_MALLOC (int n) | |
4461 | +{ | |
4462 | + void *p; | |
4463 | + p = kmalloc (n, GFP_KERNEL); | |
4464 | + if (p) | |
4465 | + max_memory += n; | |
4466 | + return p; | |
4467 | +} | |
4468 | + | |
4469 | + | |
4470 | +/* | |
4471 | + * | |
4472 | + */ | |
4473 | +static void | |
4474 | +AC_FREE (void *p) | |
4475 | +{ | |
4476 | + if (p) | |
4477 | + kfree (p); | |
4478 | +} | |
4479 | + | |
4480 | + | |
4481 | +/* | |
4482 | + * Simple QUEUE NODE | |
4483 | + */ | |
4484 | +typedef struct _qnode | |
4485 | +{ | |
4486 | + int state; | |
4487 | + struct _qnode *next; | |
4488 | +} | |
4489 | + QNODE; | |
4490 | + | |
4491 | +/* | |
4492 | + * Simple QUEUE Structure | |
4493 | + */ | |
4494 | +typedef struct _queue | |
4495 | +{ | |
4496 | + QNODE * head, *tail; | |
4497 | + int count; | |
4498 | +} | |
4499 | + QUEUE; | |
4500 | + | |
4501 | +/* | |
4502 | + * Initialize the queue | |
4503 | + */ | |
4504 | +static void | |
4505 | +queue_init (QUEUE * s) | |
4506 | +{ | |
4507 | + s->head = s->tail = 0; | |
4508 | + s->count= 0; | |
4509 | +} | |
4510 | + | |
4511 | +/* | |
4512 | + * Find a State in the queue | |
4513 | + */ | |
4514 | +static int | |
4515 | +queue_find (QUEUE * s, int state) | |
4516 | +{ | |
4517 | + QNODE * q; | |
4518 | + q = s->head; | |
4519 | + while( q ) | |
4520 | + { | |
4521 | + if( q->state == state ) return 1; | |
4522 | + q = q->next; | |
4523 | + } | |
4524 | + return 0; | |
4525 | +} | |
4526 | + | |
4527 | +/* | |
4528 | + * Add Tail Item to queue (FiFo/LiLo) | |
4529 | + */ | |
4530 | +static void | |
4531 | +queue_add (QUEUE * s, int state) | |
4532 | +{ | |
4533 | + QNODE * q; | |
4534 | + | |
4535 | + if( queue_find( s, state ) ) return; | |
4536 | + | |
4537 | + if (!s->head) | |
4538 | + { | |
4539 | + q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE)); | |
4540 | + MEMASSERT (q, "queue_add"); | |
4541 | + q->state = state; | |
4542 | + q->next = 0; | |
4543 | + } | |
4544 | + else | |
4545 | + { | |
4546 | + q = (QNODE *) AC_MALLOC (sizeof (QNODE)); | |
4547 | + q->state = state; | |
4548 | + q->next = 0; | |
4549 | + s->tail->next = q; | |
4550 | + s->tail = q; | |
4551 | + } | |
4552 | + s->count++; | |
4553 | +} | |
4554 | + | |
4555 | + | |
4556 | +/* | |
4557 | + * Remove Head Item from queue | |
4558 | + */ | |
4559 | +static int | |
4560 | +queue_remove (QUEUE * s) | |
4561 | +{ | |
4562 | + int state = 0; | |
4563 | + QNODE * q; | |
4564 | + if (s->head) | |
4565 | + { | |
4566 | + q = s->head; | |
4567 | + state = q->state; | |
4568 | + s->head = s->head->next; | |
4569 | + s->count--; | |
4570 | + | |
4571 | + if( !s->head ) | |
4572 | + { | |
4573 | + s->tail = 0; | |
4574 | + s->count = 0; | |
4575 | + } | |
4576 | + AC_FREE (q); | |
4577 | + } | |
4578 | + return state; | |
4579 | +} | |
4580 | + | |
4581 | + | |
4582 | +/* | |
4583 | + * Return items in the queue | |
4584 | + */ | |
4585 | +static int | |
4586 | +queue_count (QUEUE * s) | |
4587 | +{ | |
4588 | + return s->count; | |
4589 | +} | |
4590 | + | |
4591 | + | |
4592 | +/* | |
4593 | + * Free the queue | |
4594 | + */ | |
4595 | +static void | |
4596 | +queue_free (QUEUE * s) | |
4597 | +{ | |
4598 | + while (queue_count (s)) | |
4599 | + { | |
4600 | + queue_remove (s); | |
4601 | + } | |
4602 | +} | |
4603 | + | |
4604 | +/* | |
4605 | + * Get Next State-NFA | |
4606 | + */ | |
4607 | +static | |
4608 | +int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input ) | |
4609 | +{ | |
4610 | + trans_node_t * t = acsm->acsmTransTable[state]; | |
4611 | + | |
4612 | + while( t ) | |
4613 | + { | |
4614 | + if( t->key == input ) | |
4615 | + { | |
4616 | + return t->next_state; | |
4617 | + } | |
4618 | + t=t->next; | |
4619 | + } | |
4620 | + | |
4621 | + if( state == 0 ) return 0; | |
4622 | + | |
4623 | + return ACSM_FAIL_STATE2; /* Fail state ??? */ | |
4624 | +} | |
4625 | + | |
4626 | +/* | |
4627 | + * Get Next State-DFA | |
4628 | + */ | |
4629 | +static | |
4630 | +int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input ) | |
4631 | +{ | |
4632 | + trans_node_t * t = acsm->acsmTransTable[state]; | |
4633 | + | |
4634 | + while( t ) | |
4635 | + { | |
4636 | + if( t->key == input ) | |
4637 | + { | |
4638 | + return t->next_state; | |
4639 | + } | |
4640 | + t = t->next; | |
4641 | + } | |
4642 | + | |
4643 | + return 0; /* default state */ | |
4644 | +} | |
4645 | +/* | |
4646 | + * Put Next State - Head insertion, and transition updates | |
4647 | + */ | |
4648 | +static | |
4649 | +int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state ) | |
4650 | +{ | |
4651 | + trans_node_t * p; | |
4652 | + trans_node_t * tnew; | |
4653 | + | |
4654 | + // printk(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state); | |
4655 | + | |
4656 | + | |
4657 | + /* Check if the transition already exists, if so just update the next_state */ | |
4658 | + p = acsm->acsmTransTable[state]; | |
4659 | + while( p ) | |
4660 | + { | |
4661 | + if( p->key == input ) /* transition already exists- reset the next state */ | |
4662 | + { | |
4663 | + p->next_state = next_state; | |
4664 | + return 0; | |
4665 | + } | |
4666 | + p=p->next; | |
4667 | + } | |
4668 | + | |
4669 | + /* Definitely not an existing transition - add it */ | |
4670 | + tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t)); | |
4671 | + if( !tnew ) return -1; | |
4672 | + | |
4673 | + tnew->key = input; | |
4674 | + tnew->next_state = next_state; | |
4675 | + tnew->next = 0; | |
4676 | + | |
4677 | + tnew->next = acsm->acsmTransTable[state]; | |
4678 | + acsm->acsmTransTable[state] = tnew; | |
4679 | + | |
4680 | + acsm->acsmNumTrans++; | |
4681 | + | |
4682 | + return 0; | |
4683 | +} | |
4684 | +/* | |
4685 | + * Free the entire transition table | |
4686 | + */ | |
4687 | +static | |
4688 | +int List_FreeTransTable( ACSM_STRUCT2 * acsm ) | |
4689 | +{ | |
4690 | + int i; | |
4691 | + trans_node_t * t, *p; | |
4692 | + | |
4693 | + if( !acsm->acsmTransTable ) return 0; | |
4694 | + | |
4695 | + for(i=0;i< acsm->acsmMaxStates;i++) | |
4696 | + { | |
4697 | + t = acsm->acsmTransTable[i]; | |
4698 | + | |
4699 | + while( t ) | |
4700 | + { | |
4701 | + p = t->next; | |
4702 | + kfree(t); | |
4703 | + t = p; | |
4704 | + max_memory -= sizeof(trans_node_t); | |
4705 | + } | |
4706 | + } | |
4707 | + | |
4708 | + kfree(acsm->acsmTransTable); | |
4709 | + | |
4710 | + max_memory -= sizeof(void*) * acsm->acsmMaxStates; | |
4711 | + | |
4712 | + acsm->acsmTransTable = 0; | |
4713 | + | |
4714 | + return 0; | |
4715 | +} | |
4716 | + | |
4717 | +/* | |
4718 | + * | |
4719 | + */ | |
4720 | +/* | |
4721 | + static | |
4722 | + int List_FreeList( trans_node_t * t ) | |
4723 | + { | |
4724 | + int tcnt=0; | |
4725 | + | |
4726 | + trans_node_t *p; | |
4727 | + | |
4728 | + while( t ) | |
4729 | + { | |
4730 | + p = t->next; | |
4731 | + kfree(t); | |
4732 | + t = p; | |
4733 | + max_memory -= sizeof(trans_node_t); | |
4734 | + tcnt++; | |
4735 | + } | |
4736 | + | |
4737 | + return tcnt; | |
4738 | + } | |
4739 | +*/ | |
4740 | + | |
4741 | +/* | |
4742 | + * Converts row of states from list to a full vector format | |
4743 | + */ | |
4744 | +static | |
4745 | +int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full ) | |
4746 | +{ | |
4747 | + int tcnt = 0; | |
4748 | + trans_node_t * t = acsm->acsmTransTable[ state ]; | |
4749 | + | |
4750 | + memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize); | |
4751 | + | |
4752 | + if( !t ) return 0; | |
4753 | + | |
4754 | + while(t) | |
4755 | + { | |
4756 | + full[ t->key ] = t->next_state; | |
4757 | + tcnt++; | |
4758 | + t = t->next; | |
4759 | + } | |
4760 | + return tcnt; | |
4761 | +} | |
4762 | + | |
4763 | +/* | |
4764 | + * Copy a Match List Entry - don't dup the pattern data | |
4765 | + */ | |
4766 | +static ACSM_PATTERN2* | |
4767 | +CopyMatchListEntry (ACSM_PATTERN2 * px) | |
4768 | +{ | |
4769 | + ACSM_PATTERN2 * p; | |
4770 | + | |
4771 | + p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2)); | |
4772 | + MEMASSERT (p, "CopyMatchListEntry"); | |
4773 | + | |
4774 | + memcpy (p, px, sizeof (ACSM_PATTERN2)); | |
4775 | + | |
4776 | + p->next = 0; | |
4777 | + | |
4778 | + return p; | |
4779 | +} | |
4780 | + | |
4781 | +/* | |
4782 | + * Check if a pattern is in the list already, | |
4783 | + * validate it using the 'id' field. This must be unique | |
4784 | + * for every pattern. | |
4785 | + */ | |
4786 | +/* | |
4787 | + static | |
4788 | + int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px) | |
4789 | + { | |
4790 | + ACSM_PATTERN2 * p; | |
4791 | + | |
4792 | + p = acsm->acsmMatchList[state]; | |
4793 | + while( p ) | |
4794 | + { | |
4795 | + if( p->id == px->id ) return 1; | |
4796 | + p = p->next; | |
4797 | + } | |
4798 | + | |
4799 | + return 0; | |
4800 | + } | |
4801 | +*/ | |
4802 | + | |
4803 | + | |
4804 | +/* | |
4805 | + * Add a pattern to the list of patterns terminated at this state. | |
4806 | + * Insert at front of list. | |
4807 | + */ | |
4808 | +static void | |
4809 | +AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px) | |
4810 | +{ | |
4811 | + ACSM_PATTERN2 * p; | |
4812 | + | |
4813 | + p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2)); | |
4814 | + | |
4815 | + MEMASSERT (p, "AddMatchListEntry"); | |
4816 | + | |
4817 | + memcpy (p, px, sizeof (ACSM_PATTERN2)); | |
4818 | + | |
4819 | + p->next = acsm->acsmMatchList[state]; | |
4820 | + | |
4821 | + acsm->acsmMatchList[state] = p; | |
4822 | +} | |
4823 | + | |
4824 | + | |
4825 | +static void | |
4826 | +AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p) | |
4827 | +{ | |
4828 | + int state, next, n; | |
4829 | + unsigned char *pattern; | |
4830 | + | |
4831 | + n = p->n; | |
4832 | + pattern = p->patrn; | |
4833 | + state = 0; | |
4834 | + | |
4835 | + /* | |
4836 | + * Match up pattern with existing states | |
4837 | + */ | |
4838 | + for (; n > 0; pattern++, n--) | |
4839 | + { | |
4840 | + next = List_GetNextState(acsm,state,*pattern); | |
4841 | + if (next == ACSM_FAIL_STATE2 || next == 0) | |
4842 | + { | |
4843 | + break; | |
4844 | + } | |
4845 | + state = next; | |
4846 | + } | |
4847 | + | |
4848 | + /* | |
4849 | + * Add new states for the rest of the pattern bytes, 1 state per byte | |
4850 | + */ | |
4851 | + for (; n > 0; pattern++, n--) | |
4852 | + { | |
4853 | + acsm->acsmNumStates++; | |
4854 | + List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates); | |
4855 | + state = acsm->acsmNumStates; | |
4856 | + } | |
4857 | + | |
4858 | + AddMatchListEntry (acsm, state, p ); | |
4859 | +} | |
4860 | + | |
4861 | +/* | |
4862 | + * Build A Non-Deterministic Finite Automata | |
4863 | + * The keyword state table must already be built, via AddPatternStates(). | |
4864 | + */ | |
4865 | +static void | |
4866 | +Build_NFA (ACSM_STRUCT2 * acsm) | |
4867 | +{ | |
4868 | + int r, s, i; | |
4869 | + QUEUE q, *queue = &q; | |
4870 | + acstate_t * FailState = acsm->acsmFailState; | |
4871 | + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList; | |
4872 | + ACSM_PATTERN2 * mlist,* px; | |
4873 | + | |
4874 | + /* Init a Queue */ | |
4875 | + queue_init (queue); | |
4876 | + | |
4877 | + | |
4878 | + /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */ | |
4879 | + for (i = 0; i < acsm->acsmAlphabetSize; i++) | |
4880 | + { | |
4881 | + s = List_GetNextState2(acsm,0,i); | |
4882 | + if( s ) | |
4883 | + { | |
4884 | + queue_add (queue, s); | |
4885 | + FailState[s] = 0; | |
4886 | + } | |
4887 | + } | |
4888 | + | |
4889 | + /* Build the fail state successive layer of transitions */ | |
4890 | + while (queue_count (queue) > 0) | |
4891 | + { | |
4892 | + r = queue_remove (queue); | |
4893 | + | |
4894 | + /* Find Final States for any Failure */ | |
4895 | + for (i = 0; i < acsm->acsmAlphabetSize; i++) | |
4896 | + { | |
4897 | + int fs, next; | |
4898 | + | |
4899 | + s = List_GetNextState(acsm,r,i); | |
4900 | + | |
4901 | + if( s != ACSM_FAIL_STATE2 ) | |
4902 | + { | |
4903 | + queue_add (queue, s); | |
4904 | + | |
4905 | + fs = FailState[r]; | |
4906 | + | |
4907 | + /* | |
4908 | + * Locate the next valid state for 'i' starting at fs | |
4909 | + */ | |
4910 | + while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 ) | |
4911 | + { | |
4912 | + fs = FailState[fs]; | |
4913 | + } | |
4914 | + | |
4915 | + /* | |
4916 | + * Update 's' state failure state to point to the next valid state | |
4917 | + */ | |
4918 | + FailState[s] = next; | |
4919 | + | |
4920 | + /* | |
4921 | + * Copy 'next'states MatchList to 's' states MatchList, | |
4922 | + * we copy them so each list can be AC_FREE'd later, | |
4923 | + * else we could just manipulate pointers to fake the copy. | |
4924 | + */ | |
4925 | + for( mlist = MatchList[next]; | |
4926 | + mlist; | |
4927 | + mlist = mlist->next) | |
4928 | + { | |
4929 | + px = CopyMatchListEntry (mlist); | |
4930 | + | |
4931 | + /* Insert at front of MatchList */ | |
4932 | + px->next = MatchList[s]; | |
4933 | + MatchList[s] = px; | |
4934 | + } | |
4935 | + } | |
4936 | + } | |
4937 | + } | |
4938 | + | |
4939 | + /* Clean up the queue */ | |
4940 | + queue_free (queue); | |
4941 | +} | |
4942 | + | |
4943 | +/* | |
4944 | + * Build Deterministic Finite Automata from the NFA | |
4945 | + */ | |
4946 | +static void | |
4947 | +Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm) | |
4948 | +{ | |
4949 | + int i, r, s, cFailState; | |
4950 | + QUEUE q, *queue = &q; | |
4951 | + acstate_t * FailState = acsm->acsmFailState; | |
4952 | + | |
4953 | + /* Init a Queue */ | |
4954 | + queue_init (queue); | |
4955 | + | |
4956 | + /* Add the state 0 transitions 1st */ | |
4957 | + for(i=0; i<acsm->acsmAlphabetSize; i++) | |
4958 | + { | |
4959 | + s = List_GetNextState(acsm,0,i); | |
4960 | + if ( s != 0 ) | |
4961 | + { | |
4962 | + queue_add (queue, s); | |
4963 | + } | |
4964 | + } | |
4965 | + | |
4966 | + /* Start building the next layer of transitions */ | |
4967 | + while( queue_count(queue) > 0 ) | |
4968 | + { | |
4969 | + r = queue_remove(queue); | |
4970 | + | |
4971 | + /* Process this states layer */ | |
4972 | + for (i = 0; i < acsm->acsmAlphabetSize; i++) | |
4973 | + { | |
4974 | + s = List_GetNextState(acsm,r,i); | |
4975 | + | |
4976 | + if( s != ACSM_FAIL_STATE2 && s!= 0) | |
4977 | + { | |
4978 | + queue_add (queue, s); | |
4979 | + } | |
4980 | + else | |
4981 | + { | |
4982 | + cFailState = List_GetNextState(acsm,FailState[r],i); | |
4983 | + | |
4984 | + if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 ) | |
4985 | + { | |
4986 | + List_PutNextState(acsm,r,i,cFailState); | |
4987 | + } | |
4988 | + } | |
4989 | + } | |
4990 | + } | |
4991 | + | |
4992 | + /* Clean up the queue */ | |
4993 | + queue_free (queue); | |
4994 | +} | |
4995 | + | |
4996 | +/* | |
4997 | + * | |
4998 | + * Convert a row lists for the state table to a full vector format | |
4999 | + * | |
5000 | + */ | |
5001 | +static int | |
5002 | +Conv_List_To_Full(ACSM_STRUCT2 * acsm) | |
5003 | +{ | |
5004 | + int tcnt, k; | |
5005 | + acstate_t * p; | |
5006 | + acstate_t ** NextState = acsm->acsmNextState; | |
5007 | + | |
5008 | + for(k=0;k<acsm->acsmMaxStates;k++) | |
5009 | + { | |
5010 | + p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) ); | |
5011 | + if(!p) return -1; | |
5012 | + | |
5013 | + tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 ); | |
5014 | + | |
5015 | + p[0] = ACF_FULL; | |
5016 | + p[1] = 0; /* no matches yet */ | |
5017 | + | |
5018 | + NextState[k] = p; /* now we have a full format row vector */ | |
5019 | + } | |
5020 | + | |
5021 | + return 0; | |
5022 | +} | |
5023 | + | |
5024 | +/* | |
5025 | + * Convert DFA memory usage from list based storage to a sparse-row storage. | |
5026 | + * | |
5027 | + * The Sparse format allows each row to be either full or sparse formatted. If the sparse row has | |
5028 | + * too many transitions, performance or space may dictate that we use the standard full formatting | |
5029 | + * for the row. More than 5 or 10 transitions per state ought to really whack performance. So the | |
5030 | + * user can specify the max state transitions per state allowed in the sparse format. | |
5031 | + * | |
5032 | + * Standard Full Matrix Format | |
5033 | + * --------------------------- | |
5034 | + * acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input) | |
5035 | + * | |
5036 | + * example: | |
5037 | + * | |
5038 | + * events -> a b c d e f g h i j k l m n o p | |
5039 | + * states | |
5040 | + * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 | |
5041 | + * | |
5042 | + * Sparse Format, each row : Words Value | |
5043 | + * 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands) | |
5044 | + * 2-2 bool match flag (indicates this state has pattern matches) | |
5045 | + * 3-3 sparse state count ( # of input/next-state pairs ) | |
5046 | + * 4-3+2*cnt 'input,next-state' pairs... each sizof(acstate_t) | |
5047 | + * | |
5048 | + * above example case yields: | |
5049 | + * Full Format: 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ... | |
5050 | + * Sparse format: 1, 3, 'a',1,'b',7,'f',3 - uses 2+2*ntransitions (non-default transitions) | |
5051 | + */ | |
5052 | +static int | |
5053 | +Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm) | |
5054 | +{ | |
5055 | + int cnt, m, k, i; | |
5056 | + acstate_t * p, state, maxstates=0; | |
5057 | + acstate_t ** NextState = acsm->acsmNextState; | |
5058 | + acstate_t full[MAX_ALPHABET_SIZE]; | |
5059 | + | |
5060 | + for(k=0;k<acsm->acsmMaxStates;k++) | |
5061 | + { | |
5062 | + cnt=0; | |
5063 | + | |
5064 | + List_ConvToFull(acsm, (acstate_t)k, full ); | |
5065 | + | |
5066 | + for (i = 0; i < acsm->acsmAlphabetSize; i++) | |
5067 | + { | |
5068 | + state = full[i]; | |
5069 | + if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++; | |
5070 | + } | |
5071 | + | |
5072 | + if( cnt > 0 ) maxstates++; | |
5073 | + | |
5074 | + if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes ) | |
5075 | + { | |
5076 | + p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) ); | |
5077 | + if(!p) return -1; | |
5078 | + | |
5079 | + p[0] = ACF_FULL; | |
5080 | + p[1] = 0; | |
5081 | + memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t)); | |
5082 | + } | |
5083 | + else | |
5084 | + { | |
5085 | + p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt)); | |
5086 | + if(!p) return -1; | |
5087 | + | |
5088 | + m = 0; | |
5089 | + p[m++] = ACF_SPARSE; | |
5090 | + p[m++] = 0; /* no matches */ | |
5091 | + p[m++] = cnt; | |
5092 | + | |
5093 | + for(i = 0; i < acsm->acsmAlphabetSize ; i++) | |
5094 | + { | |
5095 | + state = full[i]; | |
5096 | + if( state != 0 && state != ACSM_FAIL_STATE2 ) | |
5097 | + { | |
5098 | + p[m++] = i; | |
5099 | + p[m++] = state; | |
5100 | + } | |
5101 | + } | |
5102 | + } | |
5103 | + | |
5104 | + NextState[k] = p; /* now we are a sparse formatted state transition array */ | |
5105 | + } | |
5106 | + | |
5107 | + return 0; | |
5108 | +} | |
5109 | +/* | |
5110 | + Convert Full matrix to Banded row format. | |
5111 | + | |
5112 | + Word values | |
5113 | + 1 2 -> banded | |
5114 | + 2 n number of values | |
5115 | + 3 i index of 1st value (0-256) | |
5116 | + 4 - 3+n next-state values at each index | |
5117 | + | |
5118 | +*/ | |
5119 | +static int | |
5120 | +Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm) | |
5121 | +{ | |
5122 | + int first = -1, last; | |
5123 | + acstate_t * p, state, full[MAX_ALPHABET_SIZE]; | |
5124 | + acstate_t ** NextState = acsm->acsmNextState; | |
5125 | + int cnt,m,k,i; | |
5126 | + | |
5127 | + for(k=0;k<acsm->acsmMaxStates;k++) | |
5128 | + { | |
5129 | + cnt=0; | |
5130 | + | |
5131 | + List_ConvToFull(acsm, (acstate_t)k, full ); | |
5132 | + | |
5133 | + first=-1; | |
5134 | + last =-2; | |
5135 | + | |
5136 | + for (i = 0; i < acsm->acsmAlphabetSize; i++) | |
5137 | + { | |
5138 | + state = full[i]; | |
5139 | + | |
5140 | + if( state !=0 && state != ACSM_FAIL_STATE2 ) | |
5141 | + { | |
5142 | + if( first < 0 ) first = i; | |
5143 | + last = i; | |
5144 | + } | |
5145 | + } | |
5146 | + | |
5147 | + /* calc band width */ | |
5148 | + cnt= last - first + 1; | |
5149 | + | |
5150 | + p = AC_MALLOC(sizeof(acstate_t)*(4+cnt)); | |
5151 | + | |
5152 | + if(!p) return -1; | |
5153 | + | |
5154 | + m = 0; | |
5155 | + p[m++] = ACF_BANDED; | |
5156 | + p[m++] = 0; /* no matches */ | |
5157 | + p[m++] = cnt; | |
5158 | + p[m++] = first; | |
5159 | + | |
5160 | + for(i = first; i <= last; i++) | |
5161 | + { | |
5162 | + p[m++] = full[i]; | |
5163 | + } | |
5164 | + | |
5165 | + NextState[k] = p; /* now we are a banded formatted state transition array */ | |
5166 | + } | |
5167 | + | |
5168 | + return 0; | |
5169 | +} | |
5170 | + | |
5171 | +/* | |
5172 | + * Convert full matrix to Sparse Band row format. | |
5173 | + * | |
5174 | + * next - Full formatted row of next states | |
5175 | + * asize - size of alphabet | |
5176 | + * zcnt - max number of zeros in a run of zeros in any given band. | |
5177 | + * | |
5178 | + * Word Values | |
5179 | + * 1 ACF_SPARSEBANDS | |
5180 | + * 2 number of bands | |
5181 | + * repeat 3 - 5+ ....once for each band in this row. | |
5182 | + * 3 number of items in this band* 4 start index of this band | |
5183 | + * 5- next-state values in this band... | |
5184 | + */ | |
5185 | +static | |
5186 | +int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax ) | |
5187 | +{ | |
5188 | + int i, nbands,zcnt,last=0; | |
5189 | + acstate_t state; | |
5190 | + | |
5191 | + nbands=0; | |
5192 | + for( i=0; i<asize; i++ ) | |
5193 | + { | |
5194 | + state = next[i]; | |
5195 | + | |
5196 | + if( state !=0 && state != ACSM_FAIL_STATE2 ) | |
5197 | + { | |
5198 | + begin[nbands] = i; | |
5199 | + zcnt=0; | |
5200 | + | |
5201 | + for( ; i< asize; i++ ) | |
5202 | + { | |
5203 | + state = next[i]; | |
5204 | + if( state ==0 || state == ACSM_FAIL_STATE2 ) | |
5205 | + { | |
5206 | + zcnt++; | |
5207 | + if( zcnt > zmax ) break; | |
5208 | + } | |
5209 | + else | |
5210 | + { | |
5211 | + zcnt=0; | |
5212 | + last = i; | |
5213 | + } | |
5214 | + } | |
5215 | + | |
5216 | + end[nbands++] = last; | |
5217 | + | |
5218 | + } | |
5219 | + } | |
5220 | + | |
5221 | + return nbands; | |
5222 | +} | |
5223 | + | |
5224 | + | |
5225 | +/* | |
5226 | + * Sparse Bands | |
5227 | + * | |
5228 | + * Row Format: | |
5229 | + * Word | |
5230 | + * 1 SPARSEBANDS format indicator | |
5231 | + * 2 bool indicates a pattern match in this state | |
5232 | + * 3 number of sparse bands | |
5233 | + * 4 number of elements in this band | |
5234 | + * 5 start index of this band | |
5235 | + * 6- list of next states | |
5236 | + * | |
5237 | + * m number of elements in this band | |
5238 | + * m+1 start index of this band | |
5239 | + * m+2- list of next states | |
5240 | + */ | |
5241 | +static int | |
5242 | +Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm) | |
5243 | +{ | |
5244 | + acstate_t * p; | |
5245 | + acstate_t ** NextState = acsm->acsmNextState; | |
5246 | + int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt; | |
5247 | + | |
5248 | + int band_begin[MAX_ALPHABET_SIZE]; | |
5249 | + int band_end[MAX_ALPHABET_SIZE]; | |
5250 | + int nbands,j; | |
5251 | + acstate_t full[MAX_ALPHABET_SIZE]; | |
5252 | + | |
5253 | + for(k=0;k<acsm->acsmMaxStates;k++) | |
5254 | + { | |
5255 | + cnt=0; | |
5256 | + | |
5257 | + List_ConvToFull(acsm, (acstate_t)k, full ); | |
5258 | + | |
5259 | + nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt ); | |
5260 | + | |
5261 | + /* calc band width space*/ | |
5262 | + cnt = 3; | |
5263 | + for(i=0;i<nbands;i++) | |
5264 | + { | |
5265 | + cnt += 2; | |
5266 | + cnt += band_end[i] - band_begin[i] + 1; | |
5267 | + | |
5268 | + /*printk("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */ | |
5269 | + } | |
5270 | + | |
5271 | + p = AC_MALLOC(sizeof(acstate_t)*(cnt)); | |
5272 | + | |
5273 | + if(!p) return -1; | |
5274 | + | |
5275 | + m = 0; | |
5276 | + p[m++] = ACF_SPARSEBANDS; | |
5277 | + p[m++] = 0; /* no matches */ | |
5278 | + p[m++] = nbands; | |
5279 | + | |
5280 | + for( i=0;i<nbands;i++ ) | |
5281 | + { | |
5282 | + p[m++] = band_end[i] - band_begin[i] + 1; /* # states in this band */ | |
5283 | + p[m++] = band_begin[i]; /* start index */ | |
5284 | + | |
5285 | + for( j=band_begin[i]; j<=band_end[i]; j++ ) | |
5286 | + { | |
5287 | + p[m++] = full[j]; /* some states may be state zero */ | |
5288 | + } | |
5289 | + } | |
5290 | + | |
5291 | + NextState[k] = p; /* now we are a sparse-banded formatted state transition array */ | |
5292 | + } | |
5293 | + | |
5294 | + return 0; | |
5295 | +} | |
5296 | + | |
5297 | +/* | |
5298 | + * | |
5299 | + * Convert an NFA or DFA row from sparse to full format | |
5300 | + * and store into the 'full' buffer. | |
5301 | + * | |
5302 | + * returns: | |
5303 | + * 0 - failed, no state transitions | |
5304 | + * *p - pointer to 'full' buffer | |
5305 | + * | |
5306 | + */ | |
5307 | +/* | |
5308 | + static | |
5309 | + acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full ) | |
5310 | + { | |
5311 | + int i; | |
5312 | + acstate_t * p, n, fmt, index, nb, bmatch; | |
5313 | + acstate_t ** NextState = acsm->acsmNextState; | |
5314 | + | |
5315 | + p = NextState[k]; | |
5316 | + | |
5317 | + if( !p ) return 0; | |
5318 | + | |
5319 | + fmt = *p++; | |
5320 | + | |
5321 | + bmatch = *p++; | |
5322 | + | |
5323 | + if( fmt ==ACF_SPARSE ) | |
5324 | + { | |
5325 | + n = *p++; | |
5326 | + for( ; n>0; n--, p+=2 ) | |
5327 | + { | |
5328 | + full[ p[0] ] = p[1]; | |
5329 | + } | |
5330 | + } | |
5331 | + else if( fmt ==ACF_BANDED ) | |
5332 | + { | |
5333 | + | |
5334 | + n = *p++; | |
5335 | + index = *p++; | |
5336 | + | |
5337 | + for( ; n>0; n--, p++ ) | |
5338 | + { | |
5339 | + full[ index++ ] = p[0]; | |
5340 | + } | |
5341 | + } | |
5342 | + else if( fmt ==ACF_SPARSEBANDS ) | |
5343 | + { | |
5344 | + nb = *p++; | |
5345 | + for(i=0;i<nb;i++) | |
5346 | + { | |
5347 | + n = *p++; | |
5348 | + index = *p++; | |
5349 | + for( ; n>0; n--, p++ ) | |
5350 | + { | |
5351 | + full[ index++ ] = p[0]; | |
5352 | + } | |
5353 | + } | |
5354 | + } | |
5355 | + else if( fmt == ACF_FULL ) | |
5356 | + { | |
5357 | + memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t)); | |
5358 | + } | |
5359 | + | |
5360 | + return full; | |
5361 | + } | |
5362 | +*/ | |
5363 | + | |
5364 | +/* | |
5365 | + * Select the desired storage mode | |
5366 | + */ | |
5367 | +int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m ) | |
5368 | +{ | |
5369 | + switch( m ) | |
5370 | + { | |
5371 | + case ACF_FULL: | |
5372 | + case ACF_SPARSE: | |
5373 | + case ACF_BANDED: | |
5374 | + case ACF_SPARSEBANDS: | |
5375 | + acsm->acsmFormat = m; | |
5376 | + break; | |
5377 | + default: | |
5378 | + return -1; | |
5379 | + } | |
5380 | + | |
5381 | + return 0; | |
5382 | +} | |
5383 | +/* | |
5384 | + * | |
5385 | + */ | |
5386 | +void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n ) | |
5387 | +{ | |
5388 | + acsm->acsmSparseMaxZcnt = n; | |
5389 | +} | |
5390 | +/* | |
5391 | + * | |
5392 | + */ | |
5393 | +void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n ) | |
5394 | +{ | |
5395 | + acsm->acsmSparseMaxRowNodes = n; | |
5396 | +} | |
5397 | +/* | |
5398 | + * | |
5399 | + */ | |
5400 | +int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m ) | |
5401 | +{ | |
5402 | + switch( m ) | |
5403 | + { | |
5404 | + case FSA_TRIE: | |
5405 | + case FSA_NFA: | |
5406 | + case FSA_DFA: | |
5407 | + acsm->acsmFSA = m; | |
5408 | + default: | |
5409 | + return -1; | |
5410 | + } | |
5411 | +} | |
5412 | +/* | |
5413 | + * | |
5414 | + */ | |
5415 | +int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n ) | |
5416 | +{ | |
5417 | + if( n <= MAX_ALPHABET_SIZE ) | |
5418 | + { | |
5419 | + acsm->acsmAlphabetSize = n; | |
5420 | + } | |
5421 | + else | |
5422 | + { | |
5423 | + return -1; | |
5424 | + } | |
5425 | + return 0; | |
5426 | +} | |
5427 | +/* | |
5428 | + * Create a new AC state machine | |
5429 | + */ | |
5430 | +static ACSM_STRUCT2 * acsmNew2 (void) | |
5431 | +{ | |
5432 | + ACSM_STRUCT2 * p; | |
5433 | + | |
5434 | + init_xlatcase (); | |
5435 | + | |
5436 | + p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2)); | |
5437 | + MEMASSERT (p, "acsmNew"); | |
5438 | + | |
5439 | + if (p) | |
5440 | + { | |
5441 | + memset (p, 0, sizeof (ACSM_STRUCT2)); | |
5442 | + | |
5443 | + /* Some defaults */ | |
5444 | + p->acsmFSA = FSA_DFA; | |
5445 | + p->acsmFormat = ACF_BANDED; | |
5446 | + p->acsmAlphabetSize = 256; | |
5447 | + p->acsmSparseMaxRowNodes = 256; | |
5448 | + p->acsmSparseMaxZcnt = 10; | |
5449 | + } | |
5450 | + | |
5451 | + return p; | |
5452 | +} | |
5453 | +/* | |
5454 | + * Add a pattern to the list of patterns for this state machine | |
5455 | + * | |
5456 | + */ | |
5457 | +int | |
5458 | +acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase, | |
5459 | + int offset, int depth, void * id, int iid) | |
5460 | +{ | |
5461 | + ACSM_PATTERN2 * plist; | |
5462 | + | |
5463 | + plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2)); | |
5464 | + MEMASSERT (plist, "acsmAddPattern"); | |
5465 | + | |
5466 | + plist->patrn = (unsigned char *) AC_MALLOC ( n ); | |
5467 | + MEMASSERT (plist->patrn, "acsmAddPattern"); | |
5468 | + | |
5469 | + ConvertCaseEx(plist->patrn, pat, n); | |
5470 | + | |
5471 | + plist->casepatrn = (unsigned char *) AC_MALLOC ( n ); | |
5472 | + MEMASSERT (plist->casepatrn, "acsmAddPattern"); | |
5473 | + | |
5474 | + memcpy (plist->casepatrn, pat, n); | |
5475 | + | |
5476 | + plist->n = n; | |
5477 | + plist->nocase = nocase; | |
5478 | + plist->offset = offset; | |
5479 | + plist->depth = depth; | |
5480 | + plist->id = id; | |
5481 | + plist->iid = iid; | |
5482 | + | |
5483 | + plist->next = p->acsmPatterns; | |
5484 | + p->acsmPatterns = plist; | |
5485 | + | |
5486 | + return 0; | |
5487 | +} | |
5488 | +/* | |
5489 | + * Add a Key to the list of key+data pairs | |
5490 | + */ | |
5491 | +int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data) | |
5492 | +{ | |
5493 | + ACSM_PATTERN2 * plist; | |
5494 | + | |
5495 | + plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2)); | |
5496 | + MEMASSERT (plist, "acsmAddPattern"); | |
5497 | + | |
5498 | + plist->patrn = (unsigned char *) AC_MALLOC (klen); | |
5499 | + memcpy (plist->patrn, key, klen); | |
5500 | + | |
5501 | + plist->casepatrn = (unsigned char *) AC_MALLOC (klen); | |
5502 | + memcpy (plist->casepatrn, key, klen); | |
5503 | + | |
5504 | + plist->n = klen; | |
5505 | + plist->nocase = nocase; | |
5506 | + plist->offset = 0; | |
5507 | + plist->depth = 0; | |
5508 | + plist->id = 0; | |
5509 | + plist->iid = 0; | |
5510 | + | |
5511 | + plist->next = p->acsmPatterns; | |
5512 | + p->acsmPatterns = plist; | |
5513 | + | |
5514 | + return 0; | |
5515 | +} | |
5516 | + | |
5517 | +/* | |
5518 | + * Copy a boolean match flag int NextState table, for caching purposes. | |
5519 | + */ | |
5520 | +static | |
5521 | +void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm ) | |
5522 | +{ | |
5523 | + acstate_t state; | |
5524 | + acstate_t ** NextState = acsm->acsmNextState; | |
5525 | + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList; | |
5526 | + | |
5527 | + for( state=0; state<acsm->acsmNumStates; state++ ) | |
5528 | + { | |
5529 | + if( MatchList[state] ) | |
5530 | + { | |
5531 | + NextState[state][1] = 1; | |
5532 | + } | |
5533 | + else | |
5534 | + { | |
5535 | + NextState[state][1] = 0; | |
5536 | + } | |
5537 | + } | |
5538 | +} | |
5539 | + | |
5540 | +/* | |
5541 | + * Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands | |
5542 | + */ | |
/*
 * Compile the state machine from the accumulated pattern list.
 *
 * Pipeline: size the tables from total pattern length, allocate the
 * list-based transition table / fail-state table / match-list table,
 * build the keyword trie, then (for NFA/DFA) add failure links and
 * (for DFA) subset-convert, and finally convert the transition rows to
 * the selected storage format (sparse / banded / sparse-bands / full).
 *
 * Returns 0 on success, -1 if a storage-format conversion fails.
 */
int
acsmCompile2 (ACSM_STRUCT2 * acsm)
{
  int k;
  ACSM_PATTERN2 * plist;

  /* Count number of states: upper bound is the sum of pattern lengths */
  for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
  {
    acsm->acsmMaxStates += plist->n;
    /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
  }
  acsm->acsmMaxStates++; /* one extra */

  /* Alloc a List based State Transition table */
  acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
  MEMASSERT (acsm->acsmTransTable, "acsmCompile");

  memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);

  /* Alloc a failure table - this has a failure state, and a match list for each state */
  acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
  MEMASSERT (acsm->acsmFailState, "acsmCompile");

  memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );

  /* Alloc a MatchList table - this has a lis tof pattern matches for each state, if any */
  acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
  MEMASSERT (acsm->acsmMatchList, "acsmCompile");

  memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );

  /* Alloc a separate state transition table == in state 's' due to event 'k', transition to 'next' state */
  acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
  MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");

  /* Rows are allocated lazily during format conversion; start them all NULL */
  for (k = 0; k < acsm->acsmMaxStates; k++)
  {
    acsm->acsmNextState[k]=(acstate_t*)0;
  }

  /* Initialize state zero as a branch */
  acsm->acsmNumStates = 0;

  /* Add the 0'th state, */
  //acsm->acsmNumStates++;

  /* Add each Pattern to the State Table - This forms a keywords state table */
  for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
  {
    AddPatternStates (acsm, plist);
  }

  acsm->acsmNumStates++;

  if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
  {
    /* Build the NFA: adds failure links to the keyword trie */
    Build_NFA (acsm);
  }

  if( acsm->acsmFSA == FSA_DFA )
  {
    /* Convert the NFA to a DFA */
    Convert_NFA_To_DFA (acsm);
  }

  /*
   * Select Final Transition Table Storage Mode
   */
  if( acsm->acsmFormat == ACF_SPARSE )
  {
    /* Convert DFA Full matrix to a Sparse matrix */
    if( Conv_Full_DFA_To_Sparse(acsm) )
      return -1;
  }
  else if( acsm->acsmFormat == ACF_BANDED )
  {
    /* Convert DFA Full matrix to a Banded matrix */
    if( Conv_Full_DFA_To_Banded(acsm) )
      return -1;
  }
  else if( acsm->acsmFormat == ACF_SPARSEBANDS )
  {
    /* Convert DFA Full matrix to a Sparse-Banded matrix */
    if( Conv_Full_DFA_To_SparseBands(acsm) )
      return -1;
  }
  else if( acsm->acsmFormat == ACF_FULL )
  {
    if( Conv_List_To_Full( acsm ) )
      return -1;
  }

  acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */

  /* Free up the Table Of Transition Lists - no longer needed after conversion */
  List_FreeTransTable( acsm );

  /* For now -- show this info */
  /*
   * acsmPrintInfo( acsm );
   */

  /* Accrue Summary State Stats */
  summary.num_states += acsm->acsmNumStates;
  summary.num_transitions += acsm->acsmNumTrans;

  memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));

  return 0;
}
5660 | + | |
5661 | +/* | |
5662 | + * Get the NextState from the NFA, all NFA storage formats use this | |
5663 | + */ | |
/*
 * Get the NextState from the NFA; all NFA storage formats use this.
 *
 * 'ps' points at one state row: ps[0] is the storage format tag,
 * ps[1] is the cached match flag (skipped here). The remainder of the
 * row is format-specific, decoded per-case below.
 *
 * Returns the next state, or ACSM_FAIL_STATE2 when the caller must
 * follow the failure link (never fails out of state 0).
 */
inline
acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
{
  acstate_t fmt;
  acstate_t n;
  int index;
  int nb;

  fmt = *ps++;

  ps++; /* skip bMatchState */

  switch( fmt )
  {
    case ACF_BANDED:
    {
      /* ps[0] = number of entries in the band, ps[1] = first index */
      n = ps[0];
      index = ps[1];

      if( input < index )
      {
        if(state==0)
        {
          return 0;
        }
        else
        {
          return (acstate_t)ACSM_FAIL_STATE2;
        }
      }
      if( input >= index + n )
      {
        if(state==0)
        {
          return 0;
        }
        else
        {
          return (acstate_t)ACSM_FAIL_STATE2;
        }
      }
      /* a stored 0 means "no transition" for any state but state 0 */
      if( ps[input-index] == 0 )
      {
        if( state != 0 )
        {
          return ACSM_FAIL_STATE2;
        }
      }

      return (acstate_t) ps[input-index];
    }

    case ACF_SPARSE:
    {
      n = *ps++; /* number of sparse index-value entries */

      /* entries are (input, next-state) pairs in ascending input order */
      for( ; n>0 ; n-- )
      {
        if( ps[0] > input ) /* cannot match the input, already a higher value than the input */
        {
          return (acstate_t)ACSM_FAIL_STATE2; /* default state */
        }
        else if( ps[0] == input )
        {
          return ps[1]; /* next state */
        }
        ps+=2;
      }
      if( state == 0 )
      {
        return 0;
      }
      return ACSM_FAIL_STATE2;
    }

    case ACF_SPARSEBANDS:
    {
      nb = *ps++; /* number of bands */

      while( nb > 0 ) /* for each band */
      {
        n = *ps++;     /* number of elements */
        index = *ps++; /* 1st element value */

        /* bands are sorted: if input precedes this band it is absent */
        if( input < index )
        {
          if( state != 0 )
          {
            return (acstate_t)ACSM_FAIL_STATE2;
          }
          return (acstate_t)0;
        }
        if( (input >= index) && (input < (index + n)) )
        {
          if( ps[input-index] == 0 )
          {
            if( state != 0 )
            {
              return ACSM_FAIL_STATE2;
            }
          }
          return (acstate_t) ps[input-index];
        }
        nb--;
        ps += n; /* skip this band's elements */
      }
      if( state != 0 )
      {
        return (acstate_t)ACSM_FAIL_STATE2;
      }
      return (acstate_t)0;
    }

    case ACF_FULL:
    {
      /* full row: direct index by input symbol */
      if( ps[input] == 0 )
      {
        if( state != 0 )
        {
          return ACSM_FAIL_STATE2;
        }
      }
      return ps[input];
    }
  }

  return 0; /* unknown format tag */
}
5792 | + | |
5793 | + | |
5794 | + | |
5795 | +/* | |
5796 | + * Get the NextState from the DFA Next State Transition table | |
5797 | + * Full and banded are supported separately, this is for | |
5798 | + * sparse and sparse-bands | |
5799 | + */ | |
/*
 * Get the NextState from the DFA Next State Transition table.
 * Full and banded are supported separately (dedicated search loops);
 * this decoder is for sparse and sparse-bands.
 *
 * 'ps' points at one state row: ps[0] = storage format tag,
 * ps[1] = match flag (unused here). In a DFA a missing transition
 * simply goes to state 0 -- there is no fail state.
 */
inline
acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
{
  acstate_t n, nb;
  int index;

  switch( ps[0] )
  {
    /* BANDED */
    case ACF_BANDED:
    {
      /* n=ps[2] : number of entries in the band */
      /* index=ps[3] : index of the 1st entry, sequential thereafter */

      if( input < ps[3] ) return 0;
      if( input >= (ps[3]+ps[2]) ) return 0;

      return ps[4+input-ps[3]];
    }

    /* FULL */
    case ACF_FULL:
    {
      return ps[2+input];
    }

    /* SPARSE */
    case ACF_SPARSE:
    {
      n = ps[2]; /* number of entries/ key+next pairs */

      ps += 3;

      /* pairs are in ascending key order, so we can stop early */
      for( ; n>0 ; n-- )
      {
        if( input < ps[0] ) /* cannot match the input, already a higher value than the input */
        {
          return (acstate_t)0; /* default state */
        }
        else if( ps[0] == input )
        {
          return ps[1]; /* next state */
        }
        ps += 2;
      }
      return (acstate_t)0;
    }

    /* SPARSEBANDS */
    case ACF_SPARSEBANDS:
    {
      nb = ps[2]; /* number of bands */

      ps += 3;

      while( nb > 0 ) /* for each band */
      {
        n = ps[0];     /* number of elements in this band */
        index = ps[1]; /* start index/char of this band */
        if( input < index )
        {
          return (acstate_t)0; /* bands sorted: input precedes all remaining bands */
        }
        if( (input < (index + n)) )
        {
          return (acstate_t) ps[2+input-index];
        }
        nb--;
        ps += n; /* NOTE(review): advances by n, not n+2 -- looks like it may skip
                    only the elements and not the (count,index) header; verify
                    against the sparse-bands writer before relying on multi-band rows */
      }
      return (acstate_t)0;
    }
  }

  return 0; /* unknown format tag */
}
5877 | +/* | |
5878 | + * Search Text or Binary Data for Pattern matches | |
5879 | + * | |
5880 | + * Sparse & Sparse-Banded Matrix search | |
5881 | + */ | |
/*
 * Search Text or Binary Data for Pattern matches.
 *
 * Sparse & Sparse-Banded Matrix search: one SparseGetNextStateDFA call
 * per input byte, then the per-state match flag (NextState[state][1])
 * gates the match-list walk. Returns the number of matches found;
 * stops early if the Match callback returns non-zero.
 */
static
inline
int
acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
                    int (*Match) (void * id, int index, void *data),
                    void *data)
{
  acstate_t state;
  ACSM_PATTERN2 * mlist;
  unsigned char * Tend;
  int nfound = 0;
  unsigned char * T, * Tc;
  int index;
  acstate_t ** NextState = acsm->acsmNextState;
  ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;

  Tc = Tx;
  T = Tx;
  Tend = T + n;

  for( state = 0; T < Tend; T++ )
  {
    /* case-fold the byte through xlatcase before the table lookup */
    state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );

    /* test if this state has any matching patterns */
    if( NextState[state][1] )
    {
      for( mlist = MatchList[state];
           mlist!= NULL;
           mlist = mlist->next )
      {
        /* offset of the match start within the text */
        index = T - mlist->n - Tc;
        if( mlist->nocase )
        {
          nfound++;
          if (Match (mlist->id, index, data))
            return nfound;
        }
        else
        {
          /* case-sensitive pattern: verify against the original bytes */
          if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
          {
            nfound++;
            if (Match (mlist->id, index, data))
              return nfound;
          }
        }
      }
    }
  }
  return nfound;
}
5934 | +/* | |
5935 | + * Full format DFA search | |
5936 | + * Do not change anything here without testing, caching and prefetching | |
5937 | + * performance is very sensitive to any changes. | |
5938 | + * | |
5939 | + * Perf-Notes: | |
5940 | + * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10% | |
5941 | + * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed | |
5942 | + * 3) | |
5943 | + */ | |
/*
 * Full format DFA search
 * Do not change anything here without testing, caching and prefetching
 * performance is very sensitive to any changes.
 *
 * Perf-Notes:
 * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
 * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
 * 3)
 *
 * Row layout for ACF_FULL: ps[0]=format, ps[1]=match flag,
 * ps[2..] = next state indexed directly by (translated) input byte.
 * Note: matches are checked on entry to the loop body (for the state
 * reached by the PREVIOUS byte), with a final check after the loop for
 * the last state.
 */
static
inline
int
acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
                         int (*Match) (void * id, int index, void *data),
                         void *data)
{
  ACSM_PATTERN2 * mlist;
  unsigned char * Tend;
  unsigned char * T;
  int index;
  acstate_t state;
  acstate_t * ps;
  acstate_t sindex;
  acstate_t ** NextState = acsm->acsmNextState;
  ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
  int nfound = 0;

  T = Tx;
  Tend = Tx + n;

  for( state = 0; T < Tend; T++ )
  {
    ps = NextState[ state ];

    sindex = xlatcase[ T[0] ]; /* case-fold the current byte */

    /* check the current state for a pattern match */
    if( ps[1] )
    {
      for( mlist = MatchList[state];
           mlist!= NULL;
           mlist = mlist->next )
      {
        index = T - mlist->n - Tx;


        if( mlist->nocase )
        {
          nfound++;
          if (Match (mlist->id, index, data))
            return nfound;
        }
        else
        {
          /* case-sensitive: verify against the original text */
          if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
          {
            nfound++;
            if (Match (mlist->id, index, data))
              return nfound;
          }
        }

      }
    }

    state = ps[ 2u + sindex ]; /* full-row transition */
  }

  /* Check the last state for a pattern match */
  for( mlist = MatchList[state];
       mlist!= NULL;
       mlist = mlist->next )
  {
    index = T - mlist->n - Tx;

    if( mlist->nocase )
    {
      nfound++;
      if (Match (mlist->id, index, data))
        return nfound;
    }
    else
    {
      if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
      {
        nfound++;
        if (Match (mlist->id, index, data))
          return nfound;
      }
    }
  }

  return nfound;
}
6029 | +/* | |
6030 | + * Banded-Row format DFA search | |
6031 | + * Do not change anything here, caching and prefetching | |
6032 | + * performance is very sensitive to any changes. | |
6033 | + * | |
6034 | + * ps[0] = storage fmt | |
6035 | + * ps[1] = bool match flag | |
6036 | + * ps[2] = # elements in band | |
6037 | + * ps[3] = index of 1st element | |
6038 | + */ | |
/*
 * Banded-Row format DFA search
 * Do not change anything here, caching and prefetching
 * performance is very sensitive to any changes.
 *
 * ps[0] = storage fmt
 * ps[1] = bool match flag
 * ps[2] = # elements in band
 * ps[3] = index of 1st element
 *
 * Same match-before-transition structure as the Full search: the state
 * reached by the previous byte is checked for matches, then the banded
 * row is decoded inline (out-of-band inputs go to state 0).
 */
static
inline
int
acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
                           int (*Match) (void * id, int index, void *data),
                           void *data)
{
  acstate_t state;
  unsigned char * Tend;
  unsigned char * T;
  int sindex;
  int index;
  acstate_t ** NextState = acsm->acsmNextState;
  ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
  ACSM_PATTERN2 * mlist;
  acstate_t * ps;
  int nfound = 0;

  T = Tx;
  Tend = T + n;

  for( state = 0; T < Tend; T++ )
  {
    ps = NextState[state];

    sindex = xlatcase[ T[0] ]; /* case-fold the current byte */

    /* test if this state has any matching patterns */
    if( ps[1] )
    {
      for( mlist = MatchList[state];
           mlist!= NULL;
           mlist = mlist->next )
      {
        index = T - mlist->n - Tx;

        if( mlist->nocase )
        {
          nfound++;
          if (Match (mlist->id, index, data))
            return nfound;
        }
        else
        {
          /* case-sensitive: verify against the original text */
          if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
          {
            nfound++;
            if (Match (mlist->id, index, data))
              return nfound;
          }
        }
      }
    }

    /* inline banded transition: outside the band -> state 0 */
    if( sindex < ps[3] ) state = 0;
    else if( sindex >= (ps[3] + ps[2]) ) state = 0;
    else state = ps[ 4u + sindex - ps[3] ];
  }

  /* Check the last state for a pattern match */
  for( mlist = MatchList[state];
       mlist!= NULL;
       mlist = mlist->next )
  {
    index = T - mlist->n - Tx;

    if( mlist->nocase )
    {
      nfound++;
      if (Match (mlist->id, index, data))
        return nfound;
    }
    else
    {
      if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
      {
        nfound++;
        if (Match (mlist->id, index, data))
          return nfound;
      }
    }
  }

  return nfound;
}
6124 | + | |
6125 | + | |
6126 | + | |
6127 | +/* | |
6128 | + * Search Text or Binary Data for Pattern matches | |
6129 | + * | |
6130 | + * Sparse Storage Version | |
6131 | + */ | |
/*
 * Search Text or Binary Data for Pattern matches.
 *
 * Sparse Storage Version (NFA): when a transition fails, follow the
 * failure links (FailState) until a state accepts the input -- state 0
 * always accepts, so the inner while loop terminates.
 */
static
inline
int
acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
                    int (*Match) (void * id, int index, void *data),
                    void *data)
{
  acstate_t state;
  ACSM_PATTERN2 * mlist;
  unsigned char * Tend;
  int nfound = 0;
  unsigned char * T, *Tc;
  int index;
  acstate_t ** NextState= acsm->acsmNextState;
  acstate_t * FailState= acsm->acsmFailState;
  ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
  unsigned char Tchar;

  Tc = Tx;
  T = Tx;
  Tend = T + n;

  for( state = 0; T < Tend; T++ )
  {
    acstate_t nstate;

    Tchar = xlatcase[ *T ]; /* case-fold the current byte */

    /* follow failure links until some state accepts this input */
    while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
      state = FailState[state];

    state = nstate;

    for( mlist = MatchList[state];
         mlist!= NULL;
         mlist = mlist->next )
    {
      index = T - mlist->n - Tx;
      if( mlist->nocase )
      {
        nfound++;
        if (Match (mlist->id, index, data))
          return nfound;
      }
      else
      {
        /* case-sensitive: verify against the original text */
        if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
        {
          nfound++;
          if (Match (mlist->id, index, data))
            return nfound;
        }
      }
    }
  }

  return nfound;
}
6190 | + | |
6191 | +/* | |
6192 | + * Search Function | |
6193 | + */ | |
6194 | +int | |
6195 | +acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n, | |
6196 | + int (*Match) (void * id, int index, void *data), | |
6197 | + void *data) | |
6198 | +{ | |
6199 | + | |
6200 | + switch( acsm->acsmFSA ) | |
6201 | + { | |
6202 | + case FSA_DFA: | |
6203 | + | |
6204 | + if( acsm->acsmFormat == ACF_FULL ) | |
6205 | + { | |
6206 | + return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data ); | |
6207 | + } | |
6208 | + else if( acsm->acsmFormat == ACF_BANDED ) | |
6209 | + { | |
6210 | + return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data ); | |
6211 | + } | |
6212 | + else | |
6213 | + { | |
6214 | + return acsmSearchSparseDFA( acsm, Tx, n, Match,data ); | |
6215 | + } | |
6216 | + | |
6217 | + case FSA_NFA: | |
6218 | + | |
6219 | + return acsmSearchSparseNFA( acsm, Tx, n, Match,data ); | |
6220 | + | |
6221 | + case FSA_TRIE: | |
6222 | + | |
6223 | + return 0; | |
6224 | + } | |
6225 | + return 0; | |
6226 | +} | |
6227 | + | |
6228 | + | |
6229 | +/* | |
6230 | + * Free all memory | |
6231 | + */ | |
6232 | +void | |
6233 | +acsmFree2 (ACSM_STRUCT2 * acsm) | |
6234 | +{ | |
6235 | + int i; | |
6236 | + ACSM_PATTERN2 * mlist, *ilist; | |
6237 | + for (i = 0; i < acsm->acsmMaxStates; i++) | |
6238 | + { | |
6239 | + mlist = acsm->acsmMatchList[i]; | |
6240 | + | |
6241 | + while (mlist) | |
6242 | + { | |
6243 | + ilist = mlist; | |
6244 | + mlist = mlist->next; | |
6245 | + AC_FREE (ilist); | |
6246 | + } | |
6247 | + AC_FREE(acsm->acsmNextState[i]); | |
6248 | + } | |
6249 | + AC_FREE(acsm->acsmFailState); | |
6250 | + AC_FREE(acsm->acsmMatchList); | |
6251 | +} | |
6252 | + | |
6253 | +/* ********************************** */ | |
6254 | + | |
/*
 * Socket destructor for PF_RING sockets.
 *
 * Drains the receive queue, sanity-checks that the socket is dead and
 * has no outstanding rmem/wmem accounting, then frees the per-socket
 * ring state. Field names and flag accessors differ across the 2.4/2.6
 * kernel split, hence the version conditionals.
 */
static void ring_sock_destruct(struct sock *sk) {

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
  skb_queue_purge(&sk->sk_receive_queue);

  /* Destructor must only run on a dead socket */
  if (!sock_flag(sk, SOCK_DEAD)) {
#if defined(RING_DEBUG)
    printk("Attempt to release alive ring socket: %p\n", sk);
#endif
    return;
  }

  BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
  BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
#else

  BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
  BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);

  if (!sk->dead) {
#if defined(RING_DEBUG)
    printk("Attempt to release alive ring socket: %p\n", sk);
#endif
    return;
  }
#endif

  /* Release the per-socket ring options structure */
  kfree(ring_sk(sk));

#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
  MOD_DEC_USE_COUNT;
#endif
}
6288 | + | |
6289 | +/* ********************************** */ | |
6290 | + | |
6291 | +static void ring_proc_add(struct ring_opt *pfr) { | |
6292 | + if(ring_proc_dir != NULL) { | |
6293 | + char name[16]; | |
6294 | + | |
6295 | + pfr->ring_pid = current->pid; | |
6296 | + | |
6297 | + snprintf(name, sizeof(name), "%d", pfr->ring_pid); | |
6298 | + create_proc_read_entry(name, 0, ring_proc_dir, | |
6299 | + ring_proc_get_info, pfr); | |
6300 | + /* printk("PF_RING: added /proc/net/pf_ring/%s\n", name); */ | |
6301 | + } | |
6302 | +} | |
6303 | + | |
6304 | +/* ********************************** */ | |
6305 | + | |
6306 | +static void ring_proc_remove(struct ring_opt *pfr) { | |
6307 | + if(ring_proc_dir != NULL) { | |
6308 | + char name[16]; | |
6309 | + | |
6310 | + snprintf(name, sizeof(name), "%d", pfr->ring_pid); | |
6311 | + remove_proc_entry(name, ring_proc_dir); | |
6312 | + /* printk("PF_RING: removed /proc/net/pf_ring/%s\n", name); */ | |
6313 | + } | |
6314 | +} | |
6315 | + | |
6316 | +/* ********************************** */ | |
6317 | + | |
/*
 * /proc read handler. With data == NULL it renders the global
 * /proc/net/pf_ring/info summary; otherwise 'data' is the ring_opt of
 * one ring and it renders that ring's per-socket statistics.
 * Returns the number of bytes written into 'buf'.
 * NOTE(review): output length is unbounded sprintf into the proc page
 * buffer -- assumes the total stays under one page.
 */
static int ring_proc_get_info(char *buf, char **start, off_t offset,
                              int len, int *unused, void *data)
{
  int rlen = 0;
  struct ring_opt *pfr;
  FlowSlotInfo *fsi;

  if(data == NULL) {
    /* /proc/net/pf_ring/info */
    rlen = sprintf(buf,"Version : %s\n", RING_VERSION);
    rlen += sprintf(buf + rlen,"Bucket length : %d bytes\n", bucket_len);
    rlen += sprintf(buf + rlen,"Ring slots : %d\n", num_slots);
    rlen += sprintf(buf + rlen,"Sample rate : %d [1=no sampling]\n", sample_rate);

    rlen += sprintf(buf + rlen,"Capture TX : %s\n",
                    enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
    rlen += sprintf(buf + rlen,"Transparent mode : %s\n",
                    transparent_mode ? "Yes" : "No");
    rlen += sprintf(buf + rlen,"Total rings : %d\n", ring_table_size);
  } else {
    /* detailed statistics about a PF_RING */
    pfr = (struct ring_opt*)data;

    if(data) {
      fsi = pfr->slots_info;

      if(fsi) {
        rlen = sprintf(buf, "Bound Device : %s\n",
                       pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
        rlen += sprintf(buf + rlen,"Version : %d\n", fsi->version);
        rlen += sprintf(buf + rlen,"Sampling Rate : %d\n", pfr->sample_rate);
        rlen += sprintf(buf + rlen,"BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
        rlen += sprintf(buf + rlen,"Bloom Filters : %s\n", pfr->bitmask_enabled ? "Enabled" : "Disabled");
        rlen += sprintf(buf + rlen,"Pattern Search: %s\n", pfr->acsm ? "Enabled" : "Disabled");
        rlen += sprintf(buf + rlen,"Cluster Id : %d\n", pfr->cluster_id);
        rlen += sprintf(buf + rlen,"Tot Slots : %d\n", fsi->tot_slots);
        rlen += sprintf(buf + rlen,"Slot Len : %d\n", fsi->slot_len);
        rlen += sprintf(buf + rlen,"Data Len : %d\n", fsi->data_len);
        rlen += sprintf(buf + rlen,"Tot Memory : %d\n", fsi->tot_mem);
        rlen += sprintf(buf + rlen,"Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
        rlen += sprintf(buf + rlen,"Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
        rlen += sprintf(buf + rlen,"Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
        rlen += sprintf(buf + rlen,"Tot Read : %lu\n", (unsigned long)fsi->tot_read);

      } else
        rlen = sprintf(buf, "WARNING fsi == NULL\n");
    } else
      rlen = sprintf(buf, "WARNING data == NULL\n");
  }

  return rlen;
}
6370 | + | |
6371 | +/* ********************************** */ | |
6372 | + | |
6373 | +static void ring_proc_init(void) { | |
6374 | + ring_proc_dir = proc_mkdir("pf_ring", proc_net); | |
6375 | + | |
6376 | + if(ring_proc_dir) { | |
6377 | + ring_proc_dir->owner = THIS_MODULE; | |
6378 | + ring_proc = create_proc_read_entry("info", 0, ring_proc_dir, | |
6379 | + ring_proc_get_info, NULL); | |
6380 | + if(!ring_proc) | |
6381 | + printk("PF_RING: unable to register proc file\n"); | |
6382 | + else { | |
6383 | + ring_proc->owner = THIS_MODULE; | |
6384 | + printk("PF_RING: registered /proc/net/pf_ring/\n"); | |
6385 | + } | |
6386 | + } else | |
6387 | + printk("PF_RING: unable to create /proc/net/pf_ring\n"); | |
6388 | +} | |
6389 | + | |
6390 | +/* ********************************** */ | |
6391 | + | |
6392 | +static void ring_proc_term(void) { | |
6393 | + if(ring_proc != NULL) { | |
6394 | + remove_proc_entry("info", ring_proc_dir); | |
6395 | + if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", proc_net); | |
6396 | + | |
6397 | + printk("PF_RING: deregistered /proc/net/pf_ring\n"); | |
6398 | + } | |
6399 | +} | |
6400 | + | |
6401 | +/* ********************************** */ | |
6402 | + | |
6403 | +/* | |
6404 | + * ring_insert() | |
6405 | + * | |
6406 | + * store the sk in a new element and add it | |
6407 | + * to the head of the list. | |
6408 | + */ | |
6409 | +static inline void ring_insert(struct sock *sk) { | |
6410 | + struct ring_element *next; | |
6411 | + | |
6412 | +#if defined(RING_DEBUG) | |
6413 | + printk("RING: ring_insert()\n"); | |
6414 | +#endif | |
6415 | + | |
6416 | + next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC); | |
6417 | + if(next != NULL) { | |
6418 | + next->sk = sk; | |
6419 | + write_lock_irq(&ring_mgmt_lock); | |
6420 | + list_add(&next->list, &ring_table); | |
6421 | + write_unlock_irq(&ring_mgmt_lock); | |
6422 | + } else { | |
6423 | + if(net_ratelimit()) | |
6424 | + printk("RING: could not kmalloc slot!!\n"); | |
6425 | + } | |
6426 | + | |
6427 | + ring_table_size++; | |
6428 | + ring_proc_add(ring_sk(sk)); | |
6429 | +} | |
6430 | + | |
6431 | +/* ********************************** */ | |
6432 | + | |
6433 | +/* | |
6434 | + * ring_remove() | |
6435 | + * | |
6436 | + * For each of the elements in the list: | |
6437 | + * - check if this is the element we want to delete | |
6438 | + * - if it is, remove it from the list, and free it. | |
6439 | + * | |
6440 | + * stop when we find the one we're looking for (break), | |
6441 | + * or when we reach the end of the list. | |
6442 | + */ | |
6443 | +static inline void ring_remove(struct sock *sk) { | |
6444 | + struct list_head *ptr; | |
6445 | + struct ring_element *entry; | |
6446 | + | |
6447 | + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { | |
6448 | + entry = list_entry(ptr, struct ring_element, list); | |
6449 | + | |
6450 | + if(entry->sk == sk) { | |
6451 | + list_del(ptr); | |
6452 | + kfree(ptr); | |
6453 | + ring_table_size--; | |
6454 | + break; | |
6455 | + } | |
6456 | + } | |
6457 | +} | |
6458 | + | |
6459 | +/* ********************************** */ | |
6460 | + | |
6461 | +static u_int32_t num_queued_pkts(struct ring_opt *pfr) { | |
6462 | + | |
6463 | + if(pfr->ring_slots != NULL) { | |
6464 | + | |
6465 | + u_int32_t tot_insert = pfr->slots_info->insert_idx, | |
6466 | +#if defined(RING_DEBUG) | |
6467 | + tot_read = pfr->slots_info->tot_read, tot_pkts; | |
6468 | +#else | |
6469 | + tot_read = pfr->slots_info->tot_read; | |
6470 | +#endif | |
6471 | + | |
6472 | + if(tot_insert >= tot_read) { | |
6473 | +#if defined(RING_DEBUG) | |
6474 | + tot_pkts = tot_insert-tot_read; | |
6475 | +#endif | |
6476 | + return(tot_insert-tot_read); | |
6477 | + } else { | |
6478 | +#if defined(RING_DEBUG) | |
6479 | + tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read; | |
6480 | +#endif | |
6481 | + return(((u_int32_t)-1)+tot_insert-tot_read); | |
6482 | + } | |
6483 | + | |
6484 | +#if defined(RING_DEBUG) | |
6485 | + printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n", | |
6486 | + tot_pkts, tot_insert, tot_read); | |
6487 | +#endif | |
6488 | + | |
6489 | + } else | |
6490 | + return(0); | |
6491 | +} | |
6492 | + | |
6493 | +/* ********************************** */ | |
6494 | + | |
6495 | +static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) { | |
6496 | +#if defined(RING_DEBUG) | |
6497 | + printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx); | |
6498 | +#endif | |
6499 | + | |
6500 | + if(pfr->ring_slots != NULL) { | |
6501 | + FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx | |
6502 | + *pfr->slots_info->slot_len]); | |
6503 | + return(slot); | |
6504 | + } else | |
6505 | + return(NULL); | |
6506 | +} | |
6507 | + | |
6508 | +/* ********************************** */ | |
6509 | + | |
6510 | +static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) { | |
6511 | +#if defined(RING_DEBUG) | |
6512 | + printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx); | |
6513 | +#endif | |
6514 | + | |
6515 | + if(pfr->ring_slots != NULL) | |
6516 | + return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx* | |
6517 | + pfr->slots_info->slot_len])); | |
6518 | + else | |
6519 | + return(NULL); | |
6520 | +} | |
6521 | + | |
6522 | +/* ******************************************************* */ | |
6523 | + | |
/*
 * parse_pkt()
 *
 * Decode the Ethernet / 802.1q / IPv4 headers of 'skb' (whose MAC
 * header starts 'skb_displ' bytes before skb->data) and fill the out
 * parameters.  Returns 1 for IPv4 packets, 0 otherwise (IPv6 is not
 * handled yet).  ipv4_src/ipv4_dst and the port fields are only valid
 * when 1 is returned; vlan_id is (u_int16_t)-1 when no VLAN tag.
 */
static int parse_pkt(struct sk_buff *skb, u_int16_t skb_displ,
		     u_int8_t *l3_proto, u_int16_t *eth_type,
		     u_int16_t *l3_offset, u_int16_t *l4_offset,
		     u_int16_t *vlan_id, u_int32_t *ipv4_src,
		     u_int32_t *ipv4_dst,
		     u_int16_t *l4_src_port, u_int16_t *l4_dst_port,
		     u_int16_t *payload_offset) {
  struct iphdr *ip;
  struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
  u_int16_t displ;

  *l3_offset = *l4_offset = *l3_proto = *payload_offset = 0;
  *eth_type = ntohs(eh->h_proto);

  if(*eth_type == 0x8100 /* 802.1q (VLAN) */) {
    /* Low 12 bits of the TCI are the VLAN id; the real ethertype
       follows the 4-byte tag.
       NOTE(review): these byte offsets index from skb->data, not from
       skb->data-skb_displ like 'eh' above -- confirm skb_displ is
       always 0 when this path is taken. */
    (*vlan_id) = (skb->data[14] & 15)*256 + skb->data[15];
    *eth_type = (skb->data[16])*256 + skb->data[17];
    displ = 4;
  } else {
    displ = 0;
    (*vlan_id) = (u_int16_t)-1; /* -1 == "no VLAN tag" */
  }

  if(*eth_type == 0x0800 /* IP */) {
    *l3_offset = displ+sizeof(struct ethhdr);
    ip = (struct iphdr*)(skb->data-skb_displ+(*l3_offset));

    *ipv4_src = ntohl(ip->saddr), *ipv4_dst = ntohl(ip->daddr), *l3_proto = ip->protocol;

    if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP)) {
      *l4_offset = (*l3_offset)+(ip->ihl*4); /* ihl counts 32-bit words */

      if(ip->protocol == IPPROTO_TCP) {
	struct tcphdr *tcp = (struct tcphdr*)(skb->data-skb_displ+(*l4_offset));
	*l4_src_port = ntohs(tcp->source), *l4_dst_port = ntohs(tcp->dest);
	*payload_offset = (*l4_offset)+(tcp->doff * 4); /* doff counts 32-bit words */
      } else if(ip->protocol == IPPROTO_UDP) {
	struct udphdr *udp = (struct udphdr*)(skb->data-skb_displ+(*l4_offset));
	*l4_src_port = ntohs(udp->source), *l4_dst_port = ntohs(udp->dest);
	*payload_offset = (*l4_offset)+sizeof(struct udphdr);
      } else
	*payload_offset = (*l4_offset);
    } else
      *l4_src_port = *l4_dst_port = 0;

    return(1); /* IP */
  } /* TODO: handle IPv6 */

  return(0); /* No IP */
}
6574 | + | |
6575 | +/* **************************************************************** */ | |
6576 | + | |
6577 | +static void reset_bitmask(bitmask_selector *selector) | |
6578 | +{ | |
6579 | + memset((char*)selector->bits_memory, 0, selector->num_bits/8); | |
6580 | + | |
6581 | + while(selector->clashes != NULL) { | |
6582 | + bitmask_counter_list *next = selector->clashes->next; | |
6583 | + kfree(selector->clashes); | |
6584 | + selector->clashes = next; | |
6585 | + } | |
6586 | +} | |
6587 | + | |
6588 | +/* **************************************************************** */ | |
6589 | + | |
6590 | +static void alloc_bitmask(u_int32_t tot_bits, bitmask_selector *selector) | |
6591 | +{ | |
6592 | + u_int tot_mem = tot_bits/8; | |
6593 | + | |
6594 | + if(tot_mem <= PAGE_SIZE) | |
6595 | + selector->order = 1; | |
6596 | + else { | |
6597 | + for(selector->order = 0; (PAGE_SIZE << selector->order) < tot_mem; selector->order++) | |
6598 | + ; | |
6599 | + } | |
6600 | + | |
6601 | + printk("BITMASK: [order=%d][tot_mem=%d]\n", selector->order, tot_mem); | |
6602 | + | |
6603 | + while((selector->bits_memory = __get_free_pages(GFP_ATOMIC, selector->order)) == 0) | |
6604 | + if(selector->order-- == 0) | |
6605 | + break; | |
6606 | + | |
6607 | + if(selector->order == 0) { | |
6608 | + printk("BITMASK: ERROR not enough memory for bitmask\n"); | |
6609 | + selector->num_bits = 0; | |
6610 | + return; | |
6611 | + } | |
6612 | + | |
6613 | + tot_mem = PAGE_SIZE << selector->order; | |
6614 | + printk("BITMASK: succesfully allocated [tot_mem=%d][order=%d]\n", | |
6615 | + tot_mem, selector->order); | |
6616 | + | |
6617 | + selector->num_bits = tot_mem*8; | |
6618 | + selector->clashes = NULL; | |
6619 | + reset_bitmask(selector); | |
6620 | +} | |
6621 | + | |
6622 | +/* ********************************** */ | |
6623 | + | |
6624 | +static void free_bitmask(bitmask_selector *selector) | |
6625 | +{ | |
6626 | + if(selector->bits_memory > 0) | |
6627 | + free_pages(selector->bits_memory, selector->order); | |
6628 | +} | |
6629 | + | |
6630 | +/* ********************************** */ | |
6631 | + | |
6632 | +static void set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) { | |
6633 | + u_int32_t idx = the_bit % selector->num_bits; | |
6634 | + | |
6635 | + if(BITMASK_ISSET(idx, selector)) { | |
6636 | + bitmask_counter_list *head = selector->clashes; | |
6637 | + | |
6638 | + printk("BITMASK: bit %u was already set\n", the_bit); | |
6639 | + | |
6640 | + while(head != NULL) { | |
6641 | + if(head->bit_id == the_bit) { | |
6642 | + head->bit_counter++; | |
6643 | + printk("BITMASK: bit %u is now set to %d\n", the_bit, head->bit_counter); | |
6644 | + return; | |
6645 | + } | |
6646 | + | |
6647 | + head = head->next; | |
6648 | + } | |
6649 | + | |
6650 | + head = kmalloc(sizeof(bitmask_counter_list), GFP_KERNEL); | |
6651 | + if(head) { | |
6652 | + head->bit_id = the_bit; | |
6653 | + head->bit_counter = 1 /* previous value */ + 1 /* the requested set */; | |
6654 | + head->next = selector->clashes; | |
6655 | + selector->clashes = head; | |
6656 | + } else { | |
6657 | + printk("BITMASK: not enough memory\n"); | |
6658 | + return; | |
6659 | + } | |
6660 | + } else { | |
6661 | + BITMASK_SET(idx, selector); | |
6662 | + printk("BITMASK: bit %u is now set\n", the_bit); | |
6663 | + } | |
6664 | +} | |
6665 | + | |
6666 | +/* ********************************** */ | |
6667 | + | |
6668 | +static u_char is_set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) { | |
6669 | + u_int32_t idx = the_bit % selector->num_bits; | |
6670 | + return(BITMASK_ISSET(idx, selector)); | |
6671 | +} | |
6672 | + | |
6673 | +/* ********************************** */ | |
6674 | + | |
/*
 * clear_bit_bitmask()
 *
 * Undo one set_bit_bitmask() on 'the_bit'.  If the bit has a clash
 * entry (i.e. it was set more than once) the reference count is
 * decremented; once it drops back to 1 the entry is freed while the
 * bitmask bit STAYS set, since one reference remains.  Only a bit with
 * no clash entry is actually cleared from the bitmask.
 */
static void clear_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
  u_int32_t idx = the_bit % selector->num_bits;

  if(!BITMASK_ISSET(idx, selector))
    printk("BITMASK: bit %u was not set\n", the_bit);
  else {
    bitmask_counter_list *head = selector->clashes, *prev = NULL;

    /* Look for a clash entry holding extra references on this bit. */
    while(head != NULL) {
      if(head->bit_id == the_bit) {
	head->bit_counter--;

	printk("BITMASK: bit %u is now set to %d\n",
	       the_bit, head->bit_counter);

	if(head->bit_counter == 1) {
	  /* We can now delete this entry as '1' can be
	     accommodated into the bitmask */

	  if(prev == NULL)
	    selector->clashes = head->next;
	  else
	    prev->next = head->next;

	  kfree(head);
	}
	return;
      }

      prev = head; head = head->next;
    }

    /* No clash entry: this was the only reference -- clear the bit. */
    BITMASK_CLR(idx, selector);
    printk("BITMASK: bit %u is now reset\n", the_bit);
  }
}
6711 | + | |
6712 | +/* ********************************** */ | |
6713 | + | |
/* sdbm-style hash: fold the four bytes of 'value' into a 32-bit hash,
   one byte at a time (h = byte + h*65599, written as shifts). */
static u_int32_t sdb_hash(u_int32_t value) {
  u_int8_t bytes[sizeof(value)];
  u_int32_t h = 0;
  u_int32_t pos = 0;

  memcpy(bytes, &value, sizeof(value));

  while(pos < sizeof(value)) {
    h = bytes[pos] + (h << 6) + (h << 16) - h;
    pos++;
  }

  return(h);
}
6727 | + | |
6728 | +/* ********************************** */ | |
6729 | + | |
6730 | +static void handle_bloom_filter_rule(struct ring_opt *pfr, char *buf) { | |
6731 | + u_int count; | |
6732 | + | |
6733 | + if(buf == NULL) | |
6734 | + return; | |
6735 | + else | |
6736 | + count = strlen(buf); | |
6737 | + | |
6738 | + printk("PF_RING: -> handle_bloom_filter_rule(%s)\n", buf); | |
6739 | + | |
6740 | + if((buf[count-1] == '\n') || (buf[count-1] == '\r')) buf[count-1] = '\0'; | |
6741 | + | |
6742 | + if(count > 1) { | |
6743 | + u_int32_t the_bit; | |
6744 | + | |
6745 | + if(!strncmp(&buf[1], "vlan=", 5)) { | |
6746 | + sscanf(&buf[6], "%d", &the_bit); | |
6747 | + | |
6748 | + if(buf[0] == '+') | |
6749 | + set_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_add++; | |
6750 | + else | |
6751 | + clear_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_remove++; | |
6752 | + } else if(!strncmp(&buf[1], "mac=", 4)) { | |
6753 | + int a, b, c, d, e, f; | |
6754 | + | |
6755 | + if(sscanf(&buf[5], "%02x:%02x:%02x:%02x:%02x:%02x:", | |
6756 | + &a, &b, &c, &d, &e, &f) == 6) { | |
6757 | + u_int32_t mac_addr = (a & 0xff) + (b & 0xff) + ((c & 0xff) << 24) + ((d & 0xff) << 16) + ((e & 0xff) << 8) + (f & 0xff); | |
6758 | + | |
6759 | + /* printk("PF_RING: -> [%u][%u][%u][%u][%u][%u] -> [%u]\n", a, b, c, d, e, f, mac_addr); */ | |
6760 | + | |
6761 | + if(buf[0] == '+') | |
6762 | + set_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_add++; | |
6763 | + else | |
6764 | + clear_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_remove++; | |
6765 | + } else | |
6766 | + printk("PF_RING: -> Invalid MAC address '%s'\n", &buf[5]); | |
6767 | + } else if(!strncmp(&buf[1], "ip=", 3)) { | |
6768 | + int a, b, c, d; | |
6769 | + | |
6770 | + if(sscanf(&buf[4], "%d.%d.%d.%d", &a, &b, &c, &d) == 4) { | |
6771 | + u_int32_t ip_addr = ((a & 0xff) << 24) + ((b & 0xff) << 16) + ((c & 0xff) << 8) + (d & 0xff); | |
6772 | + | |
6773 | + if(buf[0] == '+') | |
6774 | + set_bit_bitmask(&pfr->ip_bitmask, ip_addr), set_bit_bitmask(&pfr->ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_add++; | |
6775 | + else | |
6776 | + clear_bit_bitmask(&pfr->ip_bitmask, ip_addr), clear_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_remove++; | |
6777 | + } else | |
6778 | + printk("PF_RING: -> Invalid IP address '%s'\n", &buf[4]); | |
6779 | + } else if(!strncmp(&buf[1], "port=", 5)) { | |
6780 | + sscanf(&buf[6], "%d", &the_bit); | |
6781 | + | |
6782 | + if(buf[0] == '+') | |
6783 | + set_bit_bitmask(&pfr->port_bitmask, the_bit), set_bit_bitmask(&pfr->port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_add++; | |
6784 | + else | |
6785 | + clear_bit_bitmask(&pfr->port_bitmask, the_bit), clear_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_remove++; | |
6786 | + } else if(!strncmp(&buf[1], "proto=", 6)) { | |
6787 | + if(!strncmp(&buf[7], "tcp", 3)) the_bit = 6; | |
6788 | + else if(!strncmp(&buf[7], "udp", 3)) the_bit = 17; | |
6789 | + else if(!strncmp(&buf[7], "icmp", 4)) the_bit = 1; | |
6790 | + else sscanf(&buf[7], "%d", &the_bit); | |
6791 | + | |
6792 | + if(buf[0] == '+') | |
6793 | + set_bit_bitmask(&pfr->proto_bitmask, the_bit); | |
6794 | + else | |
6795 | + clear_bit_bitmask(&pfr->proto_bitmask, the_bit); | |
6796 | + } else | |
6797 | + printk("PF_RING: -> Unknown rule type '%s'\n", buf); | |
6798 | + } | |
6799 | +} | |
6800 | + | |
6801 | +/* ********************************** */ | |
6802 | + | |
6803 | +static void reset_bloom_filters(struct ring_opt *pfr) { | |
6804 | + reset_bitmask(&pfr->mac_bitmask); | |
6805 | + reset_bitmask(&pfr->vlan_bitmask); | |
6806 | + reset_bitmask(&pfr->ip_bitmask); reset_bitmask(&pfr->twin_ip_bitmask); | |
6807 | + reset_bitmask(&pfr->port_bitmask); reset_bitmask(&pfr->twin_port_bitmask); | |
6808 | + reset_bitmask(&pfr->proto_bitmask); | |
6809 | + | |
6810 | + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0; | |
6811 | + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0; | |
6812 | + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0; | |
6813 | + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0; | |
6814 | + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0; | |
6815 | + | |
6816 | + printk("PF_RING: rules have been reset\n"); | |
6817 | +} | |
6818 | + | |
6819 | +/* ********************************** */ | |
6820 | + | |
6821 | +static void init_blooms(struct ring_opt *pfr) { | |
6822 | + alloc_bitmask(4096, &pfr->mac_bitmask); | |
6823 | + alloc_bitmask(4096, &pfr->vlan_bitmask); | |
6824 | + alloc_bitmask(32768, &pfr->ip_bitmask); alloc_bitmask(32768, &pfr->twin_ip_bitmask); | |
6825 | + alloc_bitmask(4096, &pfr->port_bitmask); alloc_bitmask(4096, &pfr->twin_port_bitmask); | |
6826 | + alloc_bitmask(4096, &pfr->proto_bitmask); | |
6827 | + | |
6828 | + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0; | |
6829 | + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0; | |
6830 | + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0; | |
6831 | + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0; | |
6832 | + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0; | |
6833 | + | |
6834 | + reset_bloom_filters(pfr); | |
6835 | +} | |
6836 | + | |
6837 | +/* ********************************** */ | |
6838 | + | |
/* Aho-Corasick per-match callback handed to acsmSearch2().  Returning 0
   means "keep searching"; the caller (add_skb_to_ring) only uses the
   boolean result of acsmSearch2() itself, so no per-match state is
   recorded here. */
inline int MatchFound (void* id, int index, void *data) { return(0); }
6840 | + | |
6841 | +/* ********************************** */ | |
6842 | + | |
/*
 * add_skb_to_ring()
 *
 * Deliver one captured packet to a single PF_RING socket: apply the
 * socket's BPF filter, sampling rate, optional reflector device and
 * bloom/pattern filters, then copy pcap header + payload into the next
 * free ring slot and wake any poll()er.
 *
 * recv_packet: 1 = RX path, 0 = TX path.
 * real_skb:    1 = genuine skb, 0 = faked skb (e1000 wrapper).
 */
static void add_skb_to_ring(struct sk_buff *skb,
			    struct ring_opt *pfr,
			    u_char recv_packet,
			    u_char real_skb /* 1=skb 0=faked skb */) {
  FlowSlot *theSlot;
  int idx, displ, fwd_pkt = 0;

  /* displ = bytes skb->data sits past the MAC header; only genuine
     received skbs need rewinding by SKB_DISPLACEMENT. */
  if(recv_packet) {
    /* Hack for identifying a packet received by the e1000 */
    if(real_skb) {
      displ = SKB_DISPLACEMENT;
    } else
      displ = 0; /* Received by the e1000 wrapper */
  } else
    displ = 0;

  write_lock(&pfr->ring_index_lock);
  pfr->slots_info->tot_pkts++;
  write_unlock(&pfr->ring_index_lock);

  /* BPF Filtering (from af_packet.c) */
  if(pfr->bpfFilter != NULL) {
    unsigned res = 1, len; /* NOTE(review): 'len' is computed but never used */

    len = skb->len-skb->data_len;

    /* Temporarily rewind skb->data so the filter sees the MAC header. */
    write_lock(&pfr->ring_index_lock);
    skb->data -= displ;
    res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
    skb->data += displ;
    write_unlock(&pfr->ring_index_lock);

    if(res == 0) {
      /* Filter failed */

#if defined(RING_DEBUG)
      printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
	     "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
	     (int)skb->len, pfr->slots_info->tot_pkts,
	     pfr->slots_info->insert_idx,
	     skb->pkt_type, skb->cloned);
#endif

      return;
    }
  }

  /* ************************** */

  /* Sampling: keep one packet out of every sample_rate. */
  if(pfr->sample_rate > 1) {
    if(pfr->pktToSample == 0) {
      write_lock(&pfr->ring_index_lock);
      pfr->pktToSample = pfr->sample_rate;
      write_unlock(&pfr->ring_index_lock);
    } else {
      write_lock(&pfr->ring_index_lock);
      pfr->pktToSample--;
      write_unlock(&pfr->ring_index_lock);

#if defined(RING_DEBUG)
      printk("add_skb_to_ring(skb): sampled packet [len=%d]"
	     "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
	     (int)skb->len, pfr->slots_info->tot_pkts,
	     pfr->slots_info->insert_idx,
	     skb->pkt_type, skb->cloned);
#endif
      return;
    }
  }

  /* ************************************* */

  /* Reflector: retransmit the packet on another device instead of
     queueing it (returns in every branch of this block). */
  if((pfr->reflector_dev != NULL)
     && (!netif_queue_stopped(pfr->reflector_dev))) {
    int cpu = smp_processor_id();

    /* increase reference counter so that this skb is not freed */
    atomic_inc(&skb->users);

    skb->data -= displ;

    /* send it */
    if (pfr->reflector_dev->xmit_lock_owner != cpu) {
      /* Patch below courtesy of Matthew J. Roth <mroth@imminc.com> */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
      spin_lock_bh(&pfr->reflector_dev->xmit_lock);
      pfr->reflector_dev->xmit_lock_owner = cpu;
      spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
#else
      netif_tx_lock_bh(pfr->reflector_dev);
#endif
      if (pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev) == 0) {
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
	spin_lock_bh(&pfr->reflector_dev->xmit_lock);
	pfr->reflector_dev->xmit_lock_owner = -1;
	spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
#else
	netif_tx_unlock_bh(pfr->reflector_dev);
#endif
	skb->data += displ;
#if defined(RING_DEBUG)
	printk("++ hard_start_xmit succeeded\n");
#endif
	return; /* OK */
      }

#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
      spin_lock_bh(&pfr->reflector_dev->xmit_lock);
      pfr->reflector_dev->xmit_lock_owner = -1;
      spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
#else
      netif_tx_unlock_bh(pfr->reflector_dev);
#endif
    }

#if defined(RING_DEBUG)
    printk("++ hard_start_xmit failed\n");
#endif
    skb->data += displ;
    return; /* -ENETDOWN */
  }

  /* ************************************* */

#if defined(RING_DEBUG)
  printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
	 "[pkt_type=%d][cloned=%d]\n",
	 (int)skb->len, pfr->slots_info->tot_pkts,
	 pfr->slots_info->insert_idx,
	 skb->pkt_type, skb->cloned);
#endif

  idx = pfr->slots_info->insert_idx;
  theSlot = get_insert_slot(pfr);

  /* slot_state == 0 means the slot is free (userland already read it). */
  if((theSlot != NULL) && (theSlot->slot_state == 0)) {
    struct pcap_pkthdr *hdr;
    char *bucket;
    int is_ip_pkt, debug = 0;

    /* Update Index */
    idx++;

    bucket = &theSlot->bucket;
    hdr = (struct pcap_pkthdr*)bucket;

    /* BD - API changed for time keeping */
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
    if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);

    hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
#else
    if(skb->tstamp.tv64 == 0) __net_timestamp(skb);

    struct timeval tv = ktime_to_timeval(skb->tstamp);
    hdr->ts.tv_sec = tv.tv_sec, hdr->ts.tv_usec = tv.tv_usec;
#endif
    hdr->caplen = skb->len+displ;

    /* Truncate the capture to the per-slot payload room. */
    if(hdr->caplen > pfr->slots_info->data_len)
      hdr->caplen = pfr->slots_info->data_len;

    hdr->len = skb->len+displ;

    /* Extensions */
    is_ip_pkt = parse_pkt(skb, displ,
			  &hdr->l3_proto,
			  &hdr->eth_type,
			  &hdr->l3_offset,
			  &hdr->l4_offset,
			  &hdr->vlan_id,
			  &hdr->ipv4_src,
			  &hdr->ipv4_dst,
			  &hdr->l4_src_port,
			  &hdr->l4_dst_port,
			  &hdr->payload_offset);

    /* Bloom-filter pass: forward only packets whose VLAN / MAC / IP /
       port / proto hits one of the configured bitmasks. */
    if(is_ip_pkt && pfr->bitmask_enabled) {
      int vlan_match = 0;

      fwd_pkt = 0;

      if(debug) {
	if(is_ip_pkt)
	  printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d][sport=%d][dport=%d][src=%u][dst=%u]\n",
		 hdr->l3_proto, hdr->vlan_id, hdr->l4_src_port, hdr->l4_dst_port, hdr->ipv4_src, hdr->ipv4_dst);
	else
	  printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d]\n", hdr->l3_proto, hdr->vlan_id);
      }

      /* Untagged packets ((u_int16_t)-1) always pass the VLAN check. */
      if(hdr->vlan_id != (u_int16_t)-1) {
	vlan_match = is_set_bit_bitmask(&pfr->vlan_bitmask, hdr->vlan_id);
      } else
	vlan_match = 1;

      if(vlan_match) {
	struct ethhdr *eh = (struct ethhdr*)(skb->data);
	u_int32_t src_mac = (eh->h_source[0] & 0xff) + (eh->h_source[1] & 0xff) + ((eh->h_source[2] & 0xff) << 24)
	  + ((eh->h_source[3] & 0xff) << 16) + ((eh->h_source[4] & 0xff) << 8) + (eh->h_source[5] & 0xff);

	if(debug) printk(KERN_INFO "PF_RING: [src_mac=%u]\n", src_mac);

	fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, src_mac);

	/* Fall through the cheaper checks first; stop at first hit. */
	if(!fwd_pkt) {
	  u_int32_t dst_mac = (eh->h_dest[0] & 0xff) + (eh->h_dest[1] & 0xff) + ((eh->h_dest[2] & 0xff) << 24)
	    + ((eh->h_dest[3] & 0xff) << 16) + ((eh->h_dest[4] & 0xff) << 8) + (eh->h_dest[5] & 0xff);

	  if(debug) printk(KERN_INFO "PF_RING: [dst_mac=%u]\n", dst_mac);

	  fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, dst_mac);

	  if(is_ip_pkt && (!fwd_pkt)) {
	    fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_src);

	    if(!fwd_pkt) {
	      fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_dst);

	      if((!fwd_pkt) && ((hdr->l3_proto == IPPROTO_TCP)
				|| (hdr->l3_proto == IPPROTO_UDP))) {
		fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_src_port);
		if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_dst_port);
	      }

	      if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->proto_bitmask, hdr->l3_proto);
	    }
	  }
	}
      }
    } else
      fwd_pkt = 1;

    /* Optional Aho-Corasick payload pattern matching (port-80 traffic
       only); a non-matching payload drops the packet. */
    if(fwd_pkt && (pfr->acsm != NULL)) {
      if((hdr->payload_offset > 0) && ((skb->len+skb->mac_len) > hdr->payload_offset)) {
	char *payload = (skb->data-displ+hdr->payload_offset);
	int payload_len = skb->len /* + skb->mac_len */ - hdr->payload_offset;

	if((payload_len > 0)
	   && ((hdr->l4_src_port == 80) || (hdr->l4_dst_port == 80))) {
	  int rc;

	  if(0) {
	    /* NOTE(review): dead debug code; if ever enabled, buf[1500]
	       overflows when payload_len >= 1500. */
	    char buf[1500];

	    memcpy(buf, payload, payload_len);
	    buf[payload_len] = '\0';
	    printk("[%s]\n", payload);
	  }

	  /* printk("Tring to match pattern [len=%d][%s]\n", payload_len, payload); */
	  rc = acsmSearch2(pfr->acsm, payload, payload_len, MatchFound, (void *)0) ? 1 : 0;

	  // printk("Match result: %d\n", fwd_pkt);
	  if(rc) {
	    printk("Pattern matched!\n");
	  } else {
	    fwd_pkt = 0;
	  }
	} else
	  fwd_pkt = 0;
      } else
	fwd_pkt = 0;
    }

    if(fwd_pkt) {
      /* Copy the packet just after the pcap header inside the slot. */
      memcpy(&bucket[sizeof(struct pcap_pkthdr)], skb->data-displ, hdr->caplen);

#if defined(RING_DEBUG)
      {
	static unsigned int lastLoss = 0;

	if(pfr->slots_info->tot_lost
	   && (lastLoss != pfr->slots_info->tot_lost)) {
	  printk("add_skb_to_ring(%d): [data_len=%d]"
		 "[hdr.caplen=%d][skb->len=%d]"
		 "[pcap_pkthdr=%d][removeIdx=%d]"
		 "[loss=%lu][page=%u][slot=%u]\n",
		 idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len,
		 sizeof(struct pcap_pkthdr),
		 pfr->slots_info->remove_idx,
		 (long unsigned int)pfr->slots_info->tot_lost,
		 pfr->insert_page_id, pfr->insert_slot_id);

	  lastLoss = pfr->slots_info->tot_lost;
	}
      }
#endif

      /* Publish the slot under the lock: advance insert_idx (with
	 wraparound) and flip slot_state only after the copy above. */
      write_lock(&pfr->ring_index_lock);
      if(idx == pfr->slots_info->tot_slots)
	pfr->slots_info->insert_idx = 0;
      else
	pfr->slots_info->insert_idx = idx;

      pfr->slots_info->tot_insert++;
      theSlot->slot_state = 1;
      write_unlock(&pfr->ring_index_lock);
    }
  } else {
    /* Ring full (or slots not allocated): account the drop. */
    write_lock(&pfr->ring_index_lock);
    pfr->slots_info->tot_lost++;
    write_unlock(&pfr->ring_index_lock);

#if defined(RING_DEBUG)
    printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
	   "[removeIdx=%u][insertIdx=%u]\n",
	   (long unsigned int)pfr->slots_info->tot_lost,
	   pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
#endif
  }

  if(fwd_pkt) {

    /* wakeup in case of poll() */
    if(waitqueue_active(&pfr->ring_slots_waitqueue))
      wake_up_interruptible(&pfr->ring_slots_waitqueue);
  }
}
7161 | + | |
7162 | +/* ********************************** */ | |
7163 | + | |
/*
 * hash_skb()
 *
 * Pick the cluster member index for 'skb': either round-robin, or a
 * per-flow hash over (saddr, daddr, proto[, ports]) so every packet of
 * a flow lands on the same socket.
 */
static u_int hash_skb(struct ring_cluster *cluster_ptr,
		      struct sk_buff *skb, u_char recv_packet) {
  u_int idx;
  int displ;
  struct iphdr *ip;

  if(cluster_ptr->hashing_mode == cluster_round_robin) {
    idx = cluster_ptr->hashing_id++;
  } else {
    /* Per-flow clustering */
    if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
      /* NOTE(review): the displacement polarity is the opposite of
	 add_skb_to_ring() (here recv_packet -> 0, transmitted ->
	 SKB_DISPLACEMENT) -- confirm this is intended. */
      if(recv_packet)
	displ = 0;
      else
	displ = SKB_DISPLACEMENT;

      /*
	skb->data+displ

	Always points to the IP part of the packet
      */

      ip = (struct iphdr*)(skb->data+displ);

      idx = ip->saddr+ip->daddr+ip->protocol;

      if(ip->protocol == IPPROTO_TCP) {
	struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
					      +sizeof(struct iphdr));
	idx += tcp->source+tcp->dest;
      } else if(ip->protocol == IPPROTO_UDP) {
	struct udphdr *udp = (struct udphdr*)(skb->data+displ
					      +sizeof(struct iphdr));
	idx += udp->source+udp->dest;
      }
    } else
      idx = skb->len; /* too short for IP+TCP headers: hash on length */
  }

  /* Caller (skb_ring_handler) guarantees num_cluster_elements > 0. */
  return(idx % cluster_ptr->num_cluster_elements);
}
7205 | + | |
7206 | +/* ********************************** */ | |
7207 | + | |
/*
 * skb_ring_handler()
 *
 * Main capture hook: hands 'skb' to every matching unclustered ring
 * socket, then to at most one socket per cluster (chosen by
 * hash_skb()).  Returns 1 when at least one ring consumed the packet
 * (in which case a real skb is freed here, unless transparent_mode
 * also leaves it to the stack), 0 otherwise.
 */
static int skb_ring_handler(struct sk_buff *skb,
			    u_char recv_packet,
			    u_char real_skb /* 1=skb 0=faked skb */) {
  struct sock *skElement;
  int rc = 0;
  struct list_head *ptr;
  struct ring_cluster *cluster_ptr;

#ifdef PROFILING
  uint64_t rdt = _rdtsc(), rdt1, rdt2;
#endif

  if((!skb) /* Invalid skb */
     || ((!enable_tx_capture) && (!recv_packet))) {
    /*
      An outgoing packet is about to be sent out
      but we decided not to handle transmitted
      packets.
    */
    return(0);
  }

#if defined(RING_DEBUG)
  if(0) {
    printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
	   skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
  }
#endif

#ifdef PROFILING
  rdt1 = _rdtsc();
#endif

  /* [1] Check unclustered sockets */
  /* NOTE(review): the list itself is traversed without holding
     ring_mgmt_lock -- the lock is only taken around the per-entry
     dereferences and around add_skb_to_ring(); confirm this is safe
     against a concurrent ring_remove(). */
  for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
    struct ring_opt *pfr;
    struct ring_element *entry;

    entry = list_entry(ptr, struct ring_element, list);

    read_lock(&ring_mgmt_lock);
    skElement = entry->sk;
    pfr = ring_sk(skElement);
    read_unlock(&ring_mgmt_lock);

    /* Match: not in a cluster, ring allocated, and bound to this
       device (or to the bonding master of a slave device). */
    if((pfr != NULL)
       && (pfr->cluster_id == 0 /* No cluster */)
       && (pfr->ring_slots != NULL)
       && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
      /* We've found the ring where the packet can be stored */
      read_lock(&ring_mgmt_lock);
      add_skb_to_ring(skb, pfr, recv_packet, real_skb);
      read_unlock(&ring_mgmt_lock);

      rc = 1; /* Ring found: we've done our job */
    }
  }

  /* [2] Check socket clusters */
  cluster_ptr = ring_cluster_list;

  while(cluster_ptr != NULL) {
    struct ring_opt *pfr;

    if(cluster_ptr->num_cluster_elements > 0) {
      u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);

      read_lock(&ring_mgmt_lock);
      skElement = cluster_ptr->sk[skb_hash];
      read_unlock(&ring_mgmt_lock);

      if(skElement != NULL) {
	pfr = ring_sk(skElement);

	if((pfr != NULL)
	   && (pfr->ring_slots != NULL)
	   && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
	  /* We've found the ring where the packet can be stored */
	  read_lock(&ring_mgmt_lock);
	  add_skb_to_ring(skb, pfr, recv_packet, real_skb);
	  read_unlock(&ring_mgmt_lock);

	  rc = 1; /* Ring found: we've done our job */
	}
      }
    }

    cluster_ptr = cluster_ptr->next;
  }

#ifdef PROFILING
  rdt1 = _rdtsc()-rdt1;
#endif

#ifdef PROFILING
  rdt2 = _rdtsc();
#endif

  /* In transparent mode the packet is always left to the stack too. */
  if(transparent_mode) rc = 0;

  if((rc != 0) && real_skb)
    dev_kfree_skb(skb); /* Free the skb */

#ifdef PROFILING
  rdt2 = _rdtsc()-rdt2;
  rdt = _rdtsc()-rdt;

#if defined(RING_DEBUG)
  printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
	 (int)rdt, rdt-rdt1,
	 (int)((float)((rdt-rdt1)*100)/(float)rdt),
	 rdt2,
	 (int)((float)(rdt2*100)/(float)rdt));
#endif
#endif

  return(rc); /* 0 = packet not handled */
}
7326 | + | |
7327 | +/* ********************************** */ | |
7328 | + | |
7329 | +struct sk_buff skb; | |
7330 | + | |
7331 | +static int buffer_ring_handler(struct net_device *dev, | |
7332 | + char *data, int len) { | |
7333 | + | |
7334 | +#if defined(RING_DEBUG) | |
7335 | + printk("buffer_ring_handler: [dev=%s][len=%d]\n", | |
7336 | + dev->name == NULL ? "<NULL>" : dev->name, len); | |
7337 | +#endif | |
7338 | + | |
7339 | + /* BD - API changed for time keeping */ | |
7340 | +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14)) | |
7341 | + skb.dev = dev, skb.len = len, skb.data = data, | |
7342 | + skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */ | |
7343 | +#else | |
7344 | + skb.dev = dev, skb.len = len, skb.data = data, | |
7345 | + skb.data_len = len, skb.tstamp.tv64 = 0; /* Calculate the time */ | |
7346 | +#endif | |
7347 | + | |
7348 | + skb_ring_handler(&skb, 1, 0 /* fake skb */); | |
7349 | + | |
7350 | + return(0); | |
7351 | +} | |
7352 | + | |
7353 | +/* ********************************** */ | |
7354 | + | |
7355 | +static int ring_create(struct socket *sock, int protocol) { | |
7356 | + struct sock *sk; | |
7357 | + struct ring_opt *pfr; | |
7358 | + int err; | |
7359 | + | |
7360 | +#if defined(RING_DEBUG) | |
7361 | + printk("RING: ring_create()\n"); | |
7362 | +#endif | |
7363 | + | |
7364 | + /* Are you root, superuser or so ? */ | |
7365 | + if(!capable(CAP_NET_ADMIN)) | |
7366 | + return -EPERM; | |
7367 | + | |
7368 | + if(sock->type != SOCK_RAW) | |
7369 | + return -ESOCKTNOSUPPORT; | |
7370 | + | |
7371 | + if(protocol != htons(ETH_P_ALL)) | |
7372 | + return -EPROTONOSUPPORT; | |
7373 | + | |
7374 | +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) | |
7375 | + MOD_INC_USE_COUNT; | |
7376 | +#endif | |
7377 | + | |
7378 | + err = -ENOMEM; | |
7379 | + | |
7380 | + // BD: -- broke this out to keep it more simple and clear as to what the | |
7381 | + // options are. | |
7382 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
7383 | +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11)) | |
7384 | + sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL); | |
7385 | +#else | |
7386 | + // BD: API changed in 2.6.12, ref: | |
7387 | + // http://svn.clkao.org/svnweb/linux/revision/?rev=28201 | |
7388 | + sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1); | |
7389 | +#endif | |
7390 | +#else | |
7391 | + /* Kernel 2.4 */ | |
7392 | + sk = sk_alloc(PF_RING, GFP_KERNEL, 1); | |
7393 | +#endif | |
7394 | + | |
7395 | + if (sk == NULL) | |
7396 | + goto out; | |
7397 | + | |
7398 | + sock->ops = &ring_ops; | |
7399 | + sock_init_data(sock, sk); | |
7400 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
7401 | +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11)) | |
7402 | + sk_set_owner(sk, THIS_MODULE); | |
7403 | +#endif | |
7404 | +#endif | |
7405 | + | |
7406 | + err = -ENOMEM; | |
7407 | + ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL)); | |
7408 | + | |
7409 | + if (!(pfr = ring_sk(sk))) { | |
7410 | + sk_free(sk); | |
7411 | + goto out; | |
7412 | + } | |
7413 | + memset(pfr, 0, sizeof(*pfr)); | |
7414 | + init_waitqueue_head(&pfr->ring_slots_waitqueue); | |
7415 | + pfr->ring_index_lock = RW_LOCK_UNLOCKED; | |
7416 | + atomic_set(&pfr->num_ring_slots_waiters, 0); | |
7417 | + init_blooms(pfr); | |
7418 | + pfr->acsm = NULL; | |
7419 | + | |
7420 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
7421 | + sk->sk_family = PF_RING; | |
7422 | + sk->sk_destruct = ring_sock_destruct; | |
7423 | +#else | |
7424 | + sk->family = PF_RING; | |
7425 | + sk->destruct = ring_sock_destruct; | |
7426 | + sk->num = protocol; | |
7427 | +#endif | |
7428 | + | |
7429 | + ring_insert(sk); | |
7430 | + | |
7431 | +#if defined(RING_DEBUG) | |
7432 | + printk("RING: ring_create() - created\n"); | |
7433 | +#endif | |
7434 | + | |
7435 | + return(0); | |
7436 | + out: | |
7437 | +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0)) | |
7438 | + MOD_DEC_USE_COUNT; | |
7439 | +#endif | |
7440 | + return err; | |
7441 | +} | |
7442 | + | |
7443 | +/* *********************************************** */ | |
7444 | + | |
7445 | +static int ring_release(struct socket *sock) | |
7446 | +{ | |
7447 | + struct sock *sk = sock->sk; | |
7448 | + struct ring_opt *pfr = ring_sk(sk); | |
7449 | + | |
7450 | + if(!sk) return 0; | |
7451 | + | |
7452 | +#if defined(RING_DEBUG) | |
7453 | + printk("RING: called ring_release\n"); | |
7454 | +#endif | |
7455 | + | |
7456 | +#if defined(RING_DEBUG) | |
7457 | + printk("RING: ring_release entered\n"); | |
7458 | +#endif | |
7459 | + | |
7460 | + /* | |
7461 | + The calls below must be placed outside the | |
7462 | + write_lock_irq...write_unlock_irq block. | |
7463 | + */ | |
7464 | + sock_orphan(sk); | |
7465 | + ring_proc_remove(ring_sk(sk)); | |
7466 | + | |
7467 | + write_lock_irq(&ring_mgmt_lock); | |
7468 | + ring_remove(sk); | |
7469 | + sock->sk = NULL; | |
7470 | + | |
7471 | + /* Free the ring buffer */ | |
7472 | + if(pfr->ring_memory) { | |
7473 | + struct page *page, *page_end; | |
7474 | + | |
7475 | + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1); | |
7476 | + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++) | |
7477 | + ClearPageReserved(page); | |
7478 | + | |
7479 | + free_pages(pfr->ring_memory, pfr->order); | |
7480 | + } | |
7481 | + | |
7482 | + free_bitmask(&pfr->mac_bitmask); | |
7483 | + free_bitmask(&pfr->vlan_bitmask); | |
7484 | + free_bitmask(&pfr->ip_bitmask); free_bitmask(&pfr->twin_ip_bitmask); | |
7485 | + free_bitmask(&pfr->port_bitmask); free_bitmask(&pfr->twin_port_bitmask); | |
7486 | + free_bitmask(&pfr->proto_bitmask); | |
7487 | + | |
7488 | + if(pfr->acsm != NULL) acsmFree2(pfr->acsm); | |
7489 | + | |
7490 | + kfree(pfr); | |
7491 | + ring_sk(sk) = NULL; | |
7492 | + | |
7493 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
7494 | + skb_queue_purge(&sk->sk_write_queue); | |
7495 | +#endif | |
7496 | + | |
7497 | + sock_put(sk); | |
7498 | + write_unlock_irq(&ring_mgmt_lock); | |
7499 | + | |
7500 | +#if defined(RING_DEBUG) | |
7501 | + printk("RING: ring_release leaving\n"); | |
7502 | +#endif | |
7503 | + | |
7504 | + return 0; | |
7505 | +} | |
7506 | + | |
7507 | +/* ********************************** */ | |
7508 | +/* | |
7509 | + * We create a ring for this socket and bind it to the specified device | |
7510 | + */ | |
7511 | +static int packet_ring_bind(struct sock *sk, struct net_device *dev) | |
7512 | +{ | |
7513 | + u_int the_slot_len; | |
7514 | + u_int32_t tot_mem; | |
7515 | + struct ring_opt *pfr = ring_sk(sk); | |
7516 | + struct page *page, *page_end; | |
7517 | + | |
7518 | + if(!dev) return(-1); | |
7519 | + | |
7520 | +#if defined(RING_DEBUG) | |
7521 | + printk("RING: packet_ring_bind(%s) called\n", dev->name); | |
7522 | +#endif | |
7523 | + | |
7524 | + /* ********************************************** | |
7525 | + | |
7526 | + ************************************* | |
7527 | + * * | |
7528 | + * FlowSlotInfo * | |
7529 | + * * | |
7530 | + ************************************* <-+ | |
7531 | + * FlowSlot * | | |
7532 | + ************************************* | | |
7533 | + * FlowSlot * | | |
7534 | + ************************************* +- num_slots | |
7535 | + * FlowSlot * | | |
7536 | + ************************************* | | |
7537 | + * FlowSlot * | | |
7538 | + ************************************* <-+ | |
7539 | + | |
7540 | + ********************************************** */ | |
7541 | + | |
7542 | + the_slot_len = sizeof(u_char) /* flowSlot.slot_state */ | |
7543 | +#ifdef RING_MAGIC | |
7544 | + + sizeof(u_char) | |
7545 | +#endif | |
7546 | + + sizeof(struct pcap_pkthdr) | |
7547 | + + bucket_len /* flowSlot.bucket */; | |
7548 | + | |
7549 | + tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len; | |
7550 | + | |
7551 | + /* | |
7552 | + Calculate the value of the order parameter used later. | |
7553 | + See http://www.linuxjournal.com/article.php?sid=1133 | |
7554 | + */ | |
7555 | + for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ; | |
7556 | + | |
7557 | + /* | |
7558 | + We now try to allocate the memory as required. If we fail | |
7559 | + we try to allocate a smaller amount or memory (hence a | |
7560 | + smaller ring). | |
7561 | + */ | |
7562 | + while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0) | |
7563 | + if(pfr->order-- == 0) | |
7564 | + break; | |
7565 | + | |
7566 | + if(pfr->order == 0) { | |
7567 | + printk("RING: ERROR not enough memory for ring\n"); | |
7568 | + return(-1); | |
7569 | + } else { | |
7570 | + printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n", | |
7571 | + PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order); | |
7572 | + } | |
7573 | + | |
7574 | + tot_mem = PAGE_SIZE << pfr->order; | |
7575 | + memset((char*)pfr->ring_memory, 0, tot_mem); | |
7576 | + | |
7577 | + /* Now we need to reserve the pages */ | |
7578 | + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1); | |
7579 | + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++) | |
7580 | + SetPageReserved(page); | |
7581 | + | |
7582 | + pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory; | |
7583 | + pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo)); | |
7584 | + | |
7585 | + pfr->slots_info->version = RING_FLOWSLOT_VERSION; | |
7586 | + pfr->slots_info->slot_len = the_slot_len; | |
7587 | + pfr->slots_info->data_len = bucket_len; | |
7588 | + pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len; | |
7589 | + pfr->slots_info->tot_mem = tot_mem; | |
7590 | + pfr->slots_info->sample_rate = sample_rate; | |
7591 | + | |
7592 | + printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n", | |
7593 | + pfr->slots_info->tot_slots, pfr->slots_info->slot_len, | |
7594 | + pfr->slots_info->tot_mem); | |
7595 | + | |
7596 | +#ifdef RING_MAGIC | |
7597 | + { | |
7598 | + int i; | |
7599 | + | |
7600 | + for(i=0; i<pfr->slots_info->tot_slots; i++) { | |
7601 | + unsigned long idx = i*pfr->slots_info->slot_len; | |
7602 | + FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx]; | |
7603 | + slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0; | |
7604 | + } | |
7605 | + } | |
7606 | +#endif | |
7607 | + | |
7608 | + pfr->insert_page_id = 1, pfr->insert_slot_id = 0; | |
7609 | + | |
7610 | + /* | |
7611 | + IMPORTANT | |
7612 | + Leave this statement here as last one. In fact when | |
7613 | + the ring_netdev != NULL the socket is ready to be used. | |
7614 | + */ | |
7615 | + pfr->ring_netdev = dev; | |
7616 | + | |
7617 | + return(0); | |
7618 | +} | |
7619 | + | |
7620 | +/* ************************************* */ | |
7621 | + | |
7622 | +/* Bind to a device */ | |
7623 | +static int ring_bind(struct socket *sock, | |
7624 | + struct sockaddr *sa, int addr_len) | |
7625 | +{ | |
7626 | + struct sock *sk=sock->sk; | |
7627 | + struct net_device *dev = NULL; | |
7628 | + | |
7629 | +#if defined(RING_DEBUG) | |
7630 | + printk("RING: ring_bind() called\n"); | |
7631 | +#endif | |
7632 | + | |
7633 | + /* | |
7634 | + * Check legality | |
7635 | + */ | |
7636 | + if (addr_len != sizeof(struct sockaddr)) | |
7637 | + return -EINVAL; | |
7638 | + if (sa->sa_family != PF_RING) | |
7639 | + return -EINVAL; | |
7640 | + | |
7641 | + /* Safety check: add trailing zero if missing */ | |
7642 | + sa->sa_data[sizeof(sa->sa_data)-1] = '\0'; | |
7643 | + | |
7644 | +#if defined(RING_DEBUG) | |
7645 | + printk("RING: searching device %s\n", sa->sa_data); | |
7646 | +#endif | |
7647 | + | |
7648 | + if((dev = __dev_get_by_name(sa->sa_data)) == NULL) { | |
7649 | +#if defined(RING_DEBUG) | |
7650 | + printk("RING: search failed\n"); | |
7651 | +#endif | |
7652 | + return(-EINVAL); | |
7653 | + } else | |
7654 | + return(packet_ring_bind(sk, dev)); | |
7655 | +} | |
7656 | + | |
7657 | +/* ************************************* */ | |
7658 | + | |
7659 | +static int ring_mmap(struct file *file, | |
7660 | + struct socket *sock, | |
7661 | + struct vm_area_struct *vma) | |
7662 | +{ | |
7663 | + struct sock *sk = sock->sk; | |
7664 | + struct ring_opt *pfr = ring_sk(sk); | |
7665 | + unsigned long size, start; | |
7666 | + u_int pagesToMap; | |
7667 | + char *ptr; | |
7668 | + | |
7669 | +#if defined(RING_DEBUG) | |
7670 | + printk("RING: ring_mmap() called\n"); | |
7671 | +#endif | |
7672 | + | |
7673 | + if(pfr->ring_memory == 0) { | |
7674 | +#if defined(RING_DEBUG) | |
7675 | + printk("RING: ring_mmap() failed: mapping area to an unbound socket\n"); | |
7676 | +#endif | |
7677 | + return -EINVAL; | |
7678 | + } | |
7679 | + | |
7680 | + size = (unsigned long)(vma->vm_end-vma->vm_start); | |
7681 | + | |
7682 | + if(size % PAGE_SIZE) { | |
7683 | +#if defined(RING_DEBUG) | |
7684 | + printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n"); | |
7685 | +#endif | |
7686 | + return(-EINVAL); | |
7687 | + } | |
7688 | + | |
7689 | + /* if userspace tries to mmap beyond end of our buffer, fail */ | |
7690 | + if(size > pfr->slots_info->tot_mem) { | |
7691 | +#if defined(RING_DEBUG) | |
7692 | + printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem); | |
7693 | +#endif | |
7694 | + return(-EINVAL); | |
7695 | + } | |
7696 | + | |
7697 | + pagesToMap = size/PAGE_SIZE; | |
7698 | + | |
7699 | +#if defined(RING_DEBUG) | |
7700 | + printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap); | |
7701 | +#endif | |
7702 | + | |
7703 | +#if defined(RING_DEBUG) | |
7704 | + printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n", | |
7705 | + pfr->slots_info->slot_len, pfr->slots_info->tot_slots, | |
7706 | + pfr->ring_netdev->name); | |
7707 | +#endif | |
7708 | + | |
7709 | + /* we do not want to have this area swapped out, lock it */ | |
7710 | + vma->vm_flags |= VM_LOCKED; | |
7711 | + start = vma->vm_start; | |
7712 | + | |
7713 | + /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */ | |
7714 | + ptr = (char*)(start+PAGE_SIZE); | |
7715 | + | |
7716 | + if(remap_page_range( | |
7717 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
7718 | + vma, | |
7719 | +#endif | |
7720 | + start, | |
7721 | + __pa(pfr->ring_memory), | |
7722 | + PAGE_SIZE*pagesToMap, vma->vm_page_prot)) { | |
7723 | +#if defined(RING_DEBUG) | |
7724 | + printk("remap_page_range() failed\n"); | |
7725 | +#endif | |
7726 | + return(-EAGAIN); | |
7727 | + } | |
7728 | + | |
7729 | +#if defined(RING_DEBUG) | |
7730 | + printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap); | |
7731 | +#endif | |
7732 | + | |
7733 | + return 0; | |
7734 | +} | |
7735 | + | |
7736 | +/* ************************************* */ | |
7737 | + | |
7738 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
7739 | +static int ring_recvmsg(struct kiocb *iocb, struct socket *sock, | |
7740 | + struct msghdr *msg, size_t len, int flags) | |
7741 | +#else | |
7742 | + static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len, | |
7743 | + int flags, struct scm_cookie *scm) | |
7744 | +#endif | |
7745 | +{ | |
7746 | + FlowSlot* slot; | |
7747 | + struct ring_opt *pfr = ring_sk(sock->sk); | |
7748 | + u_int32_t queued_pkts, num_loops = 0; | |
7749 | + | |
7750 | +#if defined(RING_DEBUG) | |
7751 | + printk("ring_recvmsg called\n"); | |
7752 | +#endif | |
7753 | + | |
7754 | + slot = get_remove_slot(pfr); | |
7755 | + | |
7756 | + while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) { | |
7757 | + wait_event_interruptible(pfr->ring_slots_waitqueue, 1); | |
7758 | + | |
7759 | +#if defined(RING_DEBUG) | |
7760 | + printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n", | |
7761 | + slot->slot_state, queued_pkts, num_loops); | |
7762 | +#endif | |
7763 | + | |
7764 | + if(queued_pkts > 0) { | |
7765 | + if(num_loops++ > MAX_QUEUE_LOOPS) | |
7766 | + break; | |
7767 | + } | |
7768 | + } | |
7769 | + | |
7770 | +#if defined(RING_DEBUG) | |
7771 | + if(slot != NULL) | |
7772 | + printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n", | |
7773 | + queued_pkts, num_loops); | |
7774 | +#endif | |
7775 | + | |
7776 | + return(queued_pkts); | |
7777 | +} | |
7778 | + | |
7779 | +/* ************************************* */ | |
7780 | + | |
7781 | +unsigned int ring_poll(struct file * file, | |
7782 | + struct socket *sock, poll_table *wait) | |
7783 | +{ | |
7784 | + FlowSlot* slot; | |
7785 | + struct ring_opt *pfr = ring_sk(sock->sk); | |
7786 | + | |
7787 | +#if defined(RING_DEBUG) | |
7788 | + printk("poll called\n"); | |
7789 | +#endif | |
7790 | + | |
7791 | + slot = get_remove_slot(pfr); | |
7792 | + | |
7793 | + if((slot != NULL) && (slot->slot_state == 0)) | |
7794 | + poll_wait(file, &pfr->ring_slots_waitqueue, wait); | |
7795 | + | |
7796 | +#if defined(RING_DEBUG) | |
7797 | + printk("poll returning %d\n", slot->slot_state); | |
7798 | +#endif | |
7799 | + | |
7800 | + if((slot != NULL) && (slot->slot_state == 1)) | |
7801 | + return(POLLIN | POLLRDNORM); | |
7802 | + else | |
7803 | + return(0); | |
7804 | +} | |
7805 | + | |
7806 | +/* ************************************* */ | |
7807 | + | |
7808 | +int add_to_cluster_list(struct ring_cluster *el, | |
7809 | + struct sock *sock) { | |
7810 | + | |
7811 | + if(el->num_cluster_elements == CLUSTER_LEN) | |
7812 | + return(-1); /* Cluster full */ | |
7813 | + | |
7814 | + ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id; | |
7815 | + el->sk[el->num_cluster_elements] = sock; | |
7816 | + el->num_cluster_elements++; | |
7817 | + return(0); | |
7818 | +} | |
7819 | + | |
7820 | +/* ************************************* */ | |
7821 | + | |
7822 | +int remove_from_cluster_list(struct ring_cluster *el, | |
7823 | + struct sock *sock) { | |
7824 | + int i, j; | |
7825 | + | |
7826 | + for(i=0; i<CLUSTER_LEN; i++) | |
7827 | + if(el->sk[i] == sock) { | |
7828 | + el->num_cluster_elements--; | |
7829 | + | |
7830 | + if(el->num_cluster_elements > 0) { | |
7831 | + /* The cluster contains other elements */ | |
7832 | + for(j=i; j<CLUSTER_LEN-1; j++) | |
7833 | + el->sk[j] = el->sk[j+1]; | |
7834 | + | |
7835 | + el->sk[CLUSTER_LEN-1] = NULL; | |
7836 | + } else { | |
7837 | + /* Empty cluster */ | |
7838 | + memset(el->sk, 0, sizeof(el->sk)); | |
7839 | + } | |
7840 | + | |
7841 | + return(0); | |
7842 | + } | |
7843 | + | |
7844 | + return(-1); /* Not found */ | |
7845 | +} | |
7846 | + | |
7847 | +/* ************************************* */ | |
7848 | + | |
7849 | +static int remove_from_cluster(struct sock *sock, | |
7850 | + struct ring_opt *pfr) | |
7851 | +{ | |
7852 | + struct ring_cluster *el; | |
7853 | + | |
7854 | +#if defined(RING_DEBUG) | |
7855 | + printk("--> remove_from_cluster(%d)\n", pfr->cluster_id); | |
7856 | +#endif | |
7857 | + | |
7858 | + if(pfr->cluster_id == 0 /* 0 = No Cluster */) | |
7859 | + return(0); /* Noting to do */ | |
7860 | + | |
7861 | + el = ring_cluster_list; | |
7862 | + | |
7863 | + while(el != NULL) { | |
7864 | + if(el->cluster_id == pfr->cluster_id) { | |
7865 | + return(remove_from_cluster_list(el, sock)); | |
7866 | + } else | |
7867 | + el = el->next; | |
7868 | + } | |
7869 | + | |
7870 | + return(-EINVAL); /* Not found */ | |
7871 | +} | |
7872 | + | |
7873 | +/* ************************************* */ | |
7874 | + | |
7875 | +static int add_to_cluster(struct sock *sock, | |
7876 | + struct ring_opt *pfr, | |
7877 | + u_short cluster_id) | |
7878 | +{ | |
7879 | + struct ring_cluster *el; | |
7880 | + | |
7881 | +#ifndef RING_DEBUG | |
7882 | + printk("--> add_to_cluster(%d)\n", cluster_id); | |
7883 | +#endif | |
7884 | + | |
7885 | + if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL); | |
7886 | + | |
7887 | + if(pfr->cluster_id != 0) | |
7888 | + remove_from_cluster(sock, pfr); | |
7889 | + | |
7890 | + el = ring_cluster_list; | |
7891 | + | |
7892 | + while(el != NULL) { | |
7893 | + if(el->cluster_id == cluster_id) { | |
7894 | + return(add_to_cluster_list(el, sock)); | |
7895 | + } else | |
7896 | + el = el->next; | |
7897 | + } | |
7898 | + | |
7899 | + /* There's no existing cluster. We need to create one */ | |
7900 | + if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL) | |
7901 | + return(-ENOMEM); | |
7902 | + | |
7903 | + el->cluster_id = cluster_id; | |
7904 | + el->num_cluster_elements = 1; | |
7905 | + el->hashing_mode = cluster_per_flow; /* Default */ | |
7906 | + el->hashing_id = 0; | |
7907 | + | |
7908 | + memset(el->sk, 0, sizeof(el->sk)); | |
7909 | + el->sk[0] = sock; | |
7910 | + el->next = ring_cluster_list; | |
7911 | + ring_cluster_list = el; | |
7912 | + pfr->cluster_id = cluster_id; | |
7913 | + | |
7914 | + return(0); /* 0 = OK */ | |
7915 | +} | |
7916 | + | |
7917 | +/* ************************************* */ | |
7918 | + | |
7919 | +/* Code taken/inspired from core/sock.c */ | |
7920 | +static int ring_setsockopt(struct socket *sock, | |
7921 | + int level, int optname, | |
7922 | + char *optval, int optlen) | |
7923 | +{ | |
7924 | + struct ring_opt *pfr = ring_sk(sock->sk); | |
7925 | + int val, found, ret = 0; | |
7926 | + u_int cluster_id, do_enable; | |
7927 | + char devName[8], bloom_filter[256], aho_pattern[256]; | |
7928 | + | |
7929 | + if(pfr == NULL) return(-EINVAL); | |
7930 | + | |
7931 | + if (get_user(val, (int *)optval)) | |
7932 | + return -EFAULT; | |
7933 | + | |
7934 | + found = 1; | |
7935 | + | |
7936 | + switch(optname) | |
7937 | + { | |
7938 | + case SO_ATTACH_FILTER: | |
7939 | + ret = -EINVAL; | |
7940 | + if (optlen == sizeof(struct sock_fprog)) { | |
7941 | + unsigned int fsize; | |
7942 | + struct sock_fprog fprog; | |
7943 | + struct sk_filter *filter; | |
7944 | + | |
7945 | + ret = -EFAULT; | |
7946 | + | |
7947 | + /* | |
7948 | + NOTE | |
7949 | + | |
7950 | + Do not call copy_from_user within a held | |
7951 | + splinlock (e.g. ring_mgmt_lock) as this caused | |
7952 | + problems when certain debugging was enabled under | |
7953 | + 2.6.5 -- including hard lockups of the machine. | |
7954 | + */ | |
7955 | + if(copy_from_user(&fprog, optval, sizeof(fprog))) | |
7956 | + break; | |
7957 | + | |
7958 | + fsize = sizeof(struct sock_filter) * fprog.len; | |
7959 | + filter = kmalloc(fsize, GFP_KERNEL); | |
7960 | + | |
7961 | + if(filter == NULL) { | |
7962 | + ret = -ENOMEM; | |
7963 | + break; | |
7964 | + } | |
7965 | + | |
7966 | + if(copy_from_user(filter->insns, fprog.filter, fsize)) | |
7967 | + break; | |
7968 | + | |
7969 | + filter->len = fprog.len; | |
7970 | + | |
7971 | + if(sk_chk_filter(filter->insns, filter->len) != 0) { | |
7972 | + /* Bad filter specified */ | |
7973 | + kfree(filter); | |
7974 | + pfr->bpfFilter = NULL; | |
7975 | + break; | |
7976 | + } | |
7977 | + | |
7978 | + /* get the lock, set the filter, release the lock */ | |
7979 | + write_lock(&ring_mgmt_lock); | |
7980 | + pfr->bpfFilter = filter; | |
7981 | + write_unlock(&ring_mgmt_lock); | |
7982 | + ret = 0; | |
7983 | + } | |
7984 | + break; | |
7985 | + | |
7986 | + case SO_DETACH_FILTER: | |
7987 | + write_lock(&ring_mgmt_lock); | |
7988 | + found = 1; | |
7989 | + if(pfr->bpfFilter != NULL) { | |
7990 | + kfree(pfr->bpfFilter); | |
7991 | + pfr->bpfFilter = NULL; | |
7992 | + write_unlock(&ring_mgmt_lock); | |
7993 | + break; | |
7994 | + } | |
7995 | + ret = -ENONET; | |
7996 | + break; | |
7997 | + | |
7998 | + case SO_ADD_TO_CLUSTER: | |
7999 | + if (optlen!=sizeof(val)) | |
8000 | + return -EINVAL; | |
8001 | + | |
8002 | + if (copy_from_user(&cluster_id, optval, sizeof(cluster_id))) | |
8003 | + return -EFAULT; | |
8004 | + | |
8005 | + write_lock(&ring_mgmt_lock); | |
8006 | + ret = add_to_cluster(sock->sk, pfr, cluster_id); | |
8007 | + write_unlock(&ring_mgmt_lock); | |
8008 | + break; | |
8009 | + | |
8010 | + case SO_REMOVE_FROM_CLUSTER: | |
8011 | + write_lock(&ring_mgmt_lock); | |
8012 | + ret = remove_from_cluster(sock->sk, pfr); | |
8013 | + write_unlock(&ring_mgmt_lock); | |
8014 | + break; | |
8015 | + | |
8016 | + case SO_SET_REFLECTOR: | |
8017 | + if(optlen >= (sizeof(devName)-1)) | |
8018 | + return -EINVAL; | |
8019 | + | |
8020 | + if(optlen > 0) { | |
8021 | + if(copy_from_user(devName, optval, optlen)) | |
8022 | + return -EFAULT; | |
8023 | + } | |
8024 | + | |
8025 | + devName[optlen] = '\0'; | |
8026 | + | |
8027 | +#if defined(RING_DEBUG) | |
8028 | + printk("+++ SO_SET_REFLECTOR(%s)\n", devName); | |
8029 | +#endif | |
8030 | + | |
8031 | + write_lock(&ring_mgmt_lock); | |
8032 | + pfr->reflector_dev = dev_get_by_name(devName); | |
8033 | + write_unlock(&ring_mgmt_lock); | |
8034 | + | |
8035 | +#if defined(RING_DEBUG) | |
8036 | + if(pfr->reflector_dev != NULL) | |
8037 | + printk("SO_SET_REFLECTOR(%s): succeded\n", devName); | |
8038 | + else | |
8039 | + printk("SO_SET_REFLECTOR(%s): device unknown\n", devName); | |
8040 | +#endif | |
8041 | + break; | |
8042 | + | |
8043 | + case SO_SET_BLOOM: | |
8044 | + if(optlen >= (sizeof(bloom_filter)-1)) | |
8045 | + return -EINVAL; | |
8046 | + | |
8047 | + if(optlen > 0) { | |
8048 | + if(copy_from_user(bloom_filter, optval, optlen)) | |
8049 | + return -EFAULT; | |
8050 | + } | |
8051 | + | |
8052 | + bloom_filter[optlen] = '\0'; | |
8053 | + | |
8054 | + write_lock(&ring_mgmt_lock); | |
8055 | + handle_bloom_filter_rule(pfr, bloom_filter); | |
8056 | + write_unlock(&ring_mgmt_lock); | |
8057 | + break; | |
8058 | + | |
8059 | + case SO_SET_STRING: | |
8060 | + if(optlen >= (sizeof(aho_pattern)-1)) | |
8061 | + return -EINVAL; | |
8062 | + | |
8063 | + if(optlen > 0) { | |
8064 | + if(copy_from_user(aho_pattern, optval, optlen)) | |
8065 | + return -EFAULT; | |
8066 | + } | |
8067 | + | |
8068 | + aho_pattern[optlen] = '\0'; | |
8069 | + | |
8070 | + write_lock(&ring_mgmt_lock); | |
8071 | + if(pfr->acsm != NULL) acsmFree2(pfr->acsm); | |
8072 | + if(optlen > 0) { | |
8073 | +#if 1 | |
8074 | + if((pfr->acsm = acsmNew2()) != NULL) { | |
8075 | + int nc=1 /* case sensitive */, i = 0; | |
8076 | + | |
8077 | + pfr->acsm->acsmFormat = ACF_BANDED; | |
8078 | + acsmAddPattern2(pfr->acsm, (unsigned char*)aho_pattern, | |
8079 | + (int)strlen(aho_pattern), nc, 0, 0,(void*)aho_pattern, i); | |
8080 | + acsmCompile2(pfr->acsm); | |
8081 | + } | |
8082 | +#else | |
8083 | + pfr->acsm = kmalloc (10, GFP_KERNEL); /* TEST */ | |
8084 | +#endif | |
8085 | + } | |
8086 | + write_unlock(&ring_mgmt_lock); | |
8087 | + break; | |
8088 | + | |
8089 | + case SO_TOGGLE_BLOOM_STATE: | |
8090 | + if(optlen >= (sizeof(bloom_filter)-1)) | |
8091 | + return -EINVAL; | |
8092 | + | |
8093 | + if(optlen > 0) { | |
8094 | + if(copy_from_user(&do_enable, optval, optlen)) | |
8095 | + return -EFAULT; | |
8096 | + } | |
8097 | + | |
8098 | + write_lock(&ring_mgmt_lock); | |
8099 | + if(do_enable) | |
8100 | + pfr->bitmask_enabled = 1; | |
8101 | + else | |
8102 | + pfr->bitmask_enabled = 0; | |
8103 | + write_unlock(&ring_mgmt_lock); | |
8104 | + printk("SO_TOGGLE_BLOOM_STATE: bloom bitmask %s\n", | |
8105 | + pfr->bitmask_enabled ? "enabled" : "disabled"); | |
8106 | + break; | |
8107 | + | |
8108 | + case SO_RESET_BLOOM_FILTERS: | |
8109 | + if(optlen >= (sizeof(bloom_filter)-1)) | |
8110 | + return -EINVAL; | |
8111 | + | |
8112 | + if(optlen > 0) { | |
8113 | + if(copy_from_user(&do_enable, optval, optlen)) | |
8114 | + return -EFAULT; | |
8115 | + } | |
8116 | + | |
8117 | + write_lock(&ring_mgmt_lock); | |
8118 | + reset_bloom_filters(pfr); | |
8119 | + write_unlock(&ring_mgmt_lock); | |
8120 | + break; | |
8121 | + | |
8122 | + default: | |
8123 | + found = 0; | |
8124 | + break; | |
8125 | + } | |
8126 | + | |
8127 | + if(found) | |
8128 | + return(ret); | |
8129 | + else | |
8130 | + return(sock_setsockopt(sock, level, optname, optval, optlen)); | |
8131 | +} | |
8132 | + | |
8133 | +/* ************************************* */ | |
8134 | + | |
8135 | +static int ring_ioctl(struct socket *sock, | |
8136 | + unsigned int cmd, unsigned long arg) | |
8137 | +{ | |
8138 | + switch(cmd) | |
8139 | + { | |
8140 | +#ifdef CONFIG_INET | |
8141 | + case SIOCGIFFLAGS: | |
8142 | + case SIOCSIFFLAGS: | |
8143 | + case SIOCGIFCONF: | |
8144 | + case SIOCGIFMETRIC: | |
8145 | + case SIOCSIFMETRIC: | |
8146 | + case SIOCGIFMEM: | |
8147 | + case SIOCSIFMEM: | |
8148 | + case SIOCGIFMTU: | |
8149 | + case SIOCSIFMTU: | |
8150 | + case SIOCSIFLINK: | |
8151 | + case SIOCGIFHWADDR: | |
8152 | + case SIOCSIFHWADDR: | |
8153 | + case SIOCSIFMAP: | |
8154 | + case SIOCGIFMAP: | |
8155 | + case SIOCSIFSLAVE: | |
8156 | + case SIOCGIFSLAVE: | |
8157 | + case SIOCGIFINDEX: | |
8158 | + case SIOCGIFNAME: | |
8159 | + case SIOCGIFCOUNT: | |
8160 | + case SIOCSIFHWBROADCAST: | |
8161 | + return(inet_dgram_ops.ioctl(sock, cmd, arg)); | |
8162 | +#endif | |
8163 | + | |
8164 | + default: | |
8165 | + return -ENOIOCTLCMD; | |
8166 | + } | |
8167 | + | |
8168 | + return 0; | |
8169 | +} | |
8170 | + | |
8171 | +/* ************************************* */ | |
8172 | + | |
8173 | +static struct proto_ops ring_ops = { | |
8174 | + .family = PF_RING, | |
8175 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
8176 | + .owner = THIS_MODULE, | |
8177 | +#endif | |
8178 | + | |
8179 | + /* Operations that make no sense on ring sockets. */ | |
8180 | + .connect = sock_no_connect, | |
8181 | + .socketpair = sock_no_socketpair, | |
8182 | + .accept = sock_no_accept, | |
8183 | + .getname = sock_no_getname, | |
8184 | + .listen = sock_no_listen, | |
8185 | + .shutdown = sock_no_shutdown, | |
8186 | + .sendpage = sock_no_sendpage, | |
8187 | + .sendmsg = sock_no_sendmsg, | |
8188 | + .getsockopt = sock_no_getsockopt, | |
8189 | + | |
8190 | + /* Now the operations that really occur. */ | |
8191 | + .release = ring_release, | |
8192 | + .bind = ring_bind, | |
8193 | + .mmap = ring_mmap, | |
8194 | + .poll = ring_poll, | |
8195 | + .setsockopt = ring_setsockopt, | |
8196 | + .ioctl = ring_ioctl, | |
8197 | + .recvmsg = ring_recvmsg, | |
8198 | +}; | |
8199 | + | |
8200 | +/* ************************************ */ | |
8201 | + | |
8202 | +static struct net_proto_family ring_family_ops = { | |
8203 | + .family = PF_RING, | |
8204 | + .create = ring_create, | |
8205 | +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) | |
8206 | + .owner = THIS_MODULE, | |
8207 | +#endif | |
8208 | +}; | |
8209 | + | |
// BD: API changed in 2.6.12, ref:
// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
/* Minimal struct proto required by the post-2.6.11 sk_alloc() API;
   obj_size tells the slab allocator how big each socket object is. */
static struct proto ring_proto = {
  .name     = "PF_RING",
  .owner    = THIS_MODULE,
  .obj_size = sizeof(struct sock),
};
#endif
8219 | + | |
8220 | +/* ************************************ */ | |
8221 | + | |
8222 | +static void __exit ring_exit(void) | |
8223 | +{ | |
8224 | + struct list_head *ptr; | |
8225 | + struct ring_element *entry; | |
8226 | + | |
8227 | + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { | |
8228 | + entry = list_entry(ptr, struct ring_element, list); | |
8229 | + kfree(entry); | |
8230 | + } | |
8231 | + | |
8232 | + while(ring_cluster_list != NULL) { | |
8233 | + struct ring_cluster *next = ring_cluster_list->next; | |
8234 | + kfree(ring_cluster_list); | |
8235 | + ring_cluster_list = next; | |
8236 | + } | |
8237 | + | |
8238 | + set_skb_ring_handler(NULL); | |
8239 | + set_buffer_ring_handler(NULL); | |
8240 | + sock_unregister(PF_RING); | |
8241 | + ring_proc_term(); | |
8242 | + printk("PF_RING shut down.\n"); | |
8243 | +} | |
8244 | + | |
8245 | +/* ************************************ */ | |
8246 | + | |
8247 | +static int __init ring_init(void) | |
8248 | +{ | |
8249 | + printk("Welcome to PF_RING %s\n(C) 2004-07 L.Deri <deri@ntop.org>\n", | |
8250 | + RING_VERSION); | |
8251 | + | |
8252 | + INIT_LIST_HEAD(&ring_table); | |
8253 | + ring_cluster_list = NULL; | |
8254 | + | |
8255 | + sock_register(&ring_family_ops); | |
8256 | + | |
8257 | + set_skb_ring_handler(skb_ring_handler); | |
8258 | + set_buffer_ring_handler(buffer_ring_handler); | |
8259 | + | |
8260 | + if(get_buffer_ring_handler() != buffer_ring_handler) { | |
8261 | + printk("PF_RING: set_buffer_ring_handler FAILED\n"); | |
8262 | + | |
8263 | + set_skb_ring_handler(NULL); | |
8264 | + set_buffer_ring_handler(NULL); | |
8265 | + sock_unregister(PF_RING); | |
8266 | + return -1; | |
8267 | + } else { | |
8268 | + printk("PF_RING: bucket length %d bytes\n", bucket_len); | |
8269 | + printk("PF_RING: ring slots %d\n", num_slots); | |
8270 | + printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate); | |
8271 | + printk("PF_RING: capture TX %s\n", | |
8272 | + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]"); | |
8273 | + printk("PF_RING: transparent mode %s\n", | |
8274 | + transparent_mode ? "Yes" : "No"); | |
8275 | + | |
8276 | + printk("PF_RING initialized correctly.\n"); | |
8277 | + | |
8278 | + ring_proc_init(); | |
8279 | + return 0; | |
8280 | + } | |
8281 | +} | |
8282 | + | |
/* Module entry/exit registration and license declaration. */
module_init(ring_init);
module_exit(ring_exit);
MODULE_LICENSE("GPL");

#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
/* Lets the kernel auto-load this module when a PF_RING socket is created. */
MODULE_ALIAS_NETPROTO(PF_RING);
#endif