1 diff --unified --recursive --new-file linux-2.6.21.4/include/linux/ring.h linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h
2 --- linux-2.6.21.4/include/linux/ring.h 1970-01-01 00:00:00.000000000 +0000
3 +++ linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h 2007-06-10 16:43:04.346421348 +0000
6 + * Definitions for packet ring
8 + * 2004-07 Luca Deri <deri@ntop.org>
13 +#define INCLUDE_MAC_INFO
15 +#ifdef INCLUDE_MAC_INFO
16 +#define SKB_DISPLACEMENT 14 /* Include MAC address information */
18 +#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */
22 +#define RING_MAGIC_VALUE 0x88
23 +#define RING_FLOWSLOT_VERSION 6
24 +#define RING_VERSION "3.4.1"
26 +#define SO_ADD_TO_CLUSTER 99
27 +#define SO_REMOVE_FROM_CLUSTER 100
28 +#define SO_SET_REFLECTOR 101
29 +#define SO_SET_BLOOM 102
30 +#define SO_SET_STRING 103
31 +#define SO_TOGGLE_BLOOM_STATE 104
32 +#define SO_RESET_BLOOM_FILTERS 105
34 +#define BITMASK_SET(n, p) (((char*)(p)->bits_memory)[(n)/8] |= (1<<((n) % 8))) /* set bit n of the bitmap at p->bits_memory; n is evaluated twice */
35 +#define BITMASK_CLR(n, p) (((char*)(p)->bits_memory)[(n)/8] &= ~(1<<((n) % 8))) /* clear bit n of the bitmap at p->bits_memory; n is evaluated twice */
36 +#define BITMASK_ISSET(n, p) (((char*)(p)->bits_memory)[(n)/8] & (1<<((n) % 8))) /* non-zero when bit n of the bitmap at p->bits_memory is set */
38 +/* *********************************** */
41 + Aho-Corasick code taken from Snort
45 + * DEFINES and Typedef's
47 +#define MAX_ALPHABET_SIZE 256
50 + FAIL STATE for 1,2,or 4 bytes for state transitions
52 + Uncomment this define to use 32 bit state values
56 +typedef unsigned short acstate_t;
57 +#define ACSM_FAIL_STATE2 0xffff
63 +struct _acsm_pattern2
65 + struct _acsm_pattern2 *next;
67 + unsigned char *patrn;
68 + unsigned char *casepatrn;
79 + * transition nodes - either 8 or 12 bytes
82 +struct trans_node_s {
84 + acstate_t key; /* The character that got us here - sized to keep structure aligned on 4 bytes */
85 + /* to better the caching opportunities. A value that crosses the cache line */
86 + /* forces an expensive reconstruction, typing this as acstate_t stops that. */
87 + acstate_t next_state; /* */
88 + struct trans_node_s * next; /* next transition for this state */
94 + * User specified final storage type for the state transitions
104 + * User specified machine types
106 + * TRIE : Keyword trie
117 + * Aho-Corasick State Machine Struct - one per group of patterns
123 + ACSM_PATTERN2 * acsmPatterns;
124 + acstate_t * acsmFailState;
125 + ACSM_PATTERN2 ** acsmMatchList;
127 + /* list of transitions in each state, this is used to build the nfa & dfa */
128 + /* after construction we convert to sparse or full format matrix and free */
129 + /* the transition lists */
130 + trans_node_t ** acsmTransTable;
132 + acstate_t ** acsmNextState;
134 + int acsmSparseMaxRowNodes;
135 + int acsmSparseMaxZcnt;
138 + int acsmAlphabetSize;
143 +/* *********************************** */
146 +struct pcap_pkthdr {
147 + struct timeval ts; /* time stamp */
148 + u_int32_t caplen; /* length of portion present */
149 + u_int32_t len; /* length this packet (off wire) */
150 + /* packet parsing info */
151 + u_int16_t eth_type; /* Ethernet type */
152 + u_int16_t vlan_id; /* VLAN Id or -1 for no vlan */
153 + u_int8_t l3_proto; /* Layer 3 protocol */
154 + u_int16_t l3_offset, l4_offset, payload_offset; /* Offsets of L3/L4/payload elements */
155 + u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
156 + u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
160 +/* *********************************** */
162 +typedef struct _counter_list {
164 + u_int32_t bit_counter;
165 + struct _counter_list *next;
166 +} bitmask_counter_list;
169 + u_int32_t num_bits, order, num_pages;
170 + unsigned long bits_memory;
171 + bitmask_counter_list *clashes;
174 +/* *********************************** */
177 + cluster_per_flow = 0,
178 + cluster_round_robin
181 +/* *********************************** */
183 +#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr)) /* 60 bytes plus one struct pcap_pkthdr; 60 is presumably the minimum Ethernet frame length - confirm */
184 +#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr)) /* 1514 bytes plus one struct pcap_pkthdr; 1514 is presumably the maximum Ethernet frame length - confirm */
186 +/* *********************************** */
188 +typedef struct flowSlotInfo {
189 + u_int16_t version, sample_rate;
190 + u_int32_t tot_slots, slot_len, data_len, tot_mem;
192 + u_int64_t tot_pkts, tot_lost;
193 + u_int64_t tot_insert, tot_read;
194 + u_int32_t insert_idx, remove_idx;
197 +/* *********************************** */
199 +typedef struct flowSlot {
201 + u_char magic; /* It must alwasy be zero */
203 + u_char slot_state; /* 0=empty, 1=full */
204 + u_char bucket; /* bucket[bucketLen] */
207 +/* *********************************** */
211 +FlowSlotInfo* getRingPtr(void);
212 +int allocateRing(char *deviceName, u_int numSlots,
213 + u_int bucketLen, u_int sampleRate);
214 +unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
215 +void deallocateRing(void);
217 +/* ************************* */
219 +typedef int (*handle_ring_skb)(struct sk_buff *skb,
220 + u_char recv_packet, u_char real_skb);
221 +extern handle_ring_skb get_skb_ring_handler(void);
222 +extern void set_skb_ring_handler(handle_ring_skb the_handler);
223 +extern void do_skb_ring_handler(struct sk_buff *skb,
224 + u_char recv_packet, u_char real_skb);
226 +typedef int (*handle_ring_buffer)(struct net_device *dev,
227 + char *data, int len);
228 +extern handle_ring_buffer get_buffer_ring_handler(void);
229 +extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
230 +extern int do_buffer_ring_handler(struct net_device *dev,
231 + char *data, int len);
232 +#endif /* __KERNEL__ */
234 +/* *********************************** */
236 +#define PF_RING 27 /* Packet Ring */
237 +#define SOCK_RING PF_RING
240 +#define SIORINGPOLL 0x8888
242 +/* *********************************** */
244 +#endif /* __RING_H */
245 diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/Kconfig
246 --- linux-2.6.21.4/net/Kconfig 2007-06-07 21:27:31.000000000 +0000
247 +++ linux-2.6.21.4-1-686-smp-ring3/net/Kconfig 2007-06-10 16:43:04.402423771 +0000
249 source "net/xfrm/Kconfig"
250 source "net/iucv/Kconfig"
252 +source "net/ring/Kconfig"
254 bool "TCP/IP networking"
256 diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile
257 --- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000
258 +++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000
260 obj-$(CONFIG_DECNET) += decnet/
261 obj-$(CONFIG_ECONET) += econet/
262 obj-$(CONFIG_VLAN_8021Q) += 8021q/
263 +obj-$(CONFIG_RING) += ring/
264 obj-$(CONFIG_IP_DCCP) += dccp/
265 obj-$(CONFIG_IP_SCTP) += sctp/
266 obj-$(CONFIG_IEEE80211) += ieee80211/
267 diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c
268 --- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000
269 +++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000
271 #include <linux/err.h>
272 #include <linux/ctype.h>
274 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
276 +/* #define RING_DEBUG */
278 +#include <linux/ring.h>
279 +#include <linux/version.h>
281 +static handle_ring_skb ring_handler = NULL;
283 +handle_ring_skb get_skb_ring_handler(void) { return(ring_handler); } /* returns the currently registered skb ring handler, or NULL when none is set */
285 +void set_skb_ring_handler(handle_ring_skb the_handler) {
286 + ring_handler = the_handler;
289 +void do_skb_ring_handler(struct sk_buff *skb,
290 + u_char recv_packet, u_char real_skb) {
292 + ring_handler(skb, recv_packet, real_skb);
295 +/* ******************* */
297 +static handle_ring_buffer buffer_ring_handler = NULL;
299 +handle_ring_buffer get_buffer_ring_handler(void) { return(buffer_ring_handler); } /* returns the currently registered buffer ring handler, or NULL when none is set */
301 +void set_buffer_ring_handler(handle_ring_buffer the_handler) {
302 + buffer_ring_handler = the_handler;
305 +int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
306 + if(buffer_ring_handler) {
307 + buffer_ring_handler(dev, data, len);
313 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
314 +EXPORT_SYMBOL(get_skb_ring_handler);
315 +EXPORT_SYMBOL(set_skb_ring_handler);
316 +EXPORT_SYMBOL(do_skb_ring_handler);
318 +EXPORT_SYMBOL(get_buffer_ring_handler);
319 +EXPORT_SYMBOL(set_buffer_ring_handler);
320 +EXPORT_SYMBOL(do_buffer_ring_handler);
325 * The list of packet types we will receive (as opposed to discard)
326 * and the routines to invoke.
327 @@ -1474,6 +1524,10 @@
328 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
331 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
332 + if(ring_handler) ring_handler(skb, 0, 1);
333 +#endif /* CONFIG_RING */
335 /* Grab device queue */
336 spin_lock(&dev->queue_lock);
338 @@ -1574,6 +1628,13 @@
341 /* if netpoll wants it, pretend we never saw it */
342 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
343 + if(ring_handler && ring_handler(skb, 1, 1)) {
344 + /* The packet has been copied into a ring */
345 + return(NET_RX_SUCCESS);
347 +#endif /* CONFIG_RING */
352 @@ -1764,6 +1825,13 @@
353 struct net_device *orig_dev;
354 int ret = NET_RX_DROP;
356 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
357 + if(ring_handler && ring_handler(skb, 1, 1)) {
358 + /* The packet has been copied into a ring */
359 + return(NET_RX_SUCCESS);
361 +#endif /* CONFIG_RING */
364 /* if we've gotten here through NAPI, check netpoll */
365 if (skb->dev->poll && netpoll_rx(skb))
366 diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig
367 --- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000
368 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000
371 + tristate "PF_RING sockets (EXPERIMENTAL)"
372 + depends on EXPERIMENTAL
374 + PF_RING socket family, optimized for packet capture.
375 + If a PF_RING socket is bound to an adapter (via the bind() system
376 + call), such adapter will be used in read-only mode until the socket
377 + is destroyed. Whenever an incoming packet is received from the adapter
378 + it will not be passed to upper layers, but instead it is copied to a ring
379 + buffer, which in turn is exported to user space applications via mmap.
380 + Please refer to http://luca.ntop.org/Ring.pdf for more.
382 + Say N unless you know what you are doing.
384 diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile
385 --- linux-2.6.21.4/net/ring/Makefile 1970-01-01 00:00:00.000000000 +0000
386 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile 2007-06-10 16:43:04.350421521 +0000
389 +# Makefile for the ring driver.
394 +ring-objs := ring_packet.o
395 diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c
396 --- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000
397 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000
399 +/* ***************************************************************
401 + * (C) 2004-07 - Luca Deri <deri@ntop.org>
403 + * This code includes contributions courtesy of
404 + * - Jeff Randall <jrandall@nexvu.com>
405 + * - Helmut Manck <helmut.manck@secunet.com>
406 + * - Brad Doctor <brad@stillsecure.com>
407 + * - Amit D. Chaudhary <amit_ml@rajgad.com>
408 + * - Francesco Fusco <fusco@ntop.org>
409 + * - Michael Stiller <ms@2scale.net>
412 + * This program is free software; you can redistribute it and/or modify
413 + * it under the terms of the GNU General Public License as published by
414 + * the Free Software Foundation; either version 2 of the License, or
415 + * (at your option) any later version.
417 + * This program is distributed in the hope that it will be useful,
418 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
419 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420 + * GNU General Public License for more details.
422 + * You should have received a copy of the GNU General Public License
423 + * along with this program; if not, write to the Free Software Foundation,
424 + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
428 +#include <linux/version.h>
429 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
430 +#include <linux/autoconf.h>
432 +#include <linux/config.h>
434 +#include <linux/module.h>
435 +#include <linux/kernel.h>
436 +#include <linux/socket.h>
437 +#include <linux/skbuff.h>
438 +#include <linux/rtnetlink.h>
439 +#include <linux/in.h>
440 +#include <linux/inet.h>
441 +#include <linux/in6.h>
442 +#include <linux/init.h>
443 +#include <linux/filter.h>
444 +#include <linux/ring.h>
445 +#include <linux/ip.h>
446 +#include <linux/tcp.h>
447 +#include <linux/udp.h>
448 +#include <linux/list.h>
449 +#include <linux/proc_fs.h>
450 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
451 +#include <net/xfrm.h>
453 +#include <linux/poll.h>
455 +#include <net/sock.h>
456 +#include <asm/io.h> /* needed for virt_to_phys() */
458 +#include <net/inet_common.h>
461 +/* #define RING_DEBUG */
463 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
464 +static inline int remap_page_range(struct vm_area_struct *vma,
465 + unsigned long uvaddr,
466 + unsigned long paddr,
467 + unsigned long size,
469 + return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT,
474 +/* ************************************************* */
476 +#define CLUSTER_LEN 8
478 +struct ring_cluster {
479 + u_short cluster_id; /* 0 = no cluster */
480 + u_short num_cluster_elements;
481 + enum cluster_type hashing_mode;
482 + u_short hashing_id;
483 + struct sock *sk[CLUSTER_LEN];
484 + struct ring_cluster *next; /* NULL = last element of the cluster */
487 +/* ************************************************* */
489 +struct ring_element {
490 + struct list_head list;
494 +/* ************************************************* */
497 + struct net_device *ring_netdev;
502 + u_short cluster_id; /* 0 = no cluster */
505 + struct net_device *reflector_dev;
507 + /* Packet buffers */
508 + unsigned long order;
511 + unsigned long ring_memory;
512 + FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
513 + char *ring_slots; /* Basically it points to ring_memory
514 + +sizeof(FlowSlotInfo) */
516 + /* Packet Sampling */
517 + u_int pktToSample, sample_rate;
520 + struct sk_filter *bpfFilter;
523 + ACSM_STRUCT2 * acsm;
526 + atomic_t num_ring_slots_waiters;
527 + wait_queue_head_t ring_slots_waitqueue;
528 + rwlock_t ring_index_lock;
530 + /* Bloom Filters */
531 + u_char bitmask_enabled;
532 + bitmask_selector mac_bitmask, vlan_bitmask, ip_bitmask, twin_ip_bitmask,
533 + port_bitmask, twin_port_bitmask, proto_bitmask;
534 + u_int32_t num_mac_bitmask_add, num_mac_bitmask_remove;
535 + u_int32_t num_vlan_bitmask_add, num_vlan_bitmask_remove;
536 + u_int32_t num_ip_bitmask_add, num_ip_bitmask_remove;
537 + u_int32_t num_port_bitmask_add, num_port_bitmask_remove;
538 + u_int32_t num_proto_bitmask_add, num_proto_bitmask_remove;
540 + /* Indexes (Internal) */
541 + u_int insert_page_id, insert_slot_id;
544 +/* ************************************************* */
546 +/* List of all ring sockets. */
547 +static struct list_head ring_table;
548 +static u_int ring_table_size;
550 +/* List of all clusters */
551 +static struct ring_cluster *ring_cluster_list;
553 +static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
555 +/* ********************************** */
557 +/* /proc entry for ring module */
558 +struct proc_dir_entry *ring_proc_dir = NULL;
559 +struct proc_dir_entry *ring_proc = NULL;
561 +static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
562 +static void ring_proc_add(struct ring_opt *pfr);
563 +static void ring_proc_remove(struct ring_opt *pfr);
564 +static void ring_proc_init(void);
565 +static void ring_proc_term(void);
567 +/* ********************************** */
570 +static struct proto_ops ring_ops;
572 +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
573 +static struct proto ring_proto;
576 +static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
578 +static int buffer_ring_handler(struct net_device *dev, char *data, int len);
579 +static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
583 +/* ********************************** */
586 +static unsigned int bucket_len = 128, num_slots = 4096, sample_rate = 1,
587 + transparent_mode = 1, enable_tx_capture = 1;
589 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
590 +module_param(bucket_len, uint, 0644);
591 +module_param(num_slots, uint, 0644);
592 +module_param(sample_rate, uint, 0644);
593 +module_param(transparent_mode, uint, 0644);
594 +module_param(enable_tx_capture, uint, 0644);
596 +MODULE_PARM(bucket_len, "i");
597 +MODULE_PARM(num_slots, "i");
598 +MODULE_PARM(sample_rate, "i");
599 +MODULE_PARM(transparent_mode, "i");
600 +MODULE_PARM(enable_tx_capture, "i");
603 +MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
604 +MODULE_PARM_DESC(num_slots, "Number of ring slots");
605 +MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
606 +MODULE_PARM_DESC(transparent_mode,
607 + "Set to 1 to set transparent mode "
608 + "(slower but backwards compatible)");
610 +MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
612 +/* ********************************** */
614 +#define MIN_QUEUED_PKTS 64
615 +#define MAX_QUEUE_LOOPS 64
618 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
619 +#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
620 +#define ring_sk(__sk) ((__sk)->sk_protinfo)
622 +#define ring_sk_datatype(a) (a)
623 +#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
626 +#define _rdtsc() ({ uint32_t lo, hi; asm volatile("rdtsc" : "=a" (lo), "=d" (hi)); ((uint64_t)hi << 32) | lo; }) /* read the x86 time-stamp counter; explicit EAX/EDX outputs work on both 32- and 64-bit x86 */
629 + int dev_queue_xmit(struct sk_buff *skb)
631 + struct net_device *dev_get_by_name(const char *name)
634 +/* ********************************** */
641 +** Multi-Pattern Search Engine
643 +** Aho-Corasick State Machine - version 2.0
645 +** Supports both Non-Deterministic and Deterministic Finite Automata
648 +** Reference - Efficient String matching: An Aid to Bibliographic Search
649 +** Alfred V Aho and Margaret J Corasick
651 +** Copyright(C) 1975 Association for Computing Machinery,Inc
654 +** +++ Version 1.0 notes - Marc Norton:
657 +** Original implementation based on the 4 algorithms in the paper by Aho & Corasick,
658 +** some implementation ideas from 'Practical Algorithms in C', and some
661 +** 1) Finds all occurrences of all patterns within a text.
664 +** +++ Version 2.0 Notes - Marc Norton/Dan Roelker:
667 +** New implementation modifies the state table storage and access model to use
668 +** compacted sparse vector storage. Dan Roelker and I hammered this strategy out
669 +** amongst many others in order to reduce memory usage and improve caching performance.
670 +** The memory usage is greatly reduced, we only use 1/4 of what we used to. The caching
671 +** performance is better in pure benchmarking tests, but does not show overall improvement
672 +** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing
673 +** many other things before we get back to a pattern match test, so the cache is voided.
675 +** This version has better caching performance characteristics, reduced memory,
676 +** more state table storage options, and requires no a priori case conversions.
677 +** It does maintain the same public interface. (Snort only used banded storage).
679 +** 1) Supports NFA and DFA state machines, and basic keyword state machines
680 +** 2) Initial transition table uses Linked Lists
681 +** 3) Improved state table memory options. NFA and DFA state
682 +** transition tables are converted to one of 4 formats during compilation.
685 +** c) Banded matrix (Default-this is the only one used in snort)
686 +** d) Sparse-Banded matrix
687 +** 4) Added support for acstate_t in .h file so we can compile states as
688 +** 16, or 32 bit state values for another reduction in memory consumption,
689 +** smaller states allows more of the state table to be cached, and improves
690 +** performance on x86-P4. Your mileage may vary, especially on risc systems.
691 +** 5) Added a bool to each state transition list to indicate if there is a matching
692 +** pattern in the state. This prevents us from accessing another data array
693 +** and can improve caching/performance.
694 +** 6) The search functions are very sensitive, don't change them without extensive testing,
695 +** or you'll just spoil the caching and prefetching opportunities.
697 +** Extras for fellow pattern matchers:
698 +** The table below explains the storage format used at each step.
699 +** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.
700 +** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.
701 +** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster
702 +** than the previous version, due to caching performance. This cannot be fully realized in Snort yet,
703 +** but other applications may have better caching opportunities.
704 +** Snort only needs to use the banded or full storage.
706 +** Transition table format at each processing stage.
707 +** -------------------------------------------------
708 +** Patterns -> Keyword State Table (List)
709 +** Keyword State Table -> NFA (List)
710 +** NFA -> DFA (List)
711 +** DFA (List)-> Sparse Rows O(m-avg # transitions per state)
712 +** -> Banded Rows O(1)
713 +** -> Sparse-Banded Rows O(nb-# bands)
714 +** -> Full Matrix O(1)
716 +** Copyright(C) 2002,2003,2004 Marc Norton
717 +** Copyright(C) 2003,2004 Daniel Roelker
718 +** Copyright(C) 2002,2003,2004 Sourcefire,Inc.
720 +** This program is free software; you can redistribute it and/or modify
721 +** it under the terms of the GNU General Public License as published by
722 +** the Free Software Foundation; either version 2 of the License, or
723 +** (at your option) any later version.
725 +** This program is distributed in the hope that it will be useful,
726 +** but WITHOUT ANY WARRANTY; without even the implied warranty of
727 +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
728 +** GNU General Public License for more details.
730 +** You should have received a copy of the GNU General Public License
731 +** along with this program; if not, write to the Free Software
732 +** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
739 +#define MEMASSERT(p,s) if(!p){printk("ACSM-No Memory: %s!\n",s);}
744 +static int max_memory = 0;
749 +typedef struct acsm_summary_s
751 + unsigned num_states;
752 + unsigned num_transitions;
760 +static acsm_summary_t summary={0,0};
763 +** Case Translation Table
765 +static unsigned char xlatcase[256];
770 +inline int toupper(int ch) {
771 + if ( (unsigned int)(ch - 'a') < 26u )
776 +static void init_xlatcase(void)
779 + for (i = 0; i < 256; i++)
781 + xlatcase[i] = toupper(i);
791 +ConvertCaseEx (unsigned char *d, unsigned char *s, int m)
799 + for (i = 0; i < m; i++ )
801 + d[0] = xlatcase[ s[0] ];
802 + d[2] = xlatcase[ s[2] ];
803 + d[1] = xlatcase[ s[1] ];
804 + d[3] = xlatcase[ s[3] ];
809 + for (i=0; i < n; i++)
811 + d[i] = xlatcase[ s[i] ];
814 + for (i=0; i < m; i++)
816 + d[i] = xlatcase[ s[i] ];
830 + p = kmalloc (n, GFP_KERNEL);
849 + * Simple QUEUE NODE
851 +typedef struct _qnode
854 + struct _qnode *next;
859 + * Simple QUEUE Structure
861 +typedef struct _queue
863 + QNODE * head, *tail;
869 + * Initialize the queue
872 +queue_init (QUEUE * s)
874 + s->head = s->tail = 0;
879 + * Find a State in the queue
882 +queue_find (QUEUE * s, int state)
888 + if( q->state == state ) return 1;
895 + * Add Tail Item to queue (FiFo/LiLo)
898 +queue_add (QUEUE * s, int state)
902 + if( queue_find( s, state ) ) return;
906 + q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
907 + MEMASSERT (q, "queue_add");
913 + q = (QNODE *) AC_MALLOC (sizeof (QNODE));
924 + * Remove Head Item from queue
927 +queue_remove (QUEUE * s)
935 + s->head = s->head->next;
950 + * Return items in the queue
953 +queue_count (QUEUE * s)
963 +queue_free (QUEUE * s)
965 + while (queue_count (s))
972 + * Get Next State-NFA
975 +int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
977 + trans_node_t * t = acsm->acsmTransTable[state];
981 + if( t->key == input )
983 + return t->next_state;
988 + if( state == 0 ) return 0;
990 + return ACSM_FAIL_STATE2; /* Fail state ??? */
994 + * Get Next State-DFA
997 +int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
999 + trans_node_t * t = acsm->acsmTransTable[state];
1003 + if( t->key == input )
1005 + return t->next_state;
1010 + return 0; /* default state */
1013 + * Put Next State - Head insertion, and transition updates
1016 +int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
1019 + trans_node_t * tnew;
1021 + // printk(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state);
1024 + /* Check if the transition already exists, if so just update the next_state */
1025 + p = acsm->acsmTransTable[state];
1028 + if( p->key == input ) /* transition already exists- reset the next state */
1030 + p->next_state = next_state;
1036 + /* Definitely not an existing transition - add it */
1037 + tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
1038 + if( !tnew ) return -1;
1040 + tnew->key = input;
1041 + tnew->next_state = next_state;
1044 + tnew->next = acsm->acsmTransTable[state];
1045 + acsm->acsmTransTable[state] = tnew;
1047 + acsm->acsmNumTrans++;
1052 + * Free the entire transition table
1055 +int List_FreeTransTable( ACSM_STRUCT2 * acsm )
1058 + trans_node_t * t, *p;
1060 + if( !acsm->acsmTransTable ) return 0;
1062 + for(i=0;i< acsm->acsmMaxStates;i++)
1064 + t = acsm->acsmTransTable[i];
1071 + max_memory -= sizeof(trans_node_t);
1075 + kfree(acsm->acsmTransTable);
1077 + max_memory -= sizeof(void*) * acsm->acsmMaxStates;
1079 + acsm->acsmTransTable = 0;
1089 + int List_FreeList( trans_node_t * t )
1100 + max_memory -= sizeof(trans_node_t);
1109 + * Converts row of states from list to a full vector format
1112 +int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
1115 + trans_node_t * t = acsm->acsmTransTable[ state ];
1117 + memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
1119 + if( !t ) return 0;
1123 + full[ t->key ] = t->next_state;
1131 + * Copy a Match List Entry - don't dup the pattern data
1133 +static ACSM_PATTERN2*
1134 +CopyMatchListEntry (ACSM_PATTERN2 * px)
1136 + ACSM_PATTERN2 * p;
1138 + p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1139 + MEMASSERT (p, "CopyMatchListEntry");
1141 + memcpy (p, px, sizeof (ACSM_PATTERN2));
1149 + * Check if a pattern is in the list already,
1150 + * validate it using the 'id' field. This must be unique
1151 + * for every pattern.
1155 + int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
1157 + ACSM_PATTERN2 * p;
1159 + p = acsm->acsmMatchList[state];
1162 + if( p->id == px->id ) return 1;
1172 + * Add a pattern to the list of patterns terminated at this state.
1173 + * Insert at front of list.
1176 +AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
1178 + ACSM_PATTERN2 * p;
1180 + p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1182 + MEMASSERT (p, "AddMatchListEntry");
1184 + memcpy (p, px, sizeof (ACSM_PATTERN2));
1186 + p->next = acsm->acsmMatchList[state];
1188 + acsm->acsmMatchList[state] = p;
1193 +AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p)
1195 + int state, next, n;
1196 + unsigned char *pattern;
1199 + pattern = p->patrn;
1203 + * Match up pattern with existing states
1205 + for (; n > 0; pattern++, n--)
1207 + next = List_GetNextState(acsm,state,*pattern);
1208 + if (next == ACSM_FAIL_STATE2 || next == 0)
1216 + * Add new states for the rest of the pattern bytes, 1 state per byte
1218 + for (; n > 0; pattern++, n--)
1220 + acsm->acsmNumStates++;
1221 + List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
1222 + state = acsm->acsmNumStates;
1225 + AddMatchListEntry (acsm, state, p );
1229 + * Build A Non-Deterministic Finite Automata
1230 + * The keyword state table must already be built, via AddPatternStates().
1233 +Build_NFA (ACSM_STRUCT2 * acsm)
1236 + QUEUE q, *queue = &q;
1237 + acstate_t * FailState = acsm->acsmFailState;
1238 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
1239 + ACSM_PATTERN2 * mlist,* px;
1241 + /* Init a Queue */
1242 + queue_init (queue);
1245 + /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */
1246 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
1248 + s = List_GetNextState2(acsm,0,i);
1251 + queue_add (queue, s);
1256 + /* Build the fail state successive layer of transitions */
1257 + while (queue_count (queue) > 0)
1259 + r = queue_remove (queue);
1261 + /* Find Final States for any Failure */
1262 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
1266 + s = List_GetNextState(acsm,r,i);
1268 + if( s != ACSM_FAIL_STATE2 )
1270 + queue_add (queue, s);
1272 + fs = FailState[r];
1275 + * Locate the next valid state for 'i' starting at fs
1277 + while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
1279 + fs = FailState[fs];
1283 + * Update 's' state failure state to point to the next valid state
1285 + FailState[s] = next;
1288 + * Copy 'next'states MatchList to 's' states MatchList,
1289 + * we copy them so each list can be AC_FREE'd later,
1290 + * else we could just manipulate pointers to fake the copy.
1292 + for( mlist = MatchList[next];
1294 + mlist = mlist->next)
1296 + px = CopyMatchListEntry (mlist);
1298 + /* Insert at front of MatchList */
1299 + px->next = MatchList[s];
1300 + MatchList[s] = px;
1306 + /* Clean up the queue */
1307 + queue_free (queue);
1311 + * Build Deterministic Finite Automata from the NFA
1314 +Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm)
1316 + int i, r, s, cFailState;
1317 + QUEUE q, *queue = &q;
1318 + acstate_t * FailState = acsm->acsmFailState;
1320 + /* Init a Queue */
1321 + queue_init (queue);
1323 + /* Add the state 0 transitions 1st */
1324 + for(i=0; i<acsm->acsmAlphabetSize; i++)
1326 + s = List_GetNextState(acsm,0,i);
1329 + queue_add (queue, s);
1333 + /* Start building the next layer of transitions */
1334 + while( queue_count(queue) > 0 )
1336 + r = queue_remove(queue);
1338 + /* Process this states layer */
1339 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
1341 + s = List_GetNextState(acsm,r,i);
1343 + if( s != ACSM_FAIL_STATE2 && s!= 0)
1345 + queue_add (queue, s);
1349 + cFailState = List_GetNextState(acsm,FailState[r],i);
1351 + if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
1353 + List_PutNextState(acsm,r,i,cFailState);
1359 + /* Clean up the queue */
1360 + queue_free (queue);
1365 + * Convert a row lists for the state table to a full vector format
1369 +Conv_List_To_Full(ACSM_STRUCT2 * acsm)
1373 + acstate_t ** NextState = acsm->acsmNextState;
1375 + for(k=0;k<acsm->acsmMaxStates;k++)
1377 + p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
1380 + tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
1383 + p[1] = 0; /* no matches yet */
1385 + NextState[k] = p; /* now we have a full format row vector */
1392 + * Convert DFA memory usage from list based storage to a sparse-row storage.
1394 + * The Sparse format allows each row to be either full or sparse formatted. If the sparse row has
1395 + * too many transitions, performance or space may dictate that we use the standard full formatting
1396 + * for the row. More than 5 or 10 transitions per state ought to really whack performance. So the
1397 + * user can specify the max state transitions per state allowed in the sparse format.
1399 + * Standard Full Matrix Format
1400 + * ---------------------------
1401 + * acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)
1405 + * events -> a b c d e f g h i j k l m n o p
1407 + * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0
1409 + * Sparse Format, each row : Words Value
1410 + * 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands)
1411 + * 2-2 bool match flag (indicates this state has pattern matches)
1412 + * 3-3 sparse state count ( # of input/next-state pairs )
1413 + * 4-3+2*cnt 'input,next-state' pairs... each sizeof(acstate_t)
1415 + * above example case yields:
1416 + * Full Format: 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ...
1417 + * Sparse format: 1, 3, 'a',1,'b',7,'f',3 - uses 2+2*ntransitions (non-default transitions), plus 1 word for the match flag not shown here
1420 +Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm)
1423 + acstate_t * p, state, maxstates=0;
1424 + acstate_t ** NextState = acsm->acsmNextState;
1425 + acstate_t full[MAX_ALPHABET_SIZE];
1427 + for(k=0;k<acsm->acsmMaxStates;k++)
1431 + List_ConvToFull(acsm, (acstate_t)k, full );
1433 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
1436 + if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
1439 + if( cnt > 0 ) maxstates++;
1441 + if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes )
1443 + p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
1448 + memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));
1452 + p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
1456 + p[m++] = ACF_SPARSE;
1457 + p[m++] = 0; /* no matches */
1460 + for(i = 0; i < acsm->acsmAlphabetSize ; i++)
1463 + if( state != 0 && state != ACSM_FAIL_STATE2 )
1471 + NextState[k] = p; /* now we are a sparse formatted state transition array */
1477 + Convert Full matrix to Banded row format.
1481 + 2 n number of values
1482 + 3 i index of 1st value (0-256)
1483 + 4 - 3+n next-state values at each index
1487 +Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm)
1489 + int first = -1, last;
1490 + acstate_t * p, state, full[MAX_ALPHABET_SIZE];
1491 + acstate_t ** NextState = acsm->acsmNextState;
1494 + for(k=0;k<acsm->acsmMaxStates;k++)
1498 + List_ConvToFull(acsm, (acstate_t)k, full );
1503 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
1507 + if( state !=0 && state != ACSM_FAIL_STATE2 )
1509 + if( first < 0 ) first = i;
1514 + /* calc band width */
1515 + cnt= last - first + 1;
1517 + p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));
1522 + p[m++] = ACF_BANDED;
1523 + p[m++] = 0; /* no matches */
1527 + for(i = first; i <= last; i++)
1532 + NextState[k] = p; /* now we are a banded formatted state transition array */
1539 + * Convert full matrix to Sparse Band row format.
1541 + * next - Full formatted row of next states
1542 + * asize - size of alphabet
1543 + * zcnt - max number of zeros in a run of zeros in any given band.
1546 + * 1 ACF_SPARSEBANDS
1547 + * 2 number of bands
1548 + * repeat 3 - 5+ ....once for each band in this row.
1549 + * 3 number of items in this band; 4 start index of this band
1550 + * 5- next-state values in this band...
1553 +int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
1555 + int i, nbands,zcnt,last=0;
1559 + for( i=0; i<asize; i++ )
1563 + if( state !=0 && state != ACSM_FAIL_STATE2 )
1565 + begin[nbands] = i;
1568 + for( ; i< asize; i++ )
1571 + if( state ==0 || state == ACSM_FAIL_STATE2 )
1574 + if( zcnt > zmax ) break;
1583 + end[nbands++] = last;
1597 + * 1 SPARSEBANDS format indicator
1598 + * 2 bool indicates a pattern match in this state
1599 + * 3 number of sparse bands
1600 + * 4 number of elements in this band
1601 + * 5 start index of this band
1602 + * 6- list of next states
1604 + * m number of elements in this band
1605 + * m+1 start index of this band
1606 + * m+2- list of next states
1609 +Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm)
1612 + acstate_t ** NextState = acsm->acsmNextState;
1613 + int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;
1615 + int band_begin[MAX_ALPHABET_SIZE];
1616 + int band_end[MAX_ALPHABET_SIZE];
1618 + acstate_t full[MAX_ALPHABET_SIZE];
1620 + for(k=0;k<acsm->acsmMaxStates;k++)
1624 + List_ConvToFull(acsm, (acstate_t)k, full );
1626 + nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );
1628 + /* calc band width space*/
1630 + for(i=0;i<nbands;i++)
1633 + cnt += band_end[i] - band_begin[i] + 1;
1635 + /*printk("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */
1638 + p = AC_MALLOC(sizeof(acstate_t)*(cnt));
1643 + p[m++] = ACF_SPARSEBANDS;
1644 + p[m++] = 0; /* no matches */
1647 + for( i=0;i<nbands;i++ )
1649 + p[m++] = band_end[i] - band_begin[i] + 1; /* # states in this band */
1650 + p[m++] = band_begin[i]; /* start index */
1652 + for( j=band_begin[i]; j<=band_end[i]; j++ )
1654 + p[m++] = full[j]; /* some states may be state zero */
1658 + NextState[k] = p; /* now we are a sparse-banded formatted state transition array */
1666 + * Convert an NFA or DFA row from sparse to full format
1667 + * and store into the 'full' buffer.
1670 + * 0 - failed, no state transitions
1671 + * *p - pointer to 'full' buffer
1676 + acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full )
1679 + acstate_t * p, n, fmt, index, nb, bmatch;
1680 + acstate_t ** NextState = acsm->acsmNextState;
1684 + if( !p ) return 0;
1690 + if( fmt ==ACF_SPARSE )
1693 + for( ; n>0; n--, p+=2 )
1695 + full[ p[0] ] = p[1];
1698 + else if( fmt ==ACF_BANDED )
1704 + for( ; n>0; n--, p++ )
1706 + full[ index++ ] = p[0];
1709 + else if( fmt ==ACF_SPARSEBANDS )
1716 + for( ; n>0; n--, p++ )
1718 + full[ index++ ] = p[0];
1722 + else if( fmt == ACF_FULL )
1724 + memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t));
1732 + * Select the desired storage mode
1734 +int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
1741 + case ACF_SPARSEBANDS:
1742 + acsm->acsmFormat = m;
1753 +void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
1755 + acsm->acsmSparseMaxZcnt = n;
1760 +void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
1762 + acsm->acsmSparseMaxRowNodes = n;
1767 +int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
1774 + acsm->acsmFSA = m;
1782 +int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
1784 + if( n <= MAX_ALPHABET_SIZE )
1786 + acsm->acsmAlphabetSize = n;
1795 + * Create a new AC state machine
1797 +static ACSM_STRUCT2 * acsmNew2 (void)
1803 + p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
1804 + MEMASSERT (p, "acsmNew");
1808 + memset (p, 0, sizeof (ACSM_STRUCT2));
1810 + /* Some defaults */
1811 + p->acsmFSA = FSA_DFA;
1812 + p->acsmFormat = ACF_BANDED;
1813 + p->acsmAlphabetSize = 256;
1814 + p->acsmSparseMaxRowNodes = 256;
1815 + p->acsmSparseMaxZcnt = 10;
1821 + * Add a pattern to the list of patterns for this state machine
1825 +acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
1826 + int offset, int depth, void * id, int iid)
1828 + ACSM_PATTERN2 * plist;
1830 + plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1831 + MEMASSERT (plist, "acsmAddPattern");
1833 + plist->patrn = (unsigned char *) AC_MALLOC ( n );
1834 + MEMASSERT (plist->patrn, "acsmAddPattern");
1836 + ConvertCaseEx(plist->patrn, pat, n);
1838 + plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
1839 + MEMASSERT (plist->casepatrn, "acsmAddPattern");
1841 + memcpy (plist->casepatrn, pat, n);
1844 + plist->nocase = nocase;
1845 + plist->offset = offset;
1846 + plist->depth = depth;
1850 + plist->next = p->acsmPatterns;
1851 + p->acsmPatterns = plist;
1856 + * Add a Key to the list of key+data pairs
1858 +int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
1860 + ACSM_PATTERN2 * plist;
1862 + plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1863 + MEMASSERT (plist, "acsmAddPattern");
1865 + plist->patrn = (unsigned char *) AC_MALLOC (klen);
1866 + memcpy (plist->patrn, key, klen);
1868 + plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
1869 + memcpy (plist->casepatrn, key, klen);
1872 + plist->nocase = nocase;
1873 + plist->offset = 0;
1878 + plist->next = p->acsmPatterns;
1879 + p->acsmPatterns = plist;
1885 + * Copy a boolean match flag into the NextState table, for caching purposes.
1888 +void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
1891 + acstate_t ** NextState = acsm->acsmNextState;
1892 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
1894 + for( state=0; state<acsm->acsmNumStates; state++ )
1896 + if( MatchList[state] )
1898 + NextState[state][1] = 1;
1902 + NextState[state][1] = 0;
1908 + * Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands
1911 +acsmCompile2 (ACSM_STRUCT2 * acsm)
1914 + ACSM_PATTERN2 * plist;
1916 + /* Count number of states */
1917 + for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
1919 + acsm->acsmMaxStates += plist->n;
1920 + /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
1922 + acsm->acsmMaxStates++; /* one extra */
1924 + /* Alloc a List based State Transition table */
1925 + acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
1926 + MEMASSERT (acsm->acsmTransTable, "acsmCompile");
1928 + memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);
1930 + /* Alloc a failure table - this has a failure state, and a match list for each state */
1931 + acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
1932 + MEMASSERT (acsm->acsmFailState, "acsmCompile");
1934 + memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );
1936 + /* Alloc a MatchList table - this has a list of pattern matches for each state, if any */
1937 + acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
1938 + MEMASSERT (acsm->acsmMatchList, "acsmCompile");
1940 + memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
1942 + /* Alloc a separate state transition table == in state 's' due to event 'k', transition to 'next' state */
1943 + acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
1944 + MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");
1946 + for (k = 0; k < acsm->acsmMaxStates; k++)
1948 + acsm->acsmNextState[k]=(acstate_t*)0;
1951 + /* Initialize state zero as a branch */
1952 + acsm->acsmNumStates = 0;
1954 + /* Add the 0'th state, */
1955 + //acsm->acsmNumStates++;
1957 + /* Add each Pattern to the State Table - This forms a keywords state table */
1958 + for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
1960 + AddPatternStates (acsm, plist);
1963 + acsm->acsmNumStates++;
1965 + if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
1967 + /* Build the NFA */
1971 + if( acsm->acsmFSA == FSA_DFA )
1973 + /* Convert the NFA to a DFA */
1974 + Convert_NFA_To_DFA (acsm);
1979 + * Select Final Transition Table Storage Mode
1982 + if( acsm->acsmFormat == ACF_SPARSE )
1984 + /* Convert DFA Full matrix to a Sparse matrix */
1985 + if( Conv_Full_DFA_To_Sparse(acsm) )
1989 + else if( acsm->acsmFormat == ACF_BANDED )
1991 + /* Convert DFA Full matrix to a Banded matrix */
1992 + if( Conv_Full_DFA_To_Banded(acsm) )
1996 + else if( acsm->acsmFormat == ACF_SPARSEBANDS )
1998 + /* Convert DFA Full matrix to a Sparse-Bands matrix */
1999 + if( Conv_Full_DFA_To_SparseBands(acsm) )
2002 + else if( acsm->acsmFormat == ACF_FULL )
2004 + if( Conv_List_To_Full( acsm ) )
2008 + acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */
2010 + /* Free up the Table Of Transition Lists */
2011 + List_FreeTransTable( acsm );
2013 + /* For now -- show this info */
2015 + * acsmPrintInfo( acsm );
2019 + /* Accrue Summary State Stats */
2020 + summary.num_states += acsm->acsmNumStates;
2021 + summary.num_transitions += acsm->acsmNumTrans;
2023 + memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));
2029 + * Get the NextState from the NFA, all NFA storage formats use this
2032 +acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
2041 + ps++; /* skip bMatchState */
2050 + if( input < index )
2058 + return (acstate_t)ACSM_FAIL_STATE2;
2061 + if( input >= index + n )
2069 + return (acstate_t)ACSM_FAIL_STATE2;
2072 + if( ps[input-index] == 0 )
2076 + return ACSM_FAIL_STATE2;
2080 + return (acstate_t) ps[input-index];
2085 + n = *ps++; /* number of sparse index-value entries */
2087 + for( ; n>0 ; n-- )
2089 + if( ps[0] > input ) /* cannot match the input, already a higher value than the input */
2091 + return (acstate_t)ACSM_FAIL_STATE2; /* default state */
2093 + else if( ps[0] == input )
2095 + return ps[1]; /* next state */
2103 + return ACSM_FAIL_STATE2;
2106 + case ACF_SPARSEBANDS:
2108 + nb = *ps++; /* number of bands */
2110 + while( nb > 0 ) /* for each band */
2112 + n = *ps++; /* number of elements */
2113 + index = *ps++; /* 1st element value */
2115 + if( input < index )
2119 + return (acstate_t)ACSM_FAIL_STATE2;
2121 + return (acstate_t)0;
2123 + if( (input >= index) && (input < (index + n)) )
2125 + if( ps[input-index] == 0 )
2129 + return ACSM_FAIL_STATE2;
2132 + return (acstate_t) ps[input-index];
2139 + return (acstate_t)ACSM_FAIL_STATE2;
2141 + return (acstate_t)0;
2146 + if( ps[input] == 0 )
2150 + return ACSM_FAIL_STATE2;
2163 + * Get the NextState from the DFA Next State Transition table
2164 + * Full and banded are supported separately, this is for
2165 + * sparse and sparse-bands
2168 +acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
2178 + /* n=ps[2] : number of entries in the band */
2179 + /* index=ps[3] : index of the 1st entry, sequential thereafter */
2181 + if( input < ps[3] ) return 0;
2182 + if( input >= (ps[3]+ps[2]) ) return 0;
2184 + return ps[4+input-ps[3]];
2190 + return ps[2+input];
2196 + n = ps[2]; /* number of entries/ key+next pairs */
2200 + for( ; n>0 ; n-- )
2202 + if( input < ps[0] ) /* cannot match the input, already a higher value than the input */
2204 + return (acstate_t)0; /* default state */
2206 + else if( ps[0] == input )
2208 + return ps[1]; /* next state */
2212 + return (acstate_t)0;
2217 + case ACF_SPARSEBANDS:
2219 + nb = ps[2]; /* number of bands */
2223 + while( nb > 0 ) /* for each band */
2225 + n = ps[0]; /* number of elements in this band */
2226 + index = ps[1]; /* start index/char of this band */
2227 + if( input < index )
2229 + return (acstate_t)0;
2231 + if( (input < (index + n)) )
2233 + return (acstate_t) ps[2+input-index];
2238 + return (acstate_t)0;
2245 + * Search Text or Binary Data for Pattern matches
2247 + * Sparse & Sparse-Banded Matrix search
2252 +acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2253 + int (*Match) (void * id, int index, void *data),
2257 + ACSM_PATTERN2 * mlist;
2258 + unsigned char * Tend;
2260 + unsigned char * T, * Tc;
2262 + acstate_t ** NextState = acsm->acsmNextState;
2263 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2269 + for( state = 0; T < Tend; T++ )
2271 + state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
2273 + /* test if this state has any matching patterns */
2274 + if( NextState[state][1] )
2276 + for( mlist = MatchList[state];
2278 + mlist = mlist->next )
2280 + index = T - mlist->n - Tc;
2281 + if( mlist->nocase )
2284 + if (Match (mlist->id, index, data))
2289 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2292 + if (Match (mlist->id, index, data))
2302 + * Full format DFA search
2303 + * Do not change anything here without testing, caching and prefetching
2304 + * performance is very sensitive to any changes.
2307 + * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
2308 + * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
2314 +acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2315 + int (*Match) (void * id, int index, void *data),
2318 + ACSM_PATTERN2 * mlist;
2319 + unsigned char * Tend;
2320 + unsigned char * T;
2325 + acstate_t ** NextState = acsm->acsmNextState;
2326 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2332 + for( state = 0; T < Tend; T++ )
2334 + ps = NextState[ state ];
2336 + sindex = xlatcase[ T[0] ];
2338 + /* check the current state for a pattern match */
2341 + for( mlist = MatchList[state];
2343 + mlist = mlist->next )
2345 + index = T - mlist->n - Tx;
2348 + if( mlist->nocase )
2351 + if (Match (mlist->id, index, data))
2356 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
2359 + if (Match (mlist->id, index, data))
2367 + state = ps[ 2u + sindex ];
2370 + /* Check the last state for a pattern match */
2371 + for( mlist = MatchList[state];
2373 + mlist = mlist->next )
2375 + index = T - mlist->n - Tx;
2377 + if( mlist->nocase )
2380 + if (Match (mlist->id, index, data))
2385 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2388 + if (Match (mlist->id, index, data))
2397 + * Banded-Row format DFA search
2398 + * Do not change anything here, caching and prefetching
2399 + * performance is very sensitive to any changes.
2401 + * ps[0] = storage fmt
2402 + * ps[1] = bool match flag
2403 + * ps[2] = # elements in band
2404 + * ps[3] = index of 1st element
2409 +acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2410 + int (*Match) (void * id, int index, void *data),
2414 + unsigned char * Tend;
2415 + unsigned char * T;
2418 + acstate_t ** NextState = acsm->acsmNextState;
2419 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2420 + ACSM_PATTERN2 * mlist;
2427 + for( state = 0; T < Tend; T++ )
2429 + ps = NextState[state];
2431 + sindex = xlatcase[ T[0] ];
2433 + /* test if this state has any matching patterns */
2436 + for( mlist = MatchList[state];
2438 + mlist = mlist->next )
2440 + index = T - mlist->n - Tx;
2442 + if( mlist->nocase )
2445 + if (Match (mlist->id, index, data))
2450 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2453 + if (Match (mlist->id, index, data))
2460 + if( sindex < ps[3] ) state = 0;
2461 + else if( sindex >= (ps[3] + ps[2]) ) state = 0;
2462 + else state = ps[ 4u + sindex - ps[3] ];
2465 + /* Check the last state for a pattern match */
2466 + for( mlist = MatchList[state];
2468 + mlist = mlist->next )
2470 + index = T - mlist->n - Tx;
2472 + if( mlist->nocase )
2475 + if (Match (mlist->id, index, data))
2480 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2483 + if (Match (mlist->id, index, data))
2495 + * Search Text or Binary Data for Pattern matches
2497 + * Sparse Storage Version
2502 +acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2503 + int (*Match) (void * id, int index, void *data),
2507 + ACSM_PATTERN2 * mlist;
2508 + unsigned char * Tend;
2510 + unsigned char * T, *Tc;
2512 + acstate_t ** NextState= acsm->acsmNextState;
2513 + acstate_t * FailState= acsm->acsmFailState;
2514 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2515 + unsigned char Tchar;
2521 + for( state = 0; T < Tend; T++ )
2525 + Tchar = xlatcase[ *T ];
2527 + while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
2528 + state = FailState[state];
2532 + for( mlist = MatchList[state];
2534 + mlist = mlist->next )
2536 + index = T - mlist->n - Tx;
2537 + if( mlist->nocase )
2540 + if (Match (mlist->id, index, data))
2545 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2548 + if (Match (mlist->id, index, data))
2562 +acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2563 + int (*Match) (void * id, int index, void *data),
2567 + switch( acsm->acsmFSA )
2571 + if( acsm->acsmFormat == ACF_FULL )
2573 + return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
2575 + else if( acsm->acsmFormat == ACF_BANDED )
2577 + return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
2581 + return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
2586 + return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
2600 +acsmFree2 (ACSM_STRUCT2 * acsm)
2603 + ACSM_PATTERN2 * mlist, *ilist;
2604 + for (i = 0; i < acsm->acsmMaxStates; i++)
2606 + mlist = acsm->acsmMatchList[i];
2611 + mlist = mlist->next;
2614 + AC_FREE(acsm->acsmNextState[i]);
2616 + AC_FREE(acsm->acsmFailState);
2617 + AC_FREE(acsm->acsmMatchList);
2620 +/* ********************************** */
2622 +static void ring_sock_destruct(struct sock *sk) {
2624 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
2625 + skb_queue_purge(&sk->sk_receive_queue);
2627 + if (!sock_flag(sk, SOCK_DEAD)) {
2628 +#if defined(RING_DEBUG)
2629 + printk("Attempt to release alive ring socket: %p\n", sk);
2634 + BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
2635 + BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
2638 + BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
2639 + BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
2642 +#if defined(RING_DEBUG)
2643 + printk("Attempt to release alive ring socket: %p\n", sk);
2649 + kfree(ring_sk(sk));
2651 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
2652 + MOD_DEC_USE_COUNT;
2656 +/* ********************************** */
2658 +static void ring_proc_add(struct ring_opt *pfr) {
2659 + if(ring_proc_dir != NULL) {
2662 + pfr->ring_pid = current->pid;
2664 + snprintf(name, sizeof(name), "%d", pfr->ring_pid);
2665 + create_proc_read_entry(name, 0, ring_proc_dir,
2666 + ring_proc_get_info, pfr);
2667 + /* printk("PF_RING: added /proc/net/pf_ring/%s\n", name); */
2671 +/* ********************************** */
2673 +static void ring_proc_remove(struct ring_opt *pfr) {
2674 + if(ring_proc_dir != NULL) {
2677 + snprintf(name, sizeof(name), "%d", pfr->ring_pid);
2678 + remove_proc_entry(name, ring_proc_dir);
2679 + /* printk("PF_RING: removed /proc/net/pf_ring/%s\n", name); */
2683 +/* ********************************** */
2685 +static int ring_proc_get_info(char *buf, char **start, off_t offset,
2686 + int len, int *unused, void *data)
2689 + struct ring_opt *pfr;
2690 + FlowSlotInfo *fsi;
2692 + if(data == NULL) {
2693 + /* /proc/net/pf_ring/info */
2694 + rlen = sprintf(buf,"Version : %s\n", RING_VERSION);
2695 + rlen += sprintf(buf + rlen,"Bucket length : %d bytes\n", bucket_len);
2696 + rlen += sprintf(buf + rlen,"Ring slots : %d\n", num_slots);
2697 + rlen += sprintf(buf + rlen,"Sample rate : %d [1=no sampling]\n", sample_rate);
2699 + rlen += sprintf(buf + rlen,"Capture TX : %s\n",
2700 + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
2701 + rlen += sprintf(buf + rlen,"Transparent mode : %s\n",
2702 + transparent_mode ? "Yes" : "No");
2703 + rlen += sprintf(buf + rlen,"Total rings : %d\n", ring_table_size);
2705 + /* detailed statistics about a PF_RING */
2706 + pfr = (struct ring_opt*)data;
2709 + fsi = pfr->slots_info;
2712 + rlen = sprintf(buf, "Bound Device : %s\n",
2713 + pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
2714 + rlen += sprintf(buf + rlen,"Version : %d\n", fsi->version);
2715 + rlen += sprintf(buf + rlen,"Sampling Rate : %d\n", pfr->sample_rate);
2716 + rlen += sprintf(buf + rlen,"BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
2717 + rlen += sprintf(buf + rlen,"Bloom Filters : %s\n", pfr->bitmask_enabled ? "Enabled" : "Disabled");
2718 + rlen += sprintf(buf + rlen,"Pattern Search: %s\n", pfr->acsm ? "Enabled" : "Disabled");
2719 + rlen += sprintf(buf + rlen,"Cluster Id : %d\n", pfr->cluster_id);
2720 + rlen += sprintf(buf + rlen,"Tot Slots : %d\n", fsi->tot_slots);
2721 + rlen += sprintf(buf + rlen,"Slot Len : %d\n", fsi->slot_len);
2722 + rlen += sprintf(buf + rlen,"Data Len : %d\n", fsi->data_len);
2723 + rlen += sprintf(buf + rlen,"Tot Memory : %d\n", fsi->tot_mem);
2724 + rlen += sprintf(buf + rlen,"Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
2725 + rlen += sprintf(buf + rlen,"Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
2726 + rlen += sprintf(buf + rlen,"Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
2727 + rlen += sprintf(buf + rlen,"Tot Read : %lu\n", (unsigned long)fsi->tot_read);
2730 + rlen = sprintf(buf, "WARNING fsi == NULL\n");
2732 + rlen = sprintf(buf, "WARNING data == NULL\n");
2738 +/* ********************************** */
2740 +static void ring_proc_init(void) {
2741 + ring_proc_dir = proc_mkdir("pf_ring", init_net.proc_net);
2743 + if(ring_proc_dir) {
2744 + ring_proc_dir->owner = THIS_MODULE;
2745 + ring_proc = create_proc_read_entry("info", 0, ring_proc_dir,
2746 + ring_proc_get_info, NULL);
2748 + printk("PF_RING: unable to register proc file\n");
2750 + ring_proc->owner = THIS_MODULE;
2751 + printk("PF_RING: registered /proc/net/pf_ring/\n");
2754 + printk("PF_RING: unable to create /proc/net/pf_ring\n");
2757 +/* ********************************** */
2759 +static void ring_proc_term(void) {
2760 + if(ring_proc != NULL) {
2761 + remove_proc_entry("info", ring_proc_dir);
2762 + if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", init_net.proc_net);
2764 + printk("PF_RING: deregistered /proc/net/pf_ring\n");
2768 +/* ********************************** */
2773 + * store the sk in a new element and add it
2774 + * to the head of the list.
2776 +static inline void ring_insert(struct sock *sk) {
2777 + struct ring_element *next;
2779 +#if defined(RING_DEBUG)
2780 + printk("RING: ring_insert()\n");
2783 + next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
2784 + if(next != NULL) {
2786 + write_lock_irq(&ring_mgmt_lock);
2787 + list_add(&next->list, &ring_table);
2788 + write_unlock_irq(&ring_mgmt_lock);
2790 + if(net_ratelimit())
2791 + printk("RING: could not kmalloc slot!!\n");
2794 + ring_table_size++;
2795 + ring_proc_add(ring_sk(sk));
2798 +/* ********************************** */
2803 + * For each of the elements in the list:
2804 + * - check if this is the element we want to delete
2805 + * - if it is, remove it from the list, and free it.
2807 + * stop when we find the one we're looking for (break),
2808 + * or when we reach the end of the list.
2810 +static inline void ring_remove(struct sock *sk) {
2811 + struct list_head *ptr;
2812 + struct ring_element *entry;
2814 + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
2815 + entry = list_entry(ptr, struct ring_element, list);
2817 + if(entry->sk == sk) {
2820 + ring_table_size--;
2826 +/* ********************************** */
2828 +static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
2830 + if(pfr->ring_slots != NULL) {
2832 + u_int32_t tot_insert = pfr->slots_info->insert_idx,
2833 +#if defined(RING_DEBUG)
2834 + tot_read = pfr->slots_info->tot_read, tot_pkts;
2836 + tot_read = pfr->slots_info->tot_read;
2839 + if(tot_insert >= tot_read) {
2840 +#if defined(RING_DEBUG)
2841 + tot_pkts = tot_insert-tot_read;
2843 + return(tot_insert-tot_read);
2845 +#if defined(RING_DEBUG)
2846 + tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read;
2848 + return(((u_int32_t)-1)+tot_insert-tot_read);
2851 +#if defined(RING_DEBUG)
2852 + printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
2853 + tot_pkts, tot_insert, tot_read);
2860 +/* ********************************** */
2862 +static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
2863 +#if defined(RING_DEBUG)
2864 + printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
2867 + if(pfr->ring_slots != NULL) {
2868 + FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
2869 + *pfr->slots_info->slot_len]);
2875 +/* ********************************** */
2877 +static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
2878 +#if defined(RING_DEBUG)
2879 + printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
2882 + if(pfr->ring_slots != NULL)
2883 + return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
2884 + pfr->slots_info->slot_len]));
2889 +/* ******************************************************* */
2891 +static int parse_pkt(struct sk_buff *skb, u_int16_t skb_displ,
2892 + u_int8_t *l3_proto, u_int16_t *eth_type,
2893 + u_int16_t *l3_offset, u_int16_t *l4_offset,
2894 + u_int16_t *vlan_id, u_int32_t *ipv4_src,
2895 + u_int32_t *ipv4_dst,
2896 + u_int16_t *l4_src_port, u_int16_t *l4_dst_port,
2897 + u_int16_t *payload_offset) {
2899 + struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
2902 + *l3_offset = *l4_offset = *l3_proto = *payload_offset = 0;
2903 + *eth_type = ntohs(eh->h_proto);
2905 + if(*eth_type == 0x8100 /* 802.1q (VLAN) */) {
2906 + (*vlan_id) = (skb->data[14] & 15)*256 + skb->data[15];
2907 + *eth_type = (skb->data[16])*256 + skb->data[17];
2911 + (*vlan_id) = (u_int16_t)-1;
2914 + if(*eth_type == 0x0800 /* IP */) {
2915 + *l3_offset = displ+sizeof(struct ethhdr);
2916 + ip = (struct iphdr*)(skb->data-skb_displ+(*l3_offset));
2918 + *ipv4_src = ntohl(ip->saddr), *ipv4_dst = ntohl(ip->daddr), *l3_proto = ip->protocol;
2920 + if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP)) {
2921 + *l4_offset = (*l3_offset)+(ip->ihl*4);
2923 + if(ip->protocol == IPPROTO_TCP) {
2924 + struct tcphdr *tcp = (struct tcphdr*)(skb->data-skb_displ+(*l4_offset));
2925 + *l4_src_port = ntohs(tcp->source), *l4_dst_port = ntohs(tcp->dest);
2926 + *payload_offset = (*l4_offset)+(tcp->doff * 4);
2927 + } else if(ip->protocol == IPPROTO_UDP) {
2928 + struct udphdr *udp = (struct udphdr*)(skb->data-skb_displ+(*l4_offset));
2929 + *l4_src_port = ntohs(udp->source), *l4_dst_port = ntohs(udp->dest);
2930 + *payload_offset = (*l4_offset)+sizeof(struct udphdr);
2932 + *payload_offset = (*l4_offset);
2934 + *l4_src_port = *l4_dst_port = 0;
2936 + return(1); /* IP */
2937 + } /* TODO: handle IPv6 */
2939 + return(0); /* No IP */
2942 +/* **************************************************************** */
2944 +static void reset_bitmask(bitmask_selector *selector)
2946 + memset((char*)selector->bits_memory, 0, selector->num_bits/8);
2948 + while(selector->clashes != NULL) {
2949 + bitmask_counter_list *next = selector->clashes->next;
2950 + kfree(selector->clashes);
2951 + selector->clashes = next;
2955 +/* **************************************************************** */
2957 +static void alloc_bitmask(u_int32_t tot_bits, bitmask_selector *selector)
2959 + u_int tot_mem = tot_bits/8;
2961 + if(tot_mem <= PAGE_SIZE)
2962 + selector->order = 1;
2964 + for(selector->order = 0; (PAGE_SIZE << selector->order) < tot_mem; selector->order++)
2968 + printk("BITMASK: [order=%d][tot_mem=%d]\n", selector->order, tot_mem);
2970 + while((selector->bits_memory = __get_free_pages(GFP_ATOMIC, selector->order)) == 0)
2971 + if(selector->order-- == 0)
2974 + if(selector->order == 0) {
2975 + printk("BITMASK: ERROR not enough memory for bitmask\n");
2976 + selector->num_bits = 0;
2980 + tot_mem = PAGE_SIZE << selector->order;
2981 + printk("BITMASK: succesfully allocated [tot_mem=%d][order=%d]\n",
2982 + tot_mem, selector->order);
2984 + selector->num_bits = tot_mem*8;
2985 + selector->clashes = NULL;
2986 + reset_bitmask(selector);
2989 +/* ********************************** */
2991 +static void free_bitmask(bitmask_selector *selector)
2993 + if(selector->bits_memory > 0)
2994 + free_pages(selector->bits_memory, selector->order);
2997 +/* ********************************** */
2999 +static void set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3000 + u_int32_t idx = the_bit % selector->num_bits;
3002 + if(BITMASK_ISSET(idx, selector)) {
3003 + bitmask_counter_list *head = selector->clashes;
3005 + printk("BITMASK: bit %u was already set\n", the_bit);
3007 + while(head != NULL) {
3008 + if(head->bit_id == the_bit) {
3009 + head->bit_counter++;
3010 + printk("BITMASK: bit %u is now set to %d\n", the_bit, head->bit_counter);
3014 + head = head->next;
3017 + head = kmalloc(sizeof(bitmask_counter_list), GFP_KERNEL);
3019 + head->bit_id = the_bit;
3020 + head->bit_counter = 1 /* previous value */ + 1 /* the requested set */;
3021 + head->next = selector->clashes;
3022 + selector->clashes = head;
3024 + printk("BITMASK: not enough memory\n");
3028 + BITMASK_SET(idx, selector);
3029 + printk("BITMASK: bit %u is now set\n", the_bit);
3033 +/* ********************************** */
3035 +static u_char is_set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3036 + u_int32_t idx = the_bit % selector->num_bits;
3037 + return(BITMASK_ISSET(idx, selector));
3040 +/* ********************************** */
3042 +static void clear_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3043 + u_int32_t idx = the_bit % selector->num_bits;
3045 + if(!BITMASK_ISSET(idx, selector))
3046 + printk("BITMASK: bit %u was not set\n", the_bit);
3048 + bitmask_counter_list *head = selector->clashes, *prev = NULL;
3050 + while(head != NULL) {
3051 + if(head->bit_id == the_bit) {
3052 + head->bit_counter--;
3054 + printk("BITMASK: bit %u is now set to %d\n",
3055 + the_bit, head->bit_counter);
3057 + if(head->bit_counter == 1) {
3058 + /* We can now delete this entry as '1' can be
3059 + accommodated into the bitmask */
3062 + selector->clashes = head->next;
3064 + prev->next = head->next;
3071 + prev = head; head = head->next;
3074 + BITMASK_CLR(idx, selector);
3075 + printk("BITMASK: bit %u is now reset\n", the_bit);
3079 +/* ********************************** */
3081 +/* Hash function */
/*
 * Hash a 32-bit value with the sdbm recurrence, fed with the value's
 * bytes in memory order:
 *
 *   hash = byte + (hash << 6) + (hash << 16) - hash
 *
 * All arithmetic wraps modulo 2^32.
 */
static u_int32_t sdb_hash(u_int32_t value) {
  u_int8_t raw[sizeof(value)];
  const u_int8_t *cursor = raw;
  const u_int8_t *end = raw + sizeof(value);
  u_int32_t digest = 0;

  /* Copy the value out byte by byte rather than aliasing it */
  memcpy(raw, &value, sizeof(value));

  while(cursor != end) {
    digest = (digest << 6) + (digest << 16) - digest + *cursor;
    cursor++;
  }

  return(digest);
}
3095 +/* ********************************** */
3097 +static void handle_bloom_filter_rule(struct ring_opt *pfr, char *buf) {
3103 + count = strlen(buf);
3105 + printk("PF_RING: -> handle_bloom_filter_rule(%s)\n", buf);
3107 + if((buf[count-1] == '\n') || (buf[count-1] == '\r')) buf[count-1] = '\0';
3110 + u_int32_t the_bit;
3112 + if(!strncmp(&buf[1], "vlan=", 5)) {
3113 + sscanf(&buf[6], "%d", &the_bit);
3116 + set_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_add++;
3118 + clear_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_remove++;
3119 + } else if(!strncmp(&buf[1], "mac=", 4)) {
3120 + int a, b, c, d, e, f;
3122 + if(sscanf(&buf[5], "%02x:%02x:%02x:%02x:%02x:%02x:",
3123 + &a, &b, &c, &d, &e, &f) == 6) {
3124 + u_int32_t mac_addr = (a & 0xff) + (b & 0xff) + ((c & 0xff) << 24) + ((d & 0xff) << 16) + ((e & 0xff) << 8) + (f & 0xff);
3126 + /* printk("PF_RING: -> [%u][%u][%u][%u][%u][%u] -> [%u]\n", a, b, c, d, e, f, mac_addr); */
3129 + set_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_add++;
3131 + clear_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_remove++;
3133 + printk("PF_RING: -> Invalid MAC address '%s'\n", &buf[5]);
3134 + } else if(!strncmp(&buf[1], "ip=", 3)) {
3137 + if(sscanf(&buf[4], "%d.%d.%d.%d", &a, &b, &c, &d) == 4) {
3138 + u_int32_t ip_addr = ((a & 0xff) << 24) + ((b & 0xff) << 16) + ((c & 0xff) << 8) + (d & 0xff);
3141 + set_bit_bitmask(&pfr->ip_bitmask, ip_addr), set_bit_bitmask(&pfr->ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_add++;
3143 + clear_bit_bitmask(&pfr->ip_bitmask, ip_addr), clear_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_remove++;
3145 + printk("PF_RING: -> Invalid IP address '%s'\n", &buf[4]);
3146 + } else if(!strncmp(&buf[1], "port=", 5)) {
3147 + sscanf(&buf[6], "%d", &the_bit);
3150 + set_bit_bitmask(&pfr->port_bitmask, the_bit), set_bit_bitmask(&pfr->port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_add++;
3152 + clear_bit_bitmask(&pfr->port_bitmask, the_bit), clear_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_remove++;
3153 + } else if(!strncmp(&buf[1], "proto=", 6)) {
3154 + if(!strncmp(&buf[7], "tcp", 3)) the_bit = 6;
3155 + else if(!strncmp(&buf[7], "udp", 3)) the_bit = 17;
3156 + else if(!strncmp(&buf[7], "icmp", 4)) the_bit = 1;
3157 + else sscanf(&buf[7], "%d", &the_bit);
3160 + set_bit_bitmask(&pfr->proto_bitmask, the_bit);
3162 + clear_bit_bitmask(&pfr->proto_bitmask, the_bit);
3164 + printk("PF_RING: -> Unknown rule type '%s'\n", buf);
3168 +/* ********************************** */
3170 +static void reset_bloom_filters(struct ring_opt *pfr) {
3171 + reset_bitmask(&pfr->mac_bitmask);
3172 + reset_bitmask(&pfr->vlan_bitmask);
3173 + reset_bitmask(&pfr->ip_bitmask); reset_bitmask(&pfr->twin_ip_bitmask);
3174 + reset_bitmask(&pfr->port_bitmask); reset_bitmask(&pfr->twin_port_bitmask);
3175 + reset_bitmask(&pfr->proto_bitmask);
3177 + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3178 + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3179 + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3180 + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3181 + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3183 + printk("PF_RING: rules have been reset\n");
3186 +/* ********************************** */
3188 +static void init_blooms(struct ring_opt *pfr) {
3189 + alloc_bitmask(4096, &pfr->mac_bitmask);
3190 + alloc_bitmask(4096, &pfr->vlan_bitmask);
3191 + alloc_bitmask(32768, &pfr->ip_bitmask); alloc_bitmask(32768, &pfr->twin_ip_bitmask);
3192 + alloc_bitmask(4096, &pfr->port_bitmask); alloc_bitmask(4096, &pfr->twin_port_bitmask);
3193 + alloc_bitmask(4096, &pfr->proto_bitmask);
3195 + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3196 + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3197 + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3198 + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3199 + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3201 + reset_bloom_filters(pfr);
3204 +/* ********************************** */
/*
 * Match callback handed to acsmSearch2(): ignores its arguments and
 * always returns 0.
 *
 * Deliberately not declared 'inline': the function is used through a
 * function pointer, and with C99 inline semantics a plain 'inline'
 * definition does not emit the external symbol that pointer needs.
 * Linkage is otherwise unchanged.
 */
int MatchFound (void* id, int index, void *data) { return(0); }
3208 +/* ********************************** */
3210 +static void add_skb_to_ring(struct sk_buff *skb,
3211 + struct ring_opt *pfr,
3212 + u_char recv_packet,
3213 + u_char real_skb /* 1=skb 0=faked skb */) {
3214 + FlowSlot *theSlot;
3215 + int idx, displ, fwd_pkt = 0;
3218 + /* Hack for identifying a packet received by the e1000 */
3220 + displ = SKB_DISPLACEMENT;
3222 + displ = 0; /* Received by the e1000 wrapper */
3226 + write_lock(&pfr->ring_index_lock);
3227 + pfr->slots_info->tot_pkts++;
3228 + write_unlock(&pfr->ring_index_lock);
3230 + /* BPF Filtering (from af_packet.c) */
3231 + if(pfr->bpfFilter != NULL) {
3232 + unsigned res = 1, len;
3234 + len = skb->len-skb->data_len;
3236 + write_lock(&pfr->ring_index_lock);
3237 + skb->data -= displ;
3238 + res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
3239 + skb->data += displ;
3240 + write_unlock(&pfr->ring_index_lock);
3243 + /* Filter failed */
3245 +#if defined(RING_DEBUG)
3246 + printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
3247 + "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3248 + (int)skb->len, pfr->slots_info->tot_pkts,
3249 + pfr->slots_info->insert_idx,
3250 + skb->pkt_type, skb->cloned);
3257 + /* ************************** */
3259 + if(pfr->sample_rate > 1) {
3260 + if(pfr->pktToSample == 0) {
3261 + write_lock(&pfr->ring_index_lock);
3262 + pfr->pktToSample = pfr->sample_rate;
3263 + write_unlock(&pfr->ring_index_lock);
3265 + write_lock(&pfr->ring_index_lock);
3266 + pfr->pktToSample--;
3267 + write_unlock(&pfr->ring_index_lock);
3269 +#if defined(RING_DEBUG)
3270 + printk("add_skb_to_ring(skb): sampled packet [len=%d]"
3271 + "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3272 + (int)skb->len, pfr->slots_info->tot_pkts,
3273 + pfr->slots_info->insert_idx,
3274 + skb->pkt_type, skb->cloned);
3280 + /* ************************************* */
3282 + if((pfr->reflector_dev != NULL)
3283 + && (!netif_queue_stopped(pfr->reflector_dev))) {
3284 + int cpu = smp_processor_id();
3286 + /* increase reference counter so that this skb is not freed */
3287 + atomic_inc(&skb->users);
3289 + skb->data -= displ;
3292 + if (pfr->reflector_dev->xmit_lock_owner != cpu) {
3293 + /* Patch below courtesy of Matthew J. Roth <mroth@imminc.com> */
3294 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3295 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3296 + pfr->reflector_dev->xmit_lock_owner = cpu;
3297 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3299 + netif_tx_lock_bh(pfr->reflector_dev);
3301 + if (pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev) == 0) {
3302 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3303 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3304 + pfr->reflector_dev->xmit_lock_owner = -1;
3305 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3307 + netif_tx_unlock_bh(pfr->reflector_dev);
3309 + skb->data += displ;
3310 +#if defined(RING_DEBUG)
3311 + printk("++ hard_start_xmit succeeded\n");
3316 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3317 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3318 + pfr->reflector_dev->xmit_lock_owner = -1;
3319 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3321 + netif_tx_unlock_bh(pfr->reflector_dev);
3325 +#if defined(RING_DEBUG)
3326 + printk("++ hard_start_xmit failed\n");
3328 + skb->data += displ;
3329 + return; /* -ENETDOWN */
3332 + /* ************************************* */
3334 +#if defined(RING_DEBUG)
3335 + printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
3336 + "[pkt_type=%d][cloned=%d]\n",
3337 + (int)skb->len, pfr->slots_info->tot_pkts,
3338 + pfr->slots_info->insert_idx,
3339 + skb->pkt_type, skb->cloned);
3342 + idx = pfr->slots_info->insert_idx;
3343 + theSlot = get_insert_slot(pfr);
3345 + if((theSlot != NULL) && (theSlot->slot_state == 0)) {
3346 + struct pcap_pkthdr *hdr;
3348 + int is_ip_pkt, debug = 0;
3350 + /* Update Index */
3353 + bucket = &theSlot->bucket;
3354 + hdr = (struct pcap_pkthdr*)bucket;
3356 + /* BD - API changed for time keeping */
3357 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3358 + if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
3360 + hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
3362 + if(skb->tstamp.tv64 == 0) __net_timestamp(skb);
3364 + struct timeval tv = ktime_to_timeval(skb->tstamp);
3365 + hdr->ts.tv_sec = tv.tv_sec, hdr->ts.tv_usec = tv.tv_usec;
3367 + hdr->caplen = skb->len+displ;
3369 + if(hdr->caplen > pfr->slots_info->data_len)
3370 + hdr->caplen = pfr->slots_info->data_len;
3372 + hdr->len = skb->len+displ;
3375 + is_ip_pkt = parse_pkt(skb, displ,
3383 + &hdr->l4_src_port,
3384 + &hdr->l4_dst_port,
3385 + &hdr->payload_offset);
3387 + if(is_ip_pkt && pfr->bitmask_enabled) {
3388 + int vlan_match = 0;
3394 + printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d][sport=%d][dport=%d][src=%u][dst=%u]\n",
3395 + hdr->l3_proto, hdr->vlan_id, hdr->l4_src_port, hdr->l4_dst_port, hdr->ipv4_src, hdr->ipv4_dst);
3397 + printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d]\n", hdr->l3_proto, hdr->vlan_id);
3400 + if(hdr->vlan_id != (u_int16_t)-1) {
3401 + vlan_match = is_set_bit_bitmask(&pfr->vlan_bitmask, hdr->vlan_id);
3406 + struct ethhdr *eh = (struct ethhdr*)(skb->data);
3407 + u_int32_t src_mac = (eh->h_source[0] & 0xff) + (eh->h_source[1] & 0xff) + ((eh->h_source[2] & 0xff) << 24)
3408 + + ((eh->h_source[3] & 0xff) << 16) + ((eh->h_source[4] & 0xff) << 8) + (eh->h_source[5] & 0xff);
3410 + if(debug) printk(KERN_INFO "PF_RING: [src_mac=%u]\n", src_mac);
3412 + fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, src_mac);
3415 + u_int32_t dst_mac = (eh->h_dest[0] & 0xff) + (eh->h_dest[1] & 0xff) + ((eh->h_dest[2] & 0xff) << 24)
3416 + + ((eh->h_dest[3] & 0xff) << 16) + ((eh->h_dest[4] & 0xff) << 8) + (eh->h_dest[5] & 0xff);
3418 + if(debug) printk(KERN_INFO "PF_RING: [dst_mac=%u]\n", dst_mac);
3420 + fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, dst_mac);
3422 + if(is_ip_pkt && (!fwd_pkt)) {
3423 + fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_src);
3426 + fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_dst);
3428 + if((!fwd_pkt) && ((hdr->l3_proto == IPPROTO_TCP)
3429 + || (hdr->l3_proto == IPPROTO_UDP))) {
3430 + fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_src_port);
3431 + if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_dst_port);
3434 + if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->proto_bitmask, hdr->l3_proto);
3442 + if(fwd_pkt && (pfr->acsm != NULL)) {
3443 + if((hdr->payload_offset > 0) && ((skb->len+skb->mac_len) > hdr->payload_offset)) {
3444 + char *payload = (skb->data-displ+hdr->payload_offset);
3445 + int payload_len = skb->len /* + skb->mac_len */ - hdr->payload_offset;
3447 + if((payload_len > 0)
3448 + && ((hdr->l4_src_port == 80) || (hdr->l4_dst_port == 80))) {
3454 + memcpy(buf, payload, payload_len);
3455 + buf[payload_len] = '\0';
3456 + printk("[%s]\n", payload);
3459 + /* printk("Tring to match pattern [len=%d][%s]\n", payload_len, payload); */
3460 + rc = acsmSearch2(pfr->acsm, payload, payload_len, MatchFound, (void *)0) ? 1 : 0;
3462 + // printk("Match result: %d\n", fwd_pkt);
3464 + printk("Pattern matched!\n");
3475 + memcpy(&bucket[sizeof(struct pcap_pkthdr)], skb->data-displ, hdr->caplen);
3477 +#if defined(RING_DEBUG)
3479 + static unsigned int lastLoss = 0;
3481 + if(pfr->slots_info->tot_lost
3482 + && (lastLoss != pfr->slots_info->tot_lost)) {
3483 + printk("add_skb_to_ring(%d): [data_len=%d]"
3484 + "[hdr.caplen=%d][skb->len=%d]"
3485 + "[pcap_pkthdr=%d][removeIdx=%d]"
3486 + "[loss=%lu][page=%u][slot=%u]\n",
3487 + idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len,
3488 + sizeof(struct pcap_pkthdr),
3489 + pfr->slots_info->remove_idx,
3490 + (long unsigned int)pfr->slots_info->tot_lost,
3491 + pfr->insert_page_id, pfr->insert_slot_id);
3493 + lastLoss = pfr->slots_info->tot_lost;
3498 + write_lock(&pfr->ring_index_lock);
3499 + if(idx == pfr->slots_info->tot_slots)
3500 + pfr->slots_info->insert_idx = 0;
3502 + pfr->slots_info->insert_idx = idx;
3504 + pfr->slots_info->tot_insert++;
3505 + theSlot->slot_state = 1;
3506 + write_unlock(&pfr->ring_index_lock);
3509 + write_lock(&pfr->ring_index_lock);
3510 + pfr->slots_info->tot_lost++;
3511 + write_unlock(&pfr->ring_index_lock);
3513 +#if defined(RING_DEBUG)
3514 + printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
3515 + "[removeIdx=%u][insertIdx=%u]\n",
3516 + (long unsigned int)pfr->slots_info->tot_lost,
3517 + pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
3523 + /* wakeup in case of poll() */
3524 + if(waitqueue_active(&pfr->ring_slots_waitqueue))
3525 + wake_up_interruptible(&pfr->ring_slots_waitqueue);
3529 +/* ********************************** */
3531 +static u_int hash_skb(struct ring_cluster *cluster_ptr,
3532 + struct sk_buff *skb, u_char recv_packet) {
3537 + if(cluster_ptr->hashing_mode == cluster_round_robin) {
3538 + idx = cluster_ptr->hashing_id++;
3540 + /* Per-flow clustering */
3541 + if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
3545 + displ = SKB_DISPLACEMENT;
3550 + Always points to the IP part of the packet
3553 + ip = (struct iphdr*)(skb->data+displ);
3555 + idx = ip->saddr+ip->daddr+ip->protocol;
3557 + if(ip->protocol == IPPROTO_TCP) {
3558 + struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
3559 + +sizeof(struct iphdr));
3560 + idx += tcp->source+tcp->dest;
3561 + } else if(ip->protocol == IPPROTO_UDP) {
3562 + struct udphdr *udp = (struct udphdr*)(skb->data+displ
3563 + +sizeof(struct iphdr));
3564 + idx += udp->source+udp->dest;
3570 + return(idx % cluster_ptr->num_cluster_elements);
3573 +/* ********************************** */
3575 +static int skb_ring_handler(struct sk_buff *skb,
3576 + u_char recv_packet,
3577 + u_char real_skb /* 1=skb 0=faked skb */) {
3578 + struct sock *skElement;
3580 + struct list_head *ptr;
3581 + struct ring_cluster *cluster_ptr;
3584 + uint64_t rdt = _rdtsc(), rdt1, rdt2;
3587 + if((!skb) /* Invalid skb */
3588 + || ((!enable_tx_capture) && (!recv_packet))) {
3590 + An outgoing packet is about to be sent out
3591 + but we decided not to handle transmitted
3597 +#if defined(RING_DEBUG)
3599 + printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
3600 + skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
3608 + /* [1] Check unclustered sockets */
3609 + for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
3610 + struct ring_opt *pfr;
3611 + struct ring_element *entry;
3613 + entry = list_entry(ptr, struct ring_element, list);
3615 + read_lock(&ring_mgmt_lock);
3616 + skElement = entry->sk;
3617 + pfr = ring_sk(skElement);
3618 + read_unlock(&ring_mgmt_lock);
3621 + && (pfr->cluster_id == 0 /* No cluster */)
3622 + && (pfr->ring_slots != NULL)
3623 + && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3624 + /* We've found the ring where the packet can be stored */
3625 + read_lock(&ring_mgmt_lock);
3626 + add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3627 + read_unlock(&ring_mgmt_lock);
3629 + rc = 1; /* Ring found: we've done our job */
3633 + /* [2] Check socket clusters */
3634 + cluster_ptr = ring_cluster_list;
3636 + while(cluster_ptr != NULL) {
3637 + struct ring_opt *pfr;
3639 + if(cluster_ptr->num_cluster_elements > 0) {
3640 + u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
3642 + read_lock(&ring_mgmt_lock);
3643 + skElement = cluster_ptr->sk[skb_hash];
3644 + read_unlock(&ring_mgmt_lock);
3646 + if(skElement != NULL) {
3647 + pfr = ring_sk(skElement);
3650 + && (pfr->ring_slots != NULL)
3651 + && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3652 + /* We've found the ring where the packet can be stored */
3653 + read_lock(&ring_mgmt_lock);
3654 + add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3655 + read_unlock(&ring_mgmt_lock);
3657 + rc = 1; /* Ring found: we've done our job */
3662 + cluster_ptr = cluster_ptr->next;
3666 + rdt1 = _rdtsc()-rdt1;
3673 + if(transparent_mode) rc = 0;
3675 + if((rc != 0) && real_skb)
3676 + dev_kfree_skb(skb); /* Free the skb */
3679 + rdt2 = _rdtsc()-rdt2;
3680 + rdt = _rdtsc()-rdt;
3682 +#if defined(RING_DEBUG)
3683 + printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
3684 + (int)rdt, rdt-rdt1,
3685 + (int)((float)((rdt-rdt1)*100)/(float)rdt),
3687 + (int)((float)(rdt2*100)/(float)rdt));
3691 + return(rc); /* 0 = packet not handled */
3694 +/* ********************************** */
3696 +struct sk_buff skb;
3698 +static int buffer_ring_handler(struct net_device *dev,
3699 + char *data, int len) {
3701 +#if defined(RING_DEBUG)
3702 + printk("buffer_ring_handler: [dev=%s][len=%d]\n",
3703 + dev->name == NULL ? "<NULL>" : dev->name, len);
3706 + /* BD - API changed for time keeping */
3707 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3708 + skb.dev = dev, skb.len = len, skb.data = data,
3709 + skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */
3711 + skb.dev = dev, skb.len = len, skb.data = data,
3712 + skb.data_len = len, skb.tstamp.tv64 = 0; /* Calculate the time */
3715 + skb_ring_handler(&skb, 1, 0 /* fake skb */);
3720 +/* ********************************** */
3722 +static int ring_create(struct socket *sock, int protocol) {
3724 + struct ring_opt *pfr;
3727 +#if defined(RING_DEBUG)
3728 + printk("RING: ring_create()\n");
3731 + /* Are you root, superuser or so ? */
3732 + if(!capable(CAP_NET_ADMIN))
3735 + if(sock->type != SOCK_RAW)
3736 + return -ESOCKTNOSUPPORT;
3738 + if(protocol != htons(ETH_P_ALL))
3739 + return -EPROTONOSUPPORT;
3741 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3742 + MOD_INC_USE_COUNT;
3747 + // BD: -- broke this out to keep it more simple and clear as to what the
3749 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3750 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3751 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
3753 + // BD: API changed in 2.6.12, ref:
3754 + // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
3755 + sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
3759 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1);
3765 + sock->ops = &ring_ops;
3766 + sock_init_data(sock, sk);
3767 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3768 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3769 + sk_set_owner(sk, THIS_MODULE);
3774 + ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
3776 + if (!(pfr = ring_sk(sk))) {
3780 + memset(pfr, 0, sizeof(*pfr));
3781 + init_waitqueue_head(&pfr->ring_slots_waitqueue);
3782 + pfr->ring_index_lock = RW_LOCK_UNLOCKED;
3783 + atomic_set(&pfr->num_ring_slots_waiters, 0);
3787 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3788 + sk->sk_family = PF_RING;
3789 + sk->sk_destruct = ring_sock_destruct;
3791 + sk->family = PF_RING;
3792 + sk->destruct = ring_sock_destruct;
3793 + sk->num = protocol;
3798 +#if defined(RING_DEBUG)
3799 + printk("RING: ring_create() - created\n");
3804 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3805 + MOD_DEC_USE_COUNT;
3810 +/* *********************************************** */
3812 +static int ring_release(struct socket *sock)
3814 + struct sock *sk = sock->sk;
3815 + struct ring_opt *pfr = ring_sk(sk);
3819 +#if defined(RING_DEBUG)
3820 + printk("RING: called ring_release\n");
3823 +#if defined(RING_DEBUG)
3824 + printk("RING: ring_release entered\n");
3828 + The calls below must be placed outside the
3829 + write_lock_irq...write_unlock_irq block.
3832 + ring_proc_remove(ring_sk(sk));
3834 + write_lock_irq(&ring_mgmt_lock);
3838 + /* Free the ring buffer */
3839 + if(pfr->ring_memory) {
3840 + struct page *page, *page_end;
3842 + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3843 + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3844 + ClearPageReserved(page);
3846 + free_pages(pfr->ring_memory, pfr->order);
3849 + free_bitmask(&pfr->mac_bitmask);
3850 + free_bitmask(&pfr->vlan_bitmask);
3851 + free_bitmask(&pfr->ip_bitmask); free_bitmask(&pfr->twin_ip_bitmask);
3852 + free_bitmask(&pfr->port_bitmask); free_bitmask(&pfr->twin_port_bitmask);
3853 + free_bitmask(&pfr->proto_bitmask);
3855 + if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
3858 + ring_sk(sk) = NULL;
3860 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3861 + skb_queue_purge(&sk->sk_write_queue);
3865 + write_unlock_irq(&ring_mgmt_lock);
3867 +#if defined(RING_DEBUG)
3868 + printk("RING: ring_release leaving\n");
3874 +/* ********************************** */
3876 + * We create a ring for this socket and bind it to the specified device
3878 +static int packet_ring_bind(struct sock *sk, struct net_device *dev)
3880 + u_int the_slot_len;
3881 + u_int32_t tot_mem;
3882 + struct ring_opt *pfr = ring_sk(sk);
3883 + struct page *page, *page_end;
3885 + if(!dev) return(-1);
3887 +#if defined(RING_DEBUG)
3888 + printk("RING: packet_ring_bind(%s) called\n", dev->name);
3891 + /* **********************************************
3893 + *************************************
3897 + ************************************* <-+
3899 + ************************************* |
3901 + ************************************* +- num_slots
3903 + ************************************* |
3905 + ************************************* <-+
3907 + ********************************************** */
3909 + the_slot_len = sizeof(u_char) /* flowSlot.slot_state */
3913 + + sizeof(struct pcap_pkthdr)
3914 + + bucket_len /* flowSlot.bucket */;
3916 + tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
3919 + Calculate the value of the order parameter used later.
3920 + See http://www.linuxjournal.com/article.php?sid=1133
3922 + for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ;
3925 + We now try to allocate the memory as required. If we fail
3926 + we try to allocate a smaller amount of memory (hence a
3929 + while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0)
3930 + if(pfr->order-- == 0)
3933 + if(pfr->order == 0) {
3934 + printk("RING: ERROR not enough memory for ring\n");
3937 + printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n",
3938 + PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order);
3941 + tot_mem = PAGE_SIZE << pfr->order;
3942 + memset((char*)pfr->ring_memory, 0, tot_mem);
3944 + /* Now we need to reserve the pages */
3945 + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3946 + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3947 + SetPageReserved(page);
3949 + pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
3950 + pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
3952 + pfr->slots_info->version = RING_FLOWSLOT_VERSION;
3953 + pfr->slots_info->slot_len = the_slot_len;
3954 + pfr->slots_info->data_len = bucket_len;
3955 + pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
3956 + pfr->slots_info->tot_mem = tot_mem;
3957 + pfr->slots_info->sample_rate = sample_rate;
3959 + printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
3960 + pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
3961 + pfr->slots_info->tot_mem);
3967 + for(i=0; i<pfr->slots_info->tot_slots; i++) {
3968 + unsigned long idx = i*pfr->slots_info->slot_len;
3969 + FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
3970 + slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
3975 + pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
3979 + Leave this statement here as last one. In fact when
3980 + the ring_netdev != NULL the socket is ready to be used.
3982 + pfr->ring_netdev = dev;
3987 +/* ************************************* */
3989 +/* Bind to a device */
3990 +static int ring_bind(struct socket *sock,
3991 + struct sockaddr *sa, int addr_len)
3993 + struct sock *sk=sock->sk;
3994 + struct net_device *dev = NULL;
3996 +#if defined(RING_DEBUG)
3997 + printk("RING: ring_bind() called\n");
4003 + if (addr_len != sizeof(struct sockaddr))
4005 + if (sa->sa_family != PF_RING)
4008 + /* Safety check: add trailing zero if missing */
4009 + sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
4011 +#if defined(RING_DEBUG)
4012 + printk("RING: searching device %s\n", sa->sa_data);
4015 + if((dev = __dev_get_by_name(&init_net, sa->sa_data)) == NULL) {
4016 +#if defined(RING_DEBUG)
4017 + printk("RING: search failed\n");
4021 + return(packet_ring_bind(sk, dev));
4024 +/* ************************************* */
4026 +static int ring_mmap(struct file *file,
4027 + struct socket *sock,
4028 + struct vm_area_struct *vma)
4030 + struct sock *sk = sock->sk;
4031 + struct ring_opt *pfr = ring_sk(sk);
4032 + unsigned long size, start;
4036 +#if defined(RING_DEBUG)
4037 + printk("RING: ring_mmap() called\n");
4040 + if(pfr->ring_memory == 0) {
4041 +#if defined(RING_DEBUG)
4042 + printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
4047 + size = (unsigned long)(vma->vm_end-vma->vm_start);
4049 + if(size % PAGE_SIZE) {
4050 +#if defined(RING_DEBUG)
4051 + printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
4056 + /* if userspace tries to mmap beyond end of our buffer, fail */
4057 + if(size > pfr->slots_info->tot_mem) {
4058 +#if defined(RING_DEBUG)
4059 + printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem);
4064 + pagesToMap = size/PAGE_SIZE;
4066 +#if defined(RING_DEBUG)
4067 + printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
4070 +#if defined(RING_DEBUG)
4071 + printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
4072 + pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
4073 + pfr->ring_netdev->name);
4076 + /* we do not want to have this area swapped out, lock it */
4077 + vma->vm_flags |= VM_LOCKED;
4078 + start = vma->vm_start;
4080 + /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */
4081 + ptr = (char*)(start+PAGE_SIZE);
4083 + if(remap_page_range(
4084 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4088 + __pa(pfr->ring_memory),
4089 + PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
4090 +#if defined(RING_DEBUG)
4091 + printk("remap_page_range() failed\n");
4096 +#if defined(RING_DEBUG)
4097 + printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap);
4103 +/* ************************************* */
4105 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4106 +static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
4107 + struct msghdr *msg, size_t len, int flags)
4109 + static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
4110 + int flags, struct scm_cookie *scm)
4114 + struct ring_opt *pfr = ring_sk(sock->sk);
4115 + u_int32_t queued_pkts, num_loops = 0;
4117 +#if defined(RING_DEBUG)
4118 + printk("ring_recvmsg called\n");
4121 + slot = get_remove_slot(pfr);
4123 + while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
4124 + wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
4126 +#if defined(RING_DEBUG)
4127 + printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
4128 + slot->slot_state, queued_pkts, num_loops);
4131 + if(queued_pkts > 0) {
4132 + if(num_loops++ > MAX_QUEUE_LOOPS)
4137 +#if defined(RING_DEBUG)
4139 + printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
4140 + queued_pkts, num_loops);
4143 + return(queued_pkts);
4146 +/* ************************************* */
4148 +unsigned int ring_poll(struct file * file,
4149 + struct socket *sock, poll_table *wait)
4152 + struct ring_opt *pfr = ring_sk(sock->sk);
4154 +#if defined(RING_DEBUG)
4155 + printk("poll called\n");
4158 + slot = get_remove_slot(pfr);
4160 + if((slot != NULL) && (slot->slot_state == 0))
4161 + poll_wait(file, &pfr->ring_slots_waitqueue, wait);
4163 +#if defined(RING_DEBUG)
4164 + printk("poll returning %d\n", slot->slot_state);
4167 + if((slot != NULL) && (slot->slot_state == 1))
4168 + return(POLLIN | POLLRDNORM);
4173 +/* ************************************* */
4175 +int add_to_cluster_list(struct ring_cluster *el,
4176 + struct sock *sock) {
4178 + if(el->num_cluster_elements == CLUSTER_LEN)
4179 + return(-1); /* Cluster full */
4181 + ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
4182 + el->sk[el->num_cluster_elements] = sock;
4183 + el->num_cluster_elements++;
4187 +/* ************************************* */
4189 +int remove_from_cluster_list(struct ring_cluster *el,
4190 + struct sock *sock) {
4193 + for(i=0; i<CLUSTER_LEN; i++)
4194 + if(el->sk[i] == sock) {
4195 + el->num_cluster_elements--;
4197 + if(el->num_cluster_elements > 0) {
4198 + /* The cluster contains other elements */
4199 + for(j=i; j<CLUSTER_LEN-1; j++)
4200 + el->sk[j] = el->sk[j+1];
4202 + el->sk[CLUSTER_LEN-1] = NULL;
4204 + /* Empty cluster */
4205 + memset(el->sk, 0, sizeof(el->sk));
4211 + return(-1); /* Not found */
4214 +/* ************************************* */
4216 +static int remove_from_cluster(struct sock *sock,
4217 + struct ring_opt *pfr)
4219 + struct ring_cluster *el;
4221 +#if defined(RING_DEBUG)
4222 + printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
4225 + if(pfr->cluster_id == 0 /* 0 = No Cluster */)
4226 + return(0); /* Nothing to do */
4228 + el = ring_cluster_list;
4230 + while(el != NULL) {
4231 + if(el->cluster_id == pfr->cluster_id) {
4232 + return(remove_from_cluster_list(el, sock));
4237 + return(-EINVAL); /* Not found */
4240 +/* ************************************* */
4242 +static int add_to_cluster(struct sock *sock,
4243 + struct ring_opt *pfr,
4244 + u_short cluster_id)
4246 + struct ring_cluster *el;
4249 + printk("--> add_to_cluster(%d)\n", cluster_id);
4252 + if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
4254 + if(pfr->cluster_id != 0)
4255 + remove_from_cluster(sock, pfr);
4257 + el = ring_cluster_list;
4259 + while(el != NULL) {
4260 + if(el->cluster_id == cluster_id) {
4261 + return(add_to_cluster_list(el, sock));
4266 + /* There's no existing cluster. We need to create one */
4267 + if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL)
4270 + el->cluster_id = cluster_id;
4271 + el->num_cluster_elements = 1;
4272 + el->hashing_mode = cluster_per_flow; /* Default */
4273 + el->hashing_id = 0;
4275 + memset(el->sk, 0, sizeof(el->sk));
4277 + el->next = ring_cluster_list;
4278 + ring_cluster_list = el;
4279 + pfr->cluster_id = cluster_id;
4281 + return(0); /* 0 = OK */
4284 +/* ************************************* */
4286 +/* Code taken/inspired from core/sock.c. Handles the PF_RING socket options (BPF filter attach/detach, cluster membership, reflector device, bloom and string rules); the function ends with a call to sock_setsockopt(). */
4287 +static int ring_setsockopt(struct socket *sock,
4288 + int level, int optname,
4289 + char *optval, int optlen) /* optval/optlen: user-space option payload and its length in bytes */
4291 + struct ring_opt *pfr = ring_sk(sock->sk);
4292 + int val, found, ret = 0;
4293 + u_int cluster_id, do_enable;
4294 + char devName[8], bloom_filter[256], aho_pattern[256];
4296 + if(pfr == NULL) return(-EINVAL);
4298 + if (get_user(val, (int *)optval))
4305 + case SO_ATTACH_FILTER:
4307 + if (optlen == sizeof(struct sock_fprog)) {
4308 + unsigned int fsize;
4309 + struct sock_fprog fprog;
4310 + struct sk_filter *filter;
4317 + Do not call copy_from_user within a held
4318 + spinlock (e.g. ring_mgmt_lock) as this caused
4319 + problems when certain debugging was enabled under
4320 + 2.6.5 -- including hard lockups of the machine.
4322 + if(copy_from_user(&fprog, optval, sizeof(fprog)))
4325 + fsize = sizeof(struct sock_filter) * fprog.len;
4326 + filter = kmalloc(sizeof(*filter) + fsize, GFP_KERNEL); /* header (len, ...) plus fprog.len instructions; fsize alone left no room for the struct itself */
4328 + if(filter == NULL) {
4333 + if(copy_from_user(filter->insns, fprog.filter, fsize))
4336 + filter->len = fprog.len;
4338 + if(sk_chk_filter(filter->insns, filter->len) != 0) {
4339 + /* Bad filter specified */
4341 + pfr->bpfFilter = NULL;
4345 + /* get the lock, set the filter, release the lock */
4346 + write_lock(&ring_mgmt_lock);
4347 + pfr->bpfFilter = filter;
4348 + write_unlock(&ring_mgmt_lock);
4353 + case SO_DETACH_FILTER:
4354 + write_lock(&ring_mgmt_lock);
4356 + if(pfr->bpfFilter != NULL) {
4357 + kfree(pfr->bpfFilter);
4358 + pfr->bpfFilter = NULL;
4359 + write_unlock(&ring_mgmt_lock);
4365 + case SO_ADD_TO_CLUSTER:
4366 + if (optlen!=sizeof(val))
4369 + if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
4372 + write_lock(&ring_mgmt_lock);
4373 + ret = add_to_cluster(sock->sk, pfr, cluster_id);
4374 + write_unlock(&ring_mgmt_lock);
4377 + case SO_REMOVE_FROM_CLUSTER:
4378 + write_lock(&ring_mgmt_lock);
4379 + ret = remove_from_cluster(sock->sk, pfr);
4380 + write_unlock(&ring_mgmt_lock);
4383 + case SO_SET_REFLECTOR:
4384 + if(optlen >= (sizeof(devName)-1)) /* keep room for the terminator written below */
4388 + if(copy_from_user(devName, optval, optlen))
4392 + devName[optlen] = '\0';
4394 +#if defined(RING_DEBUG)
4395 + printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
4398 + write_lock(&ring_mgmt_lock);
4399 + pfr->reflector_dev = dev_get_by_name(&init_net, devName);
4400 + write_unlock(&ring_mgmt_lock);
4402 +#if defined(RING_DEBUG)
4403 + if(pfr->reflector_dev != NULL)
4404 + printk("SO_SET_REFLECTOR(%s): succeded\n", devName);
4406 + printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
4410 + case SO_SET_BLOOM:
4411 + if(optlen >= (sizeof(bloom_filter)-1)) /* keep room for the terminator written below */
4415 + if(copy_from_user(bloom_filter, optval, optlen))
4419 + bloom_filter[optlen] = '\0';
4421 + write_lock(&ring_mgmt_lock);
4422 + handle_bloom_filter_rule(pfr, bloom_filter);
4423 + write_unlock(&ring_mgmt_lock);
4426 + case SO_SET_STRING:
4427 + if(optlen >= (sizeof(aho_pattern)-1)) /* keep room for the terminator written below */
4431 + if(copy_from_user(aho_pattern, optval, optlen))
4435 + aho_pattern[optlen] = '\0';
4437 + write_lock(&ring_mgmt_lock);
4438 + if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
4441 + if((pfr->acsm = acsmNew2()) != NULL) {
4442 + int nc=1 /* case sensitive */, i = 0;
4444 + pfr->acsm->acsmFormat = ACF_BANDED;
4445 + acsmAddPattern2(pfr->acsm, (unsigned char*)aho_pattern,
4446 + (int)strlen(aho_pattern), nc, 0, 0,(void*)aho_pattern, i);
4447 + acsmCompile2(pfr->acsm);
4450 + pfr->acsm = kmalloc (10, GFP_KERNEL); /* TEST */
4453 + write_unlock(&ring_mgmt_lock);
4456 + case SO_TOGGLE_BLOOM_STATE:
4457 + if(optlen != sizeof(do_enable)) /* payload is one u_int flag; the old bloom_filter-sized bound let the copy below overrun do_enable on the stack */
4461 + if(copy_from_user(&do_enable, optval, optlen))
4465 + write_lock(&ring_mgmt_lock);
4467 + pfr->bitmask_enabled = 1;
4469 + pfr->bitmask_enabled = 0;
4470 + write_unlock(&ring_mgmt_lock);
4471 + printk("SO_TOGGLE_BLOOM_STATE: bloom bitmask %s\n",
4472 + pfr->bitmask_enabled ? "enabled" : "disabled");
4475 + case SO_RESET_BLOOM_FILTERS:
4476 + if(optlen != sizeof(do_enable)) /* same fix as SO_TOGGLE_BLOOM_STATE: the destination is a single u_int */
4480 + if(copy_from_user(&do_enable, optval, optlen))
4484 + write_lock(&ring_mgmt_lock);
4485 + reset_bloom_filters(pfr);
4486 + write_unlock(&ring_mgmt_lock);
4497 + return(sock_setsockopt(sock, level, optname, optval, optlen));
4500 +/* ************************************* */ /* ring_ioctl: ioctl entry point for ring sockets; the interface ioctls listed below are delegated to inet_dgram_ops.ioctl */
4502 +static int ring_ioctl(struct socket *sock,
4503 + unsigned int cmd, unsigned long arg) /* cmd/arg are forwarded unchanged when delegated */
4508 + case SIOCGIFFLAGS:
4509 + case SIOCSIFFLAGS:
4511 + case SIOCGIFMETRIC:
4512 + case SIOCSIFMETRIC:
4518 + case SIOCGIFHWADDR:
4519 + case SIOCSIFHWADDR:
4522 + case SIOCSIFSLAVE:
4523 + case SIOCGIFSLAVE:
4524 + case SIOCGIFINDEX:
4526 + case SIOCGIFCOUNT:
4527 + case SIOCSIFHWBROADCAST:
4528 + return(inet_dgram_ops.ioctl(sock, cmd, arg)); /* shared handler for every case label above */
4532 + return -ENOIOCTLCMD; /* NOTE(review): presumably the fallback for commands not listed above - confirm against the full switch */
4538 +/* ************************************* */ /* ring_ops: socket operation table for the PF_RING family; unsupported operations point at the generic sock_no_* stubs, the rest at the ring_* handlers */
4540 +static struct proto_ops ring_ops = {
4541 + .family = PF_RING,
4542 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) /* .owner is only set when building for 2.6.0 or later */
4543 + .owner = THIS_MODULE,
4546 + /* Operations that make no sense on ring sockets. */
4547 + .connect = sock_no_connect,
4548 + .socketpair = sock_no_socketpair,
4549 + .accept = sock_no_accept,
4550 + .getname = sock_no_getname,
4551 + .listen = sock_no_listen,
4552 + .shutdown = sock_no_shutdown,
4553 + .sendpage = sock_no_sendpage,
4554 + .sendmsg = sock_no_sendmsg, /* no transmit path through this table */
4555 + .getsockopt = sock_no_getsockopt, /* options are settable via ring_setsockopt but have no getter here */
4557 + /* Now the operations that really occur. */
4558 + .release = ring_release,
4559 + .bind = ring_bind,
4560 + .mmap = ring_mmap,
4561 + .poll = ring_poll,
4562 + .setsockopt = ring_setsockopt,
4563 + .ioctl = ring_ioctl,
4564 + .recvmsg = ring_recvmsg,
4567 +/* ************************************ */ /* ring_family_ops: protocol-family descriptor passed to sock_register() in ring_init */
4569 +static struct net_proto_family ring_family_ops = {
4570 + .family = PF_RING,
4571 + .create = ring_create, /* socket-creation callback for this family */
4572 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) /* .owner is only set when building for 2.6.0 or later */
4573 + .owner = THIS_MODULE,
4577 +// BD: API changed in 2.6.12, ref:
4578 +// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
4579 +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11)) /* ring_proto is only defined for kernels newer than 2.6.11 */
4580 +static struct proto ring_proto = {
4581 + .name = "PF_RING",
4582 + .owner = THIS_MODULE,
4583 + .obj_size = sizeof(struct sock), /* object size is that of a plain struct sock */
4587 +/* ************************************ */ /* ring_exit: module unload hook; walks ring_table, frees the cluster list, clears both ring handlers and unregisters the PF_RING family */
4589 +static void __exit ring_exit(void)
4591 + struct list_head *ptr;
4592 + struct ring_element *entry;
4594 + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) { /* visit every node linked into ring_table */
4595 + entry = list_entry(ptr, struct ring_element, list); /* recover the enclosing ring_element from its list member */
4599 + while(ring_cluster_list != NULL) { /* release every cluster record */
4600 + struct ring_cluster *next = ring_cluster_list->next; /* saved before the current node is freed */
4601 + kfree(ring_cluster_list);
4602 + ring_cluster_list = next;
4605 + set_skb_ring_handler(NULL); /* remove the handlers installed by ring_init */
4606 + set_buffer_ring_handler(NULL);
4607 + sock_unregister(PF_RING);
4609 + printk("PF_RING shut down.\n");
4612 +/* ************************************ */ /* ring_init: module load hook; initialises the global lists, registers the PF_RING family, installs the skb/buffer ring handlers and logs the configuration */
4614 +static int __init ring_init(void)
4616 + printk("Welcome to PF_RING %s\n(C) 2004-07 L.Deri <deri@ntop.org>\n",
4619 + INIT_LIST_HEAD(&ring_table);
4620 + ring_cluster_list = NULL;
4622 + sock_register(&ring_family_ops); /* NOTE(review): the return value is not checked on this line */
4624 + set_skb_ring_handler(skb_ring_handler);
4625 + set_buffer_ring_handler(buffer_ring_handler);
4627 + if(get_buffer_ring_handler() != buffer_ring_handler) { /* read the handler back to verify the installation took effect */
4628 + printk("PF_RING: set_buffer_ring_handler FAILED\n");
4630 + set_skb_ring_handler(NULL); /* failure path: undo the handler installation and the family registration */
4631 + set_buffer_ring_handler(NULL);
4632 + sock_unregister(PF_RING);
4635 + printk("PF_RING: bucket length %d bytes\n", bucket_len);
4636 + printk("PF_RING: ring slots %d\n", num_slots);
4637 + printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate);
4638 + printk("PF_RING: capture TX %s\n",
4639 + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
4640 + printk("PF_RING: transparent mode %s\n",
4641 + transparent_mode ? "Yes" : "No");
4643 + printk("PF_RING initialized correctly.\n");
4650 +module_init(ring_init); /* ring_init runs at module load */
4651 +module_exit(ring_exit); /* ring_exit runs at module unload */
4652 +MODULE_LICENSE("GPL");
4654 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) /* the alias is only declared when building for 2.6.0 or later */
4655 +MODULE_ALIAS_NETPROTO(PF_RING);