diff --unified --recursive --new-file linux-2.6.21.4/include/linux/ring.h linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h
--- linux-2.6.21.4/include/linux/ring.h 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h 2007-06-10 16:43:04.346421348 +0000
+ * Definitions for packet ring
+ * 2004-07 Luca Deri <deri@ntop.org>
+#define INCLUDE_MAC_INFO
+#ifdef INCLUDE_MAC_INFO
+#define SKB_DISPLACEMENT 14 /* Include MAC address information */
+#else
+#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */
+#endif
+#define RING_MAGIC_VALUE 0x88
+#define RING_FLOWSLOT_VERSION 6
+#define RING_VERSION "3.4.1"
+#define SO_ADD_TO_CLUSTER 99
+#define SO_REMOVE_FROM_CLUSTER 100
+#define SO_SET_REFLECTOR 101
+#define SO_SET_BLOOM 102
+#define SO_SET_STRING 103
+#define SO_TOGGLE_BLOOM_STATE 104
+#define SO_RESET_BLOOM_FILTERS 105
+#define BITMASK_SET(n, p) (((char*)(p)->bits_memory)[(n)/8] |= (1<<((n) % 8)))
+#define BITMASK_CLR(n, p) (((char*)(p)->bits_memory)[(n)/8] &= ~(1<<((n) % 8)))
+#define BITMASK_ISSET(n, p) (((char*)(p)->bits_memory)[(n)/8] & (1<<((n) % 8)))
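The macro arguments are parenthesized so they stay safe for expression arguments. A minimal usage sketch (hypothetical, not part of the patch; any struct exposing a bits_memory field works, e.g. the bitmask_selector declared further down):

/* Hypothetical sketch: set, test and clear bit 80 of a selector. */
void bitmask_demo(bitmask_selector *p) {
        BITMASK_SET(80, p);             /* byte 10, bit 0 */
        if (BITMASK_ISSET(80, p))
                BITMASK_CLR(80, p);
}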
+/* *********************************** */
+  Aho-Corasick code taken from Snort
+ * DEFINES and Typedef's
+#define MAX_ALPHABET_SIZE 256
+ FAIL STATE for 1,2,or 4 bytes for state transitions
+ Uncomment this define to use 32 bit state values
+typedef unsigned short acstate_t;
+#define ACSM_FAIL_STATE2 0xffff
+struct _acsm_pattern2
+ struct _acsm_pattern2 *next;
+ unsigned char *patrn;
+ unsigned char *casepatrn;
+ * transition nodes - either 8 or 12 bytes
+struct trans_node_s {
+ acstate_t key; /* The character that got us here - sized as acstate_t to keep */
+ /* the structure aligned on 4 bytes and improve caching; a value that */
+ /* straddles a cache line forces an expensive reload, which this avoids. */
+ acstate_t next_state; /* */
+ struct trans_node_s * next; /* next transition for this state */
+ * User specified final storage type for the state transitions
+ * User specified machine types
+ * TRIE : Keyword trie
+ * Aho-Corasick State Machine Struct - one per group of patterns
+ ACSM_PATTERN2 * acsmPatterns;
+ acstate_t * acsmFailState;
+ ACSM_PATTERN2 ** acsmMatchList;
+ /* list of transitions in each state, this is used to build the nfa & dfa */
+ /* after construction we convert to sparse or full format matrix and free */
+ /* the transition lists */
+ trans_node_t ** acsmTransTable;
+ acstate_t ** acsmNextState;
+ int acsmSparseMaxRowNodes;
+ int acsmSparseMaxZcnt;
+ int acsmAlphabetSize;
+/* *********************************** */
+struct pcap_pkthdr {
+ struct timeval ts; /* time stamp */
+ u_int32_t caplen; /* length of portion present */
+ u_int32_t len; /* length this packet (off wire) */
+ /* packet parsing info */
+ u_int16_t eth_type; /* Ethernet type */
+ u_int16_t vlan_id; /* VLAN Id or -1 for no vlan */
+ u_int8_t l3_proto; /* Layer 3 protocol */
+ u_int16_t l3_offset, l4_offset, payload_offset; /* Offsets of L3/L4/payload elements */
+ u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
+ u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
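Because the kernel fills in these parsing fields before the packet reaches the ring, a capture application can classify traffic without re-parsing headers. A minimal sketch (hypothetical helper; assumes ports are stored in host byte order, as the ntohs()/ntohl() calls in parse_pkt() suggest):

/* Hypothetical sketch: classify a slot using only the pre-parsed fields. */
int is_dns_over_udp(const struct pcap_pkthdr *hdr) {
        return hdr->l3_proto == 17 /* IPPROTO_UDP */ &&
               (hdr->l4_src_port == 53 || hdr->l4_dst_port == 53);
}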
+/* *********************************** */
+typedef struct _counter_list {
+ u_int32_t bit_counter;
+ struct _counter_list *next;
+} bitmask_counter_list;
+ u_int32_t num_bits, order, num_pages;
+ unsigned long bits_memory;
+ bitmask_counter_list *clashes;
+/* *********************************** */
+ cluster_per_flow = 0,
+ cluster_round_robin
+/* *********************************** */
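The two cluster policies decide which member socket of a cluster receives a packet: cluster_per_flow keeps every packet of a flow on the same socket, while cluster_round_robin simply rotates. A hedged sketch of the per-flow idea (the hash inputs and the modulo are illustrative assumptions, not the exact code in ring_packet.c):

/* Hypothetical per-flow selection: same 4-tuple -> same cluster member. */
u_short pick_cluster_member(u_int32_t src, u_int32_t dst,
                            u_int16_t sport, u_int16_t dport,
                            u_short num_cluster_elements) {
        return (src + dst + sport + dport) % num_cluster_elements;
}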
+#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr))
+#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr))
+/* *********************************** */
+typedef struct flowSlotInfo {
+ u_int16_t version, sample_rate;
+ u_int32_t tot_slots, slot_len, data_len, tot_mem;
+ u_int64_t tot_pkts, tot_lost;
+ u_int64_t tot_insert, tot_read;
+ u_int32_t insert_idx, remove_idx;
+/* *********************************** */
+typedef struct flowSlot {
+ u_char magic; /* It must always be zero */
+ u_char slot_state; /* 0=empty, 1=full */
+ u_char bucket; /* bucket[bucketLen] */
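Taken together, FlowSlotInfo (the shared index header) and the FlowSlot array describe the mmap()ed ring: the info block sits at the start of the ring memory and the slots follow it (see the slots_info/ring_slots comments in ring_packet.c). A minimal user-space consumer sketch under those layout assumptions (function and variable names are hypothetical):

/* Hypothetical consumer sketch: 'ring' is the mmap()ed ring memory. */
void drain_ring(char *ring) {
        FlowSlotInfo *info  = (FlowSlotInfo *)ring;
        char         *slots = ring + sizeof(FlowSlotInfo);

        for (;;) {
                FlowSlot *slot = (FlowSlot *)&slots[info->remove_idx * info->slot_len];
                if (slot->slot_state != 1)      /* 0=empty, 1=full */
                        break;                  /* nothing queued */
                /* packet data lives at &slot->bucket: pcap_pkthdr + payload */
                slot->slot_state = 0;           /* hand the slot back */
                info->remove_idx = (info->remove_idx + 1) % info->tot_slots;
                info->tot_read++;
        }
}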
+/* *********************************** */
+FlowSlotInfo* getRingPtr(void);
+int allocateRing(char *deviceName, u_int numSlots,
+ u_int bucketLen, u_int sampleRate);
+unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
+void deallocateRing(void);
+/* ************************* */
+typedef int (*handle_ring_skb)(struct sk_buff *skb,
+ u_char recv_packet, u_char real_skb);
+extern handle_ring_skb get_skb_ring_handler(void);
+extern void set_skb_ring_handler(handle_ring_skb the_handler);
+extern void do_skb_ring_handler(struct sk_buff *skb,
+ u_char recv_packet, u_char real_skb);
+typedef int (*handle_ring_buffer)(struct net_device *dev,
+ char *data, int len);
+extern handle_ring_buffer get_buffer_ring_handler(void);
+extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
+extern int do_buffer_ring_handler(struct net_device *dev,
+ char *data, int len);
+#endif /* __KERNEL__ */
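These get/set pairs are the whole contract between the patched core and the ring module: dev.c keeps a function pointer, and the module installs or removes it. A minimal sketch of the module side (hypothetical names; the real registration lives in ring_packet.c):

/* Hypothetical module-side sketch of the handler contract. */
static int my_skb_handler(struct sk_buff *skb, u_char recv_packet, u_char real_skb) {
        /* copy the skb into a ring here; non-zero means "consumed" */
        return 0;
}

static void my_ring_attach(void) { set_skb_ring_handler(my_skb_handler); }
static void my_ring_detach(void) { set_skb_ring_handler(NULL); }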
+/* *********************************** */
+#define PF_RING 27 /* Packet Ring */
+#define SOCK_RING PF_RING
+#define SIORINGPOLL 0x8888
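With PF_RING claiming protocol family 27, user space reaches the ring through the ordinary Berkeley socket API. A minimal sketch (assumes the patched linux/ring.h is visible to user space for the PF_RING definition):

#include <sys/socket.h>
#include <arpa/inet.h>        /* htons() */
#include <linux/if_ether.h>   /* ETH_P_ALL */
#include <linux/ring.h>       /* PF_RING, from the patched headers */

int open_ring(void) {
        return socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
}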
+/* *********************************** */
+#endif /* __RING_H */
diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/Kconfig
--- linux-2.6.21.4/net/Kconfig 2007-06-07 21:27:31.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/Kconfig 2007-06-10 16:43:04.402423771 +0000
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
+source "net/ring/Kconfig"
 bool "TCP/IP networking"
diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile
--- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000
 ifneq ($(CONFIG_VLAN_8021Q),)
+obj-$(CONFIG_RING) += ring/
 obj-$(CONFIG_IP_DCCP) += dccp/
 obj-$(CONFIG_IP_SCTP) += sctp/
diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c
--- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000
 #include "net-sysfs.h"
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+/* #define RING_DEBUG */
+#include <linux/ring.h>
+#include <linux/version.h>
+static handle_ring_skb ring_handler = NULL;
+handle_ring_skb get_skb_ring_handler(void) { return(ring_handler); }
+void set_skb_ring_handler(handle_ring_skb the_handler) {
+ ring_handler = the_handler;
+void do_skb_ring_handler(struct sk_buff *skb,
+ u_char recv_packet, u_char real_skb) {
+ ring_handler(skb, recv_packet, real_skb);
+/* ******************* */
+static handle_ring_buffer buffer_ring_handler = NULL;
+handle_ring_buffer get_buffer_ring_handler(void) { return(buffer_ring_handler); }
+void set_buffer_ring_handler(handle_ring_buffer the_handler) {
+ buffer_ring_handler = the_handler;
+int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
+ if(buffer_ring_handler) {
+ buffer_ring_handler(dev, data, len);
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+EXPORT_SYMBOL(get_skb_ring_handler);
+EXPORT_SYMBOL(set_skb_ring_handler);
+EXPORT_SYMBOL(do_skb_ring_handler);
+EXPORT_SYMBOL(get_buffer_ring_handler);
+EXPORT_SYMBOL(set_buffer_ring_handler);
+EXPORT_SYMBOL(do_buffer_ring_handler);
 * The list of packet types we will receive (as opposed to discard)
 * and the routines to invoke.
@@ -1809,6 +1859,9 @@
 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+ if(ring_handler) ring_handler(skb, 0, 1);
+#endif /* CONFIG_RING */
 spinlock_t *root_lock = qdisc_lock(q);
 spin_lock(root_lock);
@@ -1908,6 +1961,13 @@
 /* if netpoll wants it, pretend we never saw it */
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+ if(ring_handler && ring_handler(skb, 1, 1)) {
+ /* The packet has been copied into a ring */
+ return(NET_RX_SUCCESS);
+#endif /* CONFIG_RING */
@@ -2193,6 +2253,13 @@
 struct net_device *null_or_orig;
 int ret = NET_RX_DROP;
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+ if(ring_handler && ring_handler(skb, 1, 1)) {
+ /* The packet has been copied into a ring */
+ return(NET_RX_SUCCESS);
+#endif /* CONFIG_RING */
 /* if we've gotten here through NAPI, check netpoll */
 if (netpoll_receive_skb(skb))
diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig
--- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000
+ tristate "PF_RING sockets (EXPERIMENTAL)"
+ depends on EXPERIMENTAL
+ PF_RING socket family, optimized for packet capture.
+ If a PF_RING socket is bound to an adapter (via the bind() system
+ call), that adapter will be used in read-only mode until the socket
+ is destroyed. Whenever an incoming packet is received from the adapter,
+ it is not passed to the upper layers; instead, it is copied to a ring
+ buffer, which in turn is exported to user space applications via mmap.
+ Please refer to http://luca.ntop.org/Ring.pdf for more.
+ Say N unless you know what you are doing.
diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile
--- linux-2.6.21.4/net/ring/Makefile 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile 2007-06-10 16:43:04.350421521 +0000
+# Makefile for the ring driver.
+ring-objs := ring_packet.o
diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c
--- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000
+/* ***************************************************************
+ * (C) 2004-07 - Luca Deri <deri@ntop.org>
+ * This code includes contributions courtesy of
+ * - Jeff Randall <jrandall@nexvu.com>
+ * - Helmut Manck <helmut.manck@secunet.com>
+ * - Brad Doctor <brad@stillsecure.com>
+ * - Amit D. Chaudhary <amit_ml@rajgad.com>
+ * - Francesco Fusco <fusco@ntop.org>
+ * - Michael Stiller <ms@2scale.net>
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
+#include <linux/autoconf.h>
+#else
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/init.h>
+#include <linux/filter.h>
+#include <linux/ring.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+#include <net/xfrm.h>
+#include <linux/poll.h>
+#include <net/sock.h>
+#include <asm/io.h> /* needed for virt_to_phys() */
+#include <net/inet_common.h>
+/* #define RING_DEBUG */
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
+static inline int remap_page_range(struct vm_area_struct *vma,
+ unsigned long uvaddr,
+ unsigned long paddr,
+ unsigned long size,
+ return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT,
+/* ************************************************* */
+#define CLUSTER_LEN 8
+struct ring_cluster {
+ u_short cluster_id; /* 0 = no cluster */
+ u_short num_cluster_elements;
+ enum cluster_type hashing_mode;
+ u_short hashing_id;
+ struct sock *sk[CLUSTER_LEN];
+ struct ring_cluster *next; /* NULL = last element of the cluster */
+/* ************************************************* */
+struct ring_element {
+ struct list_head list;
+/* ************************************************* */
+ struct net_device *ring_netdev;
+ u_short cluster_id; /* 0 = no cluster */
+ struct net_device *reflector_dev;
+ /* Packet buffers */
+ unsigned long order;
+ unsigned long ring_memory;
+ FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
+ char *ring_slots; /* Basically it points to ring_memory
+ +sizeof(FlowSlotInfo) */
+ /* Packet Sampling */
+ u_int pktToSample, sample_rate;
+ struct sk_filter *bpfFilter;
+ ACSM_STRUCT2 * acsm;
+ atomic_t num_ring_slots_waiters;
+ wait_queue_head_t ring_slots_waitqueue;
+ rwlock_t ring_index_lock;
+ /* Bloom Filters */
+ u_char bitmask_enabled;
+ bitmask_selector mac_bitmask, vlan_bitmask, ip_bitmask, twin_ip_bitmask,
+ port_bitmask, twin_port_bitmask, proto_bitmask;
+ u_int32_t num_mac_bitmask_add, num_mac_bitmask_remove;
+ u_int32_t num_vlan_bitmask_add, num_vlan_bitmask_remove;
+ u_int32_t num_ip_bitmask_add, num_ip_bitmask_remove;
+ u_int32_t num_port_bitmask_add, num_port_bitmask_remove;
+ u_int32_t num_proto_bitmask_add, num_proto_bitmask_remove;
+ /* Indexes (Internal) */
+ u_int insert_page_id, insert_slot_id;
+/* ************************************************* */
+/* List of all ring sockets. */
+static struct list_head ring_table;
+static u_int ring_table_size;
+/* List of all clusters */
+static struct ring_cluster *ring_cluster_list;
+static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
+/* ********************************** */
+/* /proc entry for ring module */
+struct proc_dir_entry *ring_proc_dir = NULL;
+struct proc_dir_entry *ring_proc = NULL;
+static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
+static void ring_proc_add(struct ring_opt *pfr);
+static void ring_proc_remove(struct ring_opt *pfr);
+static void ring_proc_init(void);
+static void ring_proc_term(void);
+/* ********************************** */
+static struct proto_ops ring_ops;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
+static struct proto ring_proto;
+static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
+static int buffer_ring_handler(struct net_device *dev, char *data, int len);
+static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
+/* ********************************** */
+static unsigned int bucket_len = 128, num_slots = 4096, sample_rate = 1,
+ transparent_mode = 1, enable_tx_capture = 1;
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
+module_param(bucket_len, uint, 0644);
+module_param(num_slots, uint, 0644);
+module_param(sample_rate, uint, 0644);
+module_param(transparent_mode, uint, 0644);
+module_param(enable_tx_capture, uint, 0644);
+MODULE_PARM(bucket_len, "i");
+MODULE_PARM(num_slots, "i");
+MODULE_PARM(sample_rate, "i");
+MODULE_PARM(transparent_mode, "i");
+MODULE_PARM(enable_tx_capture, "i");
+MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
+MODULE_PARM_DESC(num_slots, "Number of ring slots");
+MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
+MODULE_PARM_DESC(transparent_mode,
+ "Set to 1 to set transparent mode "
+ "(slower but backwards compatible)");
+MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
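When the driver is built as a module, these parameters can be overridden at load time, e.g. "insmod ring.ko num_slots=8192 bucket_len=512 enable_tx_capture=0" (values purely illustrative; the defaults are the ones declared above).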
+/* ********************************** */
+#define MIN_QUEUED_PKTS 64
+#define MAX_QUEUE_LOOPS 64
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
+#define ring_sk(__sk) ((__sk)->sk_protinfo)
+#define ring_sk_datatype(a) (a)
+#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
+#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
+ int dev_queue_xmit(struct sk_buff *skb)
+ struct net_device *dev_get_by_name(const char *name)
+/* ********************************** */
+** Multi-Pattern Search Engine
+** Aho-Corasick State Machine - version 2.0
+** Supports both Non-Deterministic and Deterministic Finite Automata
+** Reference - Efficient String matching: An Aid to Bibliographic Search
+** Alfred V Aho and Margaret J Corasick
+** Copyright(C) 1975 Association for Computing Machinery,Inc
+** +++ Version 1.0 notes - Marc Norton:
+** Original implementation based on the 4 algorithms in the paper by Aho & Corasick,
+** some implementation ideas from 'Practical Algorithms in C', and some
+** 1) Finds all occurrences of all patterns within a text.
+** +++ Version 2.0 Notes - Marc Norton/Dan Roelker:
+** New implementation modifies the state table storage and access model to use
+** compacted sparse vector storage. Dan Roelker and I hammered this strategy out
+** amongst many others in order to reduce memory usage and improve caching performance.
+** The memory usage is greatly reduced, we only use 1/4 of what we used to. The caching
+** performance is better in pure benchmarking tests, but does not show overall improvement
+** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing
+** many other things before we get back to a pattern match test, so the cache is voided.
+** This version has better caching performance characteristics, reduced memory,
+** more state table storage options, and requires no a priori case conversions.
+** It does maintain the same public interface. (Snort only used banded storage).
+** 1) Supports NFA and DFA state machines, and basic keyword state machines
+** 2) Initial transition table uses Linked Lists
+** 3) Improved state table memory options. NFA and DFA state
+** transition tables are converted to one of 4 formats during compilation.
+** c) Banded matrix (Default-this is the only one used in snort)
+** d) Sparse-Banded matrix
+** 4) Added support for acstate_t in .h file so we can compile states as
+** 16, or 32 bit state values for another reduction in memory consumption,
+** smaller states allows more of the state table to be cached, and improves
+** performance on x86-P4. Your mileage may vary, especially on risc systems.
+** 5) Added a bool to each state transition list to indicate if there is a matching
+** pattern in the state. This prevents us from accessing another data array
+** and can improve caching/performance.
+** 6) The search functions are very sensitive, don't change them without extensive testing,
+** or you'll just spoil the caching and prefetching opportunities.
+** Extras for fellow pattern matchers:
+** The table below explains the storage format used at each step.
+** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.
+** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.
+** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster
+** than the previous version, due to caching performance. This cannot be fully realized in Snort yet,
+** but other applications may have better caching opportunities.
+** Snort only needs to use the banded or full storage.
+** Transition table format at each processing stage.
+** -------------------------------------------------
+** Patterns -> Keyword State Table (List)
+** Keyword State Table -> NFA (List)
+** NFA -> DFA (List)
+** DFA (List)-> Sparse Rows O(m-avg # transitions per state)
+** -> Banded Rows O(1)
+** -> Sparse-Banded Rows O(nb-# bands)
+** -> Full Matrix O(1)
+** Copyright(C) 2002,2003,2004 Marc Norton
+** Copyright(C) 2003,2004 Daniel Roelker
+** Copyright(C) 2002,2003,2004 Sourcefire,Inc.
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#define MEMASSERT(p,s) do { if(!(p)) printk("ACSM-No Memory: %s!\n",s); } while(0)
+static int max_memory = 0;
+typedef struct acsm_summary_s
+ unsigned num_states;
+ unsigned num_transitions;
+static acsm_summary_t summary={0,0};
+** Case Translation Table
+static unsigned char xlatcase[256];
+inline int toupper(int ch) {
+ if ( (unsigned int)(ch - 'a') < 26u )
+static void init_xlatcase(void)
+ for (i = 0; i < 256; i++)
+ xlatcase[i] = toupper(i);
+ConvertCaseEx (unsigned char *d, unsigned char *s, int m)
+ for (i = 0; i < m; i++ )
+ d[0] = xlatcase[ s[0] ];
+ d[2] = xlatcase[ s[2] ];
+ d[1] = xlatcase[ s[1] ];
+ d[3] = xlatcase[ s[3] ];
+ for (i=0; i < n; i++)
+ d[i] = xlatcase[ s[i] ];
+ for (i=0; i < m; i++)
+ d[i] = xlatcase[ s[i] ];
+ p = kmalloc (n, GFP_KERNEL);
+ * Simple QUEUE NODE
+typedef struct _qnode
+ struct _qnode *next;
+ * Simple QUEUE Structure
+typedef struct _queue
+ QNODE * head, *tail;
+ * Initialize the queue
+queue_init (QUEUE * s)
+ s->head = s->tail = 0;
+ * Find a State in the queue
+queue_find (QUEUE * s, int state)
+ if( q->state == state ) return 1;
+ * Add Tail Item to queue (FiFo/LiLo)
+queue_add (QUEUE * s, int state)
+ if( queue_find( s, state ) ) return;
+ q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
+ MEMASSERT (q, "queue_add");
+ q = (QNODE *) AC_MALLOC (sizeof (QNODE));
+ * Remove Head Item from queue
+queue_remove (QUEUE * s)
+ s->head = s->head->next;
+ * Return items in the queue
+queue_count (QUEUE * s)
+queue_free (QUEUE * s)
+ while (queue_count (s))
+ * Get Next State-NFA
+int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
+ trans_node_t * t = acsm->acsmTransTable[state];
+ if( t->key == input )
+ return t->next_state;
+ if( state == 0 ) return 0;
+ return ACSM_FAIL_STATE2; /* Fail state ??? */
+ * Get Next State-DFA
+int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
+ trans_node_t * t = acsm->acsmTransTable[state];
+ if( t->key == input )
+ return t->next_state;
+ return 0; /* default state */
+ * Put Next State - Head insertion, and transition updates
+int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
+ trans_node_t * tnew;
+ // printk(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state);
+ /* Check if the transition already exists, if so just update the next_state */
+ p = acsm->acsmTransTable[state];
+ if( p->key == input ) /* transition already exists- reset the next state */
+ p->next_state = next_state;
+ /* Definitely not an existing transition - add it */
+ tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
+ if( !tnew ) return -1;
+ tnew->key = input;
+ tnew->next_state = next_state;
+ tnew->next = acsm->acsmTransTable[state];
+ acsm->acsmTransTable[state] = tnew;
+ acsm->acsmNumTrans++;
+ * Free the entire transition table
+int List_FreeTransTable( ACSM_STRUCT2 * acsm )
+ trans_node_t * t, *p;
+ if( !acsm->acsmTransTable ) return 0;
+ for(i=0;i< acsm->acsmMaxStates;i++)
+ t = acsm->acsmTransTable[i];
+ max_memory -= sizeof(trans_node_t);
+ kfree(acsm->acsmTransTable);
+ max_memory -= sizeof(void*) * acsm->acsmMaxStates;
+ acsm->acsmTransTable = 0;
+ int List_FreeList( trans_node_t * t )
+ max_memory -= sizeof(trans_node_t);
+ * Converts row of states from list to a full vector format
+int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
+ trans_node_t * t = acsm->acsmTransTable[ state ];
+ memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
+ if( !t ) return 0;
+ full[ t->key ] = t->next_state;
+ * Copy a Match List Entry - don't dup the pattern data
+static ACSM_PATTERN2*
+CopyMatchListEntry (ACSM_PATTERN2 * px)
+ ACSM_PATTERN2 * p;
+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+ MEMASSERT (p, "CopyMatchListEntry");
+ memcpy (p, px, sizeof (ACSM_PATTERN2));
+ * Check if a pattern is in the list already,
+ * validate it using the 'id' field. This must be unique
+ * for every pattern.
+ int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
+ ACSM_PATTERN2 * p;
+ p = acsm->acsmMatchList[state];
+ if( p->id == px->id ) return 1;
+ * Add a pattern to the list of patterns terminated at this state.
+ * Insert at front of list.
+AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
+ ACSM_PATTERN2 * p;
+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+ MEMASSERT (p, "AddMatchListEntry");
+ memcpy (p, px, sizeof (ACSM_PATTERN2));
+ p->next = acsm->acsmMatchList[state];
+ acsm->acsmMatchList[state] = p;
+AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p)
+ int state, next, n;
+ unsigned char *pattern;
+ pattern = p->patrn;
+ * Match up pattern with existing states
+ for (; n > 0; pattern++, n--)
+ next = List_GetNextState(acsm,state,*pattern);
+ if (next == ACSM_FAIL_STATE2 || next == 0)
+ * Add new states for the rest of the pattern bytes, 1 state per byte
+ for (; n > 0; pattern++, n--)
+ acsm->acsmNumStates++;
+ List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
+ state = acsm->acsmNumStates;
+ AddMatchListEntry (acsm, state, p );
+ * Build A Non-Deterministic Finite Automata
+ * The keyword state table must already be built, via AddPatternStates().
+Build_NFA (ACSM_STRUCT2 * acsm)
+ QUEUE q, *queue = &q;
+ acstate_t * FailState = acsm->acsmFailState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+ ACSM_PATTERN2 * mlist,* px;
+ /* Init a Queue */
+ queue_init (queue);
+ /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ s = List_GetNextState2(acsm,0,i);
+ queue_add (queue, s);
+ /* Build the fail state successive layer of transitions */
+ while (queue_count (queue) > 0)
+ r = queue_remove (queue);
+ /* Find Final States for any Failure */
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ s = List_GetNextState(acsm,r,i);
+ if( s != ACSM_FAIL_STATE2 )
+ queue_add (queue, s);
+ fs = FailState[r];
+ * Locate the next valid state for 'i' starting at fs
+ while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
+ fs = FailState[fs];
+ * Update 's' state failure state to point to the next valid state
+ FailState[s] = next;
+ * Copy 'next'states MatchList to 's' states MatchList,
+ * we copy them so each list can be AC_FREE'd later,
+ * else we could just manipulate pointers to fake the copy.
+ for( mlist = MatchList[next];
+ mlist = mlist->next)
+ px = CopyMatchListEntry (mlist);
+ /* Insert at front of MatchList */
+ px->next = MatchList[s];
+ MatchList[s] = px;
+ /* Clean up the queue */
+ queue_free (queue);
+ * Build Deterministic Finite Automata from the NFA
+Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm)
+ int i, r, s, cFailState;
+ QUEUE q, *queue = &q;
+ acstate_t * FailState = acsm->acsmFailState;
+ /* Init a Queue */
+ queue_init (queue);
+ /* Add the state 0 transitions 1st */
+ for(i=0; i<acsm->acsmAlphabetSize; i++)
+ s = List_GetNextState(acsm,0,i);
+ queue_add (queue, s);
+ /* Start building the next layer of transitions */
+ while( queue_count(queue) > 0 )
+ r = queue_remove(queue);
+ /* Process this states layer */
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ s = List_GetNextState(acsm,r,i);
+ if( s != ACSM_FAIL_STATE2 && s!= 0)
+ queue_add (queue, s);
+ cFailState = List_GetNextState(acsm,FailState[r],i);
+ if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
+ List_PutNextState(acsm,r,i,cFailState);
+ /* Clean up the queue */
+ queue_free (queue);
+ * Convert a row lists for the state table to a full vector format
+Conv_List_To_Full(ACSM_STRUCT2 * acsm)
+ acstate_t ** NextState = acsm->acsmNextState;
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
+ tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
+ p[1] = 0; /* no matches yet */
+ NextState[k] = p; /* now we have a full format row vector */
+ * Convert DFA memory usage from list based storage to a sparse-row storage.
+ * The Sparse format allows each row to be either full or sparse formatted. If the sparse row has
+ * too many transitions, performance or space may dictate that we use the standard full formatting
+ * for the row. More than 5 or 10 transitions per state ought to really whack performance. So the
+ * user can specify the max state transitions per state allowed in the sparse format.
+ * Standard Full Matrix Format
+ * ---------------------------
+ * acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)
+ * events -> a b c d e f g h i j k l m n o p
+ * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0
+ * Sparse Format, each row : Words Value
+ * 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands)
+ * 2-2 bool match flag (indicates this state has pattern matches)
+ * 3-3 sparse state count ( # of input/next-state pairs )
+ * 4-3+2*cnt 'input,next-state' pairs... each sizeof(acstate_t)
+ * above example case yields:
+ * Full Format: 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ...
+ * Sparse format: 1, 3, 'a',1,'b',7,'f',3 - uses 2+2*ntransitions (non-default transitions)
+Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm)
+ acstate_t * p, state, maxstates=0;
+ acstate_t ** NextState = acsm->acsmNextState;
+ acstate_t full[MAX_ALPHABET_SIZE];
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ List_ConvToFull(acsm, (acstate_t)k, full );
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
+ if( cnt > 0 ) maxstates++;
+ if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes )
+ p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
+ memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));
+ p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
+ p[m++] = ACF_SPARSE;
+ p[m++] = 0; /* no matches */
+ for(i = 0; i < acsm->acsmAlphabetSize ; i++)
+ if( state != 0 && state != ACSM_FAIL_STATE2 )
+ NextState[k] = p; /* now we are a sparse formatted state transition array */
+ Convert Full matrix to Banded row format.
+ 2 n number of values
+ 3 i index of 1st value (0-256)
+ 4 - 3+n next-state values at each index
+Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm)
+ int first = -1, last;
+ acstate_t * p, state, full[MAX_ALPHABET_SIZE];
+ acstate_t ** NextState = acsm->acsmNextState;
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ List_ConvToFull(acsm, (acstate_t)k, full );
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ if( state !=0 && state != ACSM_FAIL_STATE2 )
+ if( first < 0 ) first = i;
+ /* calc band width */
+ cnt= last - first + 1;
+ p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));
+ p[m++] = ACF_BANDED;
+ p[m++] = 0; /* no matches */
+ for(i = first; i <= last; i++)
+ NextState[k] = p; /* now we are a banded formatted state transition array */
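As a concrete illustration of the banded layout built above: a full row whose only non-default transitions are at inputs 2 (to state 5) and 4 (to state 7) collapses to a seven-word vector, since first=2, last=4 and cnt=3 (values hypothetical):

/* banded row: fmt, match flag, n, first index, values[first..last] */
acstate_t banded_example[] = { ACF_BANDED, 0, 3, 2, 5, 0, 7 };

The banded DFA lookup further down reads it exactly this way: input < ps[3] or input >= ps[3]+ps[2] yields state 0, otherwise ps[4+input-ps[3]].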
+ * Convert full matrix to Sparse Band row format.
+ * next - Full formatted row of next states
+ * asize - size of alphabet
+ * zcnt - max number of zeros in a run of zeros in any given band.
+ * 1 ACF_SPARSEBANDS
+ * 2 number of bands
+ * repeat 3 - 5+ ....once for each band in this row.
+ * 3 number of items in this band* 4 start index of this band
+ * 5- next-state values in this band...
+int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
+ int i, nbands,zcnt,last=0;
+ for( i=0; i<asize; i++ )
+ if( state !=0 && state != ACSM_FAIL_STATE2 )
+ begin[nbands] = i;
+ for( ; i< asize; i++ )
+ if( state ==0 || state == ACSM_FAIL_STATE2 )
+ if( zcnt > zmax ) break;
+ end[nbands++] = last;
+ * 1 SPARSEBANDS format indicator
+ * 2 bool indicates a pattern match in this state
+ * 3 number of sparse bands
+ * 4 number of elements in this band
+ * 5 start index of this band
+ * 6- list of next states
+ * m number of elements in this band
+ * m+1 start index of this band
+ * m+2- list of next states
+Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm)
+ acstate_t ** NextState = acsm->acsmNextState;
+ int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;
+ int band_begin[MAX_ALPHABET_SIZE];
+ int band_end[MAX_ALPHABET_SIZE];
+ acstate_t full[MAX_ALPHABET_SIZE];
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ List_ConvToFull(acsm, (acstate_t)k, full );
+ nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );
+ /* calc band width space*/
+ for(i=0;i<nbands;i++)
+ cnt += band_end[i] - band_begin[i] + 1;
+ /*printk("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */
+ p = AC_MALLOC(sizeof(acstate_t)*(cnt));
+ p[m++] = ACF_SPARSEBANDS;
+ p[m++] = 0; /* no matches */
+ for( i=0;i<nbands;i++ )
+ p[m++] = band_end[i] - band_begin[i] + 1; /* # states in this band */
+ p[m++] = band_begin[i]; /* start index */
+ for( j=band_begin[i]; j<=band_end[i]; j++ )
+ p[m++] = full[j]; /* some states may be state zero */
+ NextState[k] = p; /* now we are a sparse-banded formatted state transition array */
+ * Convert an NFA or DFA row from sparse to full format
+ * and store into the 'full' buffer.
+ * 0 - failed, no state transitions
+ * *p - pointer to 'full' buffer
+ acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full )
+ acstate_t * p, n, fmt, index, nb, bmatch;
+ acstate_t ** NextState = acsm->acsmNextState;
+ if( !p ) return 0;
+ if( fmt ==ACF_SPARSE )
+ for( ; n>0; n--, p+=2 )
+ full[ p[0] ] = p[1];
+ else if( fmt ==ACF_BANDED )
+ for( ; n>0; n--, p++ )
+ full[ index++ ] = p[0];
+ else if( fmt ==ACF_SPARSEBANDS )
+ for( ; n>0; n--, p++ )
+ full[ index++ ] = p[0];
+ else if( fmt == ACF_FULL )
+ memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t));
+ * Select the desired storage mode
+int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
+ case ACF_SPARSEBANDS:
+ acsm->acsmFormat = m;
+void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
+ acsm->acsmSparseMaxZcnt = n;
+void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
+ acsm->acsmSparseMaxRowNodes = n;
+int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
+ acsm->acsmFSA = m;
+int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
+ if( n <= MAX_ALPHABET_SIZE )
+ acsm->acsmAlphabetSize = n;
+ * Create a new AC state machine
+static ACSM_STRUCT2 * acsmNew2 (void)
+ p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
+ MEMASSERT (p, "acsmNew");
+ memset (p, 0, sizeof (ACSM_STRUCT2));
+ /* Some defaults */
+ p->acsmFSA = FSA_DFA;
+ p->acsmFormat = ACF_BANDED;
+ p->acsmAlphabetSize = 256;
+ p->acsmSparseMaxRowNodes = 256;
+ p->acsmSparseMaxZcnt = 10;
+ * Add a pattern to the list of patterns for this state machine
+acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
+ int offset, int depth, void * id, int iid)
+ ACSM_PATTERN2 * plist;
+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+ MEMASSERT (plist, "acsmAddPattern");
+ plist->patrn = (unsigned char *) AC_MALLOC ( n );
+ MEMASSERT (plist->patrn, "acsmAddPattern");
+ ConvertCaseEx(plist->patrn, pat, n);
+ plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
+ MEMASSERT (plist->casepatrn, "acsmAddPattern");
+ memcpy (plist->casepatrn, pat, n);
+ plist->nocase = nocase;
+ plist->offset = offset;
+ plist->depth = depth;
+ plist->next = p->acsmPatterns;
+ p->acsmPatterns = plist;
+ * Add a Key to the list of key+data pairs
+int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
+ ACSM_PATTERN2 * plist;
+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+ MEMASSERT (plist, "acsmAddPattern");
+ plist->patrn = (unsigned char *) AC_MALLOC (klen);
+ memcpy (plist->patrn, key, klen);
+ plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
+ memcpy (plist->casepatrn, key, klen);
+ plist->nocase = nocase;
+ plist->offset = 0;
+ plist->next = p->acsmPatterns;
+ p->acsmPatterns = plist;
+ * Copy a boolean match flag into the NextState table, for caching purposes.
+void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
+ acstate_t ** NextState = acsm->acsmNextState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+ for( state=0; state<acsm->acsmNumStates; state++ )
+ if( MatchList[state] )
+ NextState[state][1] = 1;
+ NextState[state][1] = 0;
+ * Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands
+acsmCompile2 (ACSM_STRUCT2 * acsm)
+ ACSM_PATTERN2 * plist;
+ /* Count number of states */
+ for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
+ acsm->acsmMaxStates += plist->n;
+ /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
+ acsm->acsmMaxStates++; /* one extra */
+ /* Alloc a List based State Transition table */
+ acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
+ MEMASSERT (acsm->acsmTransTable, "acsmCompile");
+ memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);
+ /* Alloc a failure table - this has a failure state, and a match list for each state */
+ acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
+ MEMASSERT (acsm->acsmFailState, "acsmCompile");
+ memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );
+ /* Alloc a MatchList table - this has a list of pattern matches for each state, if any */
+ acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
+ MEMASSERT (acsm->acsmMatchList, "acsmCompile");
+ memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
+ /* Alloc a separate state transition table == in state 's' due to event 'k', transition to 'next' state */
+ acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
+ MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");
+ for (k = 0; k < acsm->acsmMaxStates; k++)
+ acsm->acsmNextState[k]=(acstate_t*)0;
+ /* Initialize state zero as a branch */
+ acsm->acsmNumStates = 0;
+ /* Add the 0'th state, */
+ //acsm->acsmNumStates++;
+ /* Add each Pattern to the State Table - This forms a keywords state table */
+ for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
+ AddPatternStates (acsm, plist);
+ acsm->acsmNumStates++;
+ if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
+ /* Build the NFA */
+ if( acsm->acsmFSA == FSA_DFA )
+ /* Convert the NFA to a DFA */
+ Convert_NFA_To_DFA (acsm);
+ * Select Final Transition Table Storage Mode
+ if( acsm->acsmFormat == ACF_SPARSE )
+ /* Convert DFA Full matrix to a Sparse matrix */
+ if( Conv_Full_DFA_To_Sparse(acsm) )
+ else if( acsm->acsmFormat == ACF_BANDED )
+ /* Convert DFA Full matrix to a Banded matrix */
+ if( Conv_Full_DFA_To_Banded(acsm) )
+ else if( acsm->acsmFormat == ACF_SPARSEBANDS )
+ /* Convert DFA Full matrix to a Sparse-Banded matrix */
+ if( Conv_Full_DFA_To_SparseBands(acsm) )
+ else if( acsm->acsmFormat == ACF_FULL )
+ if( Conv_List_To_Full( acsm ) )
+ acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */
+ /* Free up the Table Of Transition Lists */
+ List_FreeTransTable( acsm );
+ /* For now -- show this info */
+ * acsmPrintInfo( acsm );
+ /* Accrue Summary State Stats */
+ summary.num_states += acsm->acsmNumStates;
+ summary.num_transitions += acsm->acsmNumTrans;
+ memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));
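End to end, the engine is driven through a handful of calls: acsmNew2(), acsmAddPattern2(), acsmCompile2() and acsmSearch2(). A minimal sketch of a case-insensitive single-pattern search, using only functions defined in this file (since acsmNew2() is static, such a driver would live in the same file; error handling elided; the "GET" pattern is purely illustrative):

/* Hypothetical driver for the ACSM API in this file. */
static int on_match(void *id, int index, void *data) {
        return 1;               /* non-zero stops the search */
}

static int has_pattern(unsigned char *text, int len) {
        ACSM_STRUCT2 *acsm = acsmNew2();
        acsmAddPattern2(acsm, (unsigned char *)"GET", 3,
                        1 /* nocase */, 0, 0, NULL, 0);
        acsmCompile2(acsm);
        return acsmSearch2(acsm, text, len, on_match, NULL);
        /* acsmFree2(acsm) releases all tables when the machine is done */
}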
+ * Get the NextState from the NFA, all NFA storage formats use this
+acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
+ ps++; /* skip bMatchState */
+ if( input < index )
+ return (acstate_t)ACSM_FAIL_STATE2;
+ if( input >= index + n )
+ return (acstate_t)ACSM_FAIL_STATE2;
+ if( ps[input-index] == 0 )
+ return ACSM_FAIL_STATE2;
+ return (acstate_t) ps[input-index];
+ n = *ps++; /* number of sparse index-value entries */
+ for( ; n>0 ; n-- )
+ if( ps[0] > input ) /* cannot match the input, already a higher value than the input */
+ return (acstate_t)ACSM_FAIL_STATE2; /* default state */
+ else if( ps[0] == input )
+ return ps[1]; /* next state */
+ return ACSM_FAIL_STATE2;
+ case ACF_SPARSEBANDS:
+ nb = *ps++; /* number of bands */
+ while( nb > 0 ) /* for each band */
+ n = *ps++; /* number of elements */
+ index = *ps++; /* 1st element value */
+ if( input < index )
+ return (acstate_t)ACSM_FAIL_STATE2;
+ return (acstate_t)0;
+ if( (input >= index) && (input < (index + n)) )
+ if( ps[input-index] == 0 )
+ return ACSM_FAIL_STATE2;
+ return (acstate_t) ps[input-index];
+ return (acstate_t)ACSM_FAIL_STATE2;
+ return (acstate_t)0;
+ if( ps[input] == 0 )
+ return ACSM_FAIL_STATE2;
+ * Get the NextState from the DFA Next State Transition table
+ * Full and banded are supported separately, this is for
+ * sparse and sparse-bands
+acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
+ /* n=ps[2] : number of entries in the band */
+ /* index=ps[3] : index of the 1st entry, sequential thereafter */
+ if( input < ps[3] ) return 0;
+ if( input >= (ps[3]+ps[2]) ) return 0;
+ return ps[4+input-ps[3]];
+ return ps[2+input];
+ n = ps[2]; /* number of entries/ key+next pairs */
+ for( ; n>0 ; n-- )
+ if( input < ps[0] ) /* cannot match the input, already a higher value than the input */
+ return (acstate_t)0; /* default state */
+ else if( ps[0] == input )
+ return ps[1]; /* next state */
+ return (acstate_t)0;
+ case ACF_SPARSEBANDS:
+ nb = ps[2]; /* number of bands */
+ while( nb > 0 ) /* for each band */
+ n = ps[0]; /* number of elements in this band */
+ index = ps[1]; /* start index/char of this band */
+ if( input < index )
+ return (acstate_t)0;
+ if( (input < (index + n)) )
+ return (acstate_t) ps[2+input-index];
+ return (acstate_t)0;
+ * Search Text or Binary Data for Pattern matches
+ * Sparse & Sparse-Banded Matrix search
+acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
+ int (*Match) (void * id, int index, void *data),
+ ACSM_PATTERN2 * mlist;
+ unsigned char * Tend;
+ unsigned char * T, * Tc;
+ acstate_t ** NextState = acsm->acsmNextState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+ for( state = 0; T < Tend; T++ )
+ state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
+ /* test if this state has any matching patterns */
+ if( NextState[state][1] )
+ for( mlist = MatchList[state];
+ mlist = mlist->next )
+ index = T - mlist->n - Tc;
+ if( mlist->nocase )
+ if (Match (mlist->id, index, data))
+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
+ if (Match (mlist->id, index, data))
+ * Full format DFA search
+ * Do not change anything here without testing, caching and prefetching
+ * performance is very sensitive to any changes.
+ * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
+ * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
+acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
+ int (*Match) (void * id, int index, void *data),
+ ACSM_PATTERN2 * mlist;
+ unsigned char * Tend;
+ unsigned char * T;
+ acstate_t ** NextState = acsm->acsmNextState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+ for( state = 0; T < Tend; T++ )
+ ps = NextState[ state ];
+ sindex = xlatcase[ T[0] ];
+ /* check the current state for a pattern match */
+ for( mlist = MatchList[state];
+ mlist = mlist->next )
+ index = T - mlist->n - Tx;
+ if( mlist->nocase )
+ if (Match (mlist->id, index, data))
+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
+ if (Match (mlist->id, index, data))
+ state = ps[ 2u + sindex ];
+ /* Check the last state for a pattern match */
+ for( mlist = MatchList[state];
+ mlist = mlist->next )
+ index = T - mlist->n - Tx;
+ if( mlist->nocase )
+ if (Match (mlist->id, index, data))
+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
+ if (Match (mlist->id, index, data))
+ * Banded-Row format DFA search
+ * Do not change anything here, caching and prefetching
+ * performance is very sensitive to any changes.
+ * ps[0] = storage fmt
+ * ps[1] = bool match flag
+ * ps[2] = # elements in band
+ * ps[3] = index of 1st element
+acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
+ int (*Match) (void * id, int index, void *data),
+ unsigned char * Tend;
+ unsigned char * T;
+ acstate_t ** NextState = acsm->acsmNextState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+ ACSM_PATTERN2 * mlist;
+ for( state = 0; T < Tend; T++ )
+ ps = NextState[state];
+ sindex = xlatcase[ T[0] ];
+ /* test if this state has any matching patterns */
+ for( mlist = MatchList[state];
+ mlist = mlist->next )
+ index = T - mlist->n - Tx;
+ if( mlist->nocase )
+ if (Match (mlist->id, index, data))
+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
+ if (Match (mlist->id, index, data))
+ if( sindex < ps[3] ) state = 0;
+ else if( sindex >= (ps[3] + ps[2]) ) state = 0;
+ else state = ps[ 4u + sindex - ps[3] ];
+ /* Check the last state for a pattern match */
+ for( mlist = MatchList[state];
+ mlist = mlist->next )
+ index = T - mlist->n - Tx;
+ if( mlist->nocase )
+ if (Match (mlist->id, index, data))
+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
+ if (Match (mlist->id, index, data))
+ * Search Text or Binary Data for Pattern matches
+ * Sparse Storage Version
+acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
+ int (*Match) (void * id, int index, void *data),
+ ACSM_PATTERN2 * mlist;
+ unsigned char * Tend;
+ unsigned char * T, *Tc;
+ acstate_t ** NextState= acsm->acsmNextState;
+ acstate_t * FailState= acsm->acsmFailState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+ unsigned char Tchar;
+ for( state = 0; T < Tend; T++ )
+ Tchar = xlatcase[ *T ];
+ while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
+ state = FailState[state];
+ for( mlist = MatchList[state];
+ mlist = mlist->next )
+ index = T - mlist->n - Tx;
+ if( mlist->nocase )
+ if (Match (mlist->id, index, data))
+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
+ if (Match (mlist->id, index, data))
+acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
+ int (*Match) (void * id, int index, void *data),
+ switch( acsm->acsmFSA )
+ if( acsm->acsmFormat == ACF_FULL )
+ return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
+ else if( acsm->acsmFormat == ACF_BANDED )
+ return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
+ return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
+ return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
+acsmFree2 (ACSM_STRUCT2 * acsm)
+ ACSM_PATTERN2 * mlist, *ilist;
+ for (i = 0; i < acsm->acsmMaxStates; i++)
+ mlist = acsm->acsmMatchList[i];
+ mlist = mlist->next;
+ AC_FREE(acsm->acsmNextState[i]);
+ AC_FREE(acsm->acsmFailState);
+ AC_FREE(acsm->acsmMatchList);
+/* ********************************** */
+static void ring_sock_destruct(struct sock *sk) {
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+ skb_queue_purge(&sk->sk_receive_queue);
+ if (!sock_flag(sk, SOCK_DEAD)) {
+#if defined(RING_DEBUG)
+ printk("Attempt to release alive ring socket: %p\n", sk);
+ BUG_ON(atomic_read(&sk->sk_rmem_alloc) != 0);
+ BUG_ON(atomic_read(&sk->sk_wmem_alloc) != 0);
+ BUG_ON(atomic_read(&sk->rmem_alloc) != 0);
+ BUG_ON(atomic_read(&sk->wmem_alloc) != 0);
+#if defined(RING_DEBUG)
+ printk("Attempt to release alive ring socket: %p\n", sk);
+ kfree(ring_sk(sk));
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+ MOD_DEC_USE_COUNT;
+/* ********************************** */
+static void ring_proc_add(struct ring_opt *pfr) {
+ if(ring_proc_dir != NULL) {
+ pfr->ring_pid = current->pid;
+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
+ create_proc_read_entry(name, 0, ring_proc_dir,
+ ring_proc_get_info, pfr);
+ /* printk("PF_RING: added /proc/net/pf_ring/%s\n", name); */
+/* ********************************** */
+static void ring_proc_remove(struct ring_opt *pfr) {
+ if(ring_proc_dir != NULL) {
+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
+ remove_proc_entry(name, ring_proc_dir);
+ /* printk("PF_RING: removed /proc/net/pf_ring/%s\n", name); */
+/* ********************************** */
+static int ring_proc_get_info(char *buf, char **start, off_t offset,
+ int len, int *unused, void *data)
+ struct ring_opt *pfr;
+ FlowSlotInfo *fsi;
+ if(data == NULL) {
+ /* /proc/net/pf_ring/info */
+ rlen = sprintf(buf,"Version : %s\n", RING_VERSION);
+ rlen += sprintf(buf + rlen,"Bucket length : %d bytes\n", bucket_len);
+ rlen += sprintf(buf + rlen,"Ring slots : %d\n", num_slots);
+ rlen += sprintf(buf + rlen,"Sample rate : %d [1=no sampling]\n", sample_rate);
+ rlen += sprintf(buf + rlen,"Capture TX : %s\n",
+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
+ rlen += sprintf(buf + rlen,"Transparent mode : %s\n",
+ transparent_mode ? "Yes" : "No");
+ rlen += sprintf(buf + rlen,"Total rings : %d\n", ring_table_size);
+ /* detailed statistics about a PF_RING */
+ pfr = (struct ring_opt*)data;
+ fsi = pfr->slots_info;
+ rlen = sprintf(buf, "Bound Device : %s\n",
+ pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
+ rlen += sprintf(buf + rlen,"Version : %d\n", fsi->version);
+ rlen += sprintf(buf + rlen,"Sampling Rate : %d\n", pfr->sample_rate);
+ rlen += sprintf(buf + rlen,"BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
+ rlen += sprintf(buf + rlen,"Bloom Filters : %s\n", pfr->bitmask_enabled ? "Enabled" : "Disabled");
+ rlen += sprintf(buf + rlen,"Pattern Search: %s\n", pfr->acsm ? "Enabled" : "Disabled");
+ rlen += sprintf(buf + rlen,"Cluster Id : %d\n", pfr->cluster_id);
+ rlen += sprintf(buf + rlen,"Tot Slots : %d\n", fsi->tot_slots);
+ rlen += sprintf(buf + rlen,"Slot Len : %d\n", fsi->slot_len);
+ rlen += sprintf(buf + rlen,"Data Len : %d\n", fsi->data_len);
+ rlen += sprintf(buf + rlen,"Tot Memory : %d\n", fsi->tot_mem);
+ rlen += sprintf(buf + rlen,"Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
+ rlen += sprintf(buf + rlen,"Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
+ rlen += sprintf(buf + rlen,"Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
+ rlen += sprintf(buf + rlen,"Tot Read : %lu\n", (unsigned long)fsi->tot_read);
+ rlen = sprintf(buf, "WARNING fsi == NULL\n");
+ rlen = sprintf(buf, "WARNING data == NULL\n");
+/* ********************************** */
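With the default module parameters and no rings open, /proc/net/pf_ring/info would read roughly as follows (layout taken from the sprintf calls above; values derived from the declared defaults):

Version : 3.4.1
Bucket length : 128 bytes
Ring slots : 4096
Sample rate : 1 [1=no sampling]
Capture TX : Yes [RX+TX]
Transparent mode : Yes
Total rings : 0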
2739 +static void ring_proc_init(void) {
2740 + ring_proc_dir = proc_mkdir("pf_ring", init_net.proc_net);
2742 + if(ring_proc_dir) {
2743 + ring_proc_dir->owner = THIS_MODULE;
2744 + ring_proc = create_proc_read_entry("info", 0, ring_proc_dir,
2745 + ring_proc_get_info, NULL);
2747 + printk("PF_RING: unable to register proc file\n");
2749 + ring_proc->owner = THIS_MODULE;
2750 + printk("PF_RING: registered /proc/net/pf_ring/\n");
2753 + printk("PF_RING: unable to create /proc/net/pf_ring\n");
2756 +/* ********************************** */
2758 +static void ring_proc_term(void) {
2759 + if(ring_proc != NULL) {
2760 + remove_proc_entry("info", ring_proc_dir);
2761 + if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", init_net.proc_net);
2763 + printk("PF_RING: deregistered /proc/net/pf_ring\n");
2767 +/* ********************************** */
2772 + * store the sk in a new element and add it
2773 + * to the head of the list.
2775 +static inline void ring_insert(struct sock *sk) {
2776 + struct ring_element *next;
2778 +#if defined(RING_DEBUG)
2779 + printk("RING: ring_insert()\n");
2782 + next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
2783 + if(next != NULL) {
2785 + write_lock_irq(&ring_mgmt_lock);
2786 + list_add(&next->list, &ring_table);
2787 + write_unlock_irq(&ring_mgmt_lock);
2789 + if(net_ratelimit())
2790 + printk("RING: could not kmalloc slot!!\n");
2793 + ring_table_size++;
2794 + ring_proc_add(ring_sk(sk));
2797 +/* ********************************** */
2802 + * For each of the elements in the list:
2803 + * - check if this is the element we want to delete
2804 + * - if it is, remove it from the list, and free it.
2806 + * stop when we find the one we're looking for (break),
2807 + * or when we reach the end of the list.
2809 +static inline void ring_remove(struct sock *sk) {
2810 + struct list_head *ptr;
2811 + struct ring_element *entry;
2813 + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
2814 + entry = list_entry(ptr, struct ring_element, list);
2816 + if(entry->sk == sk) {
2819 + ring_table_size--;
2825 +/* ********************************** */
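+/*
+ * num_queued_pkts() below computes how many filled slots separate the
+ * reader from the writer. tot_insert and tot_read are free-running
+ * u_int32_t counters, so the distance must be taken modulo 2^32: e.g.
+ * with tot_read = 0xFFFFFFFD and a wrapped tot_insert = 5 there are
+ * 8 packets queued.
+ */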
2827 +static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
2829 + if(pfr->ring_slots != NULL) {
2831 + u_int32_t tot_insert = pfr->slots_info->insert_idx,
2832 +#if defined(RING_DEBUG)
2833 + tot_read = pfr->slots_info->tot_read, tot_pkts;
2835 + tot_read = pfr->slots_info->tot_read;
2838 + if(tot_insert >= tot_read) {
2839 +#if defined(RING_DEBUG)
2840 + tot_pkts = tot_insert-tot_read;
2842 + return(tot_insert-tot_read);
2844 +#if defined(RING_DEBUG)
2845 +      tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read+1; /* counter wrapped: +1 gives the exact modular distance */
2847 +      return(((u_int32_t)-1)+tot_insert-tot_read+1);
2850 +#if defined(RING_DEBUG)
2851 + printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
2852 + tot_pkts, tot_insert, tot_read);
2859 +/* ********************************** */
2861 +static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
2862 +#if defined(RING_DEBUG)
2863 + printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
2866 + if(pfr->ring_slots != NULL) {
2867 + FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
2868 + *pfr->slots_info->slot_len]);
2874 +/* ********************************** */
2876 +static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
2877 +#if defined(RING_DEBUG)
2878 + printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
2881 + if(pfr->ring_slots != NULL)
2882 + return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
2883 + pfr->slots_info->slot_len]));
2888 +/* ******************************************************* */
2890 +static int parse_pkt(struct sk_buff *skb, u_int16_t skb_displ,
2891 + u_int8_t *l3_proto, u_int16_t *eth_type,
2892 + u_int16_t *l3_offset, u_int16_t *l4_offset,
2893 + u_int16_t *vlan_id, u_int32_t *ipv4_src,
2894 + u_int32_t *ipv4_dst,
2895 + u_int16_t *l4_src_port, u_int16_t *l4_dst_port,
2896 + u_int16_t *payload_offset) {
2898 + struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
2901 + *l3_offset = *l4_offset = *l3_proto = *payload_offset = 0;
2902 + *eth_type = ntohs(eh->h_proto);
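+  /* 802.1Q: bytes 14-15 of the frame carry the TCI, whose low 12 bits are
+     the VLAN ID (masking the first byte with 15 drops the 4 priority/CFI
+     bits); the encapsulated EtherType follows at bytes 16-17. */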
2904 + if(*eth_type == 0x8100 /* 802.1q (VLAN) */) {
2905 + (*vlan_id) = (skb->data[14] & 15)*256 + skb->data[15];
2906 + *eth_type = (skb->data[16])*256 + skb->data[17];
2910 + (*vlan_id) = (u_int16_t)-1;
2913 + if(*eth_type == 0x0800 /* IP */) {
2914 + *l3_offset = displ+sizeof(struct ethhdr);
2915 + ip = (struct iphdr*)(skb->data-skb_displ+(*l3_offset));
2917 + *ipv4_src = ntohl(ip->saddr), *ipv4_dst = ntohl(ip->daddr), *l3_proto = ip->protocol;
2919 + if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP)) {
2920 + *l4_offset = (*l3_offset)+(ip->ihl*4);
2922 + if(ip->protocol == IPPROTO_TCP) {
2923 + struct tcphdr *tcp = (struct tcphdr*)(skb->data-skb_displ+(*l4_offset));
2924 + *l4_src_port = ntohs(tcp->source), *l4_dst_port = ntohs(tcp->dest);
2925 + *payload_offset = (*l4_offset)+(tcp->doff * 4);
2926 + } else if(ip->protocol == IPPROTO_UDP) {
2927 + struct udphdr *udp = (struct udphdr*)(skb->data-skb_displ+(*l4_offset));
2928 + *l4_src_port = ntohs(udp->source), *l4_dst_port = ntohs(udp->dest);
2929 + *payload_offset = (*l4_offset)+sizeof(struct udphdr);
2931 + *payload_offset = (*l4_offset);
2933 + *l4_src_port = *l4_dst_port = 0;
2935 + return(1); /* IP */
2936 + } /* TODO: handle IPv6 */
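+  /* Worked example, untagged IPv4/TCP with no IP or TCP options:
+     l3_offset = displ+14, l4_offset = l3_offset+20 (ihl=5),
+     payload_offset = l4_offset+20 (doff=5). */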
2938 + return(0); /* No IP */
2941 +/* **************************************************************** */
2943 +static void reset_bitmask(bitmask_selector *selector)
2945 + memset((char*)selector->bits_memory, 0, selector->num_bits/8);
2947 + while(selector->clashes != NULL) {
2948 + bitmask_counter_list *next = selector->clashes->next;
2949 + kfree(selector->clashes);
2950 + selector->clashes = next;
2954 +/* **************************************************************** */
2956 +static void alloc_bitmask(u_int32_t tot_bits, bitmask_selector *selector)
2958 + u_int tot_mem = tot_bits/8;
2960 + if(tot_mem <= PAGE_SIZE)
2961 + selector->order = 1;
2963 + for(selector->order = 0; (PAGE_SIZE << selector->order) < tot_mem; selector->order++)
2967 + printk("BITMASK: [order=%d][tot_mem=%d]\n", selector->order, tot_mem);
2969 + while((selector->bits_memory = __get_free_pages(GFP_ATOMIC, selector->order)) == 0)
2970 + if(selector->order-- == 0)
2973 + if(selector->order == 0) {
2974 + printk("BITMASK: ERROR not enough memory for bitmask\n");
2975 + selector->num_bits = 0;
2979 + tot_mem = PAGE_SIZE << selector->order;
2980 + printk("BITMASK: succesfully allocated [tot_mem=%d][order=%d]\n",
2981 + tot_mem, selector->order);
2983 + selector->num_bits = tot_mem*8;
2984 + selector->clashes = NULL;
2985 + reset_bitmask(selector);
2988 +/* ********************************** */
2990 +static void free_bitmask(bitmask_selector *selector)
2992 + if(selector->bits_memory > 0)
2993 + free_pages(selector->bits_memory, selector->order);
2996 +/* ********************************** */
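+/*
+ * The bitmask behaves like a counting bloom filter: when a bit is already
+ * set, the clash is recorded in the 'clashes' list with a per-bit counter,
+ * so clear_bit_bitmask() only really clears a bit once every rule that
+ * hashed onto it has been removed. Example: two rules land on bit 42; the
+ * second set_bit stores a counter of 2, the first clear decrements it and
+ * drops the entry, and only the second clear resets the bit itself.
+ */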
2998 +static void set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
2999 + u_int32_t idx = the_bit % selector->num_bits;
3001 + if(BITMASK_ISSET(idx, selector)) {
3002 + bitmask_counter_list *head = selector->clashes;
3004 + printk("BITMASK: bit %u was already set\n", the_bit);
3006 + while(head != NULL) {
3007 + if(head->bit_id == the_bit) {
3008 + head->bit_counter++;
3009 + printk("BITMASK: bit %u is now set to %d\n", the_bit, head->bit_counter);
3013 + head = head->next;
3016 + head = kmalloc(sizeof(bitmask_counter_list), GFP_KERNEL);
3018 + head->bit_id = the_bit;
3019 + head->bit_counter = 1 /* previous value */ + 1 /* the requested set */;
3020 + head->next = selector->clashes;
3021 + selector->clashes = head;
3023 + printk("BITMASK: not enough memory\n");
3027 + BITMASK_SET(idx, selector);
3028 + printk("BITMASK: bit %u is now set\n", the_bit);
3032 +/* ********************************** */
3034 +static u_char is_set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3035 + u_int32_t idx = the_bit % selector->num_bits;
3036 + return(BITMASK_ISSET(idx, selector));
3039 +/* ********************************** */
3041 +static void clear_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3042 + u_int32_t idx = the_bit % selector->num_bits;
3044 + if(!BITMASK_ISSET(idx, selector))
3045 + printk("BITMASK: bit %u was not set\n", the_bit);
3047 + bitmask_counter_list *head = selector->clashes, *prev = NULL;
3049 + while(head != NULL) {
3050 + if(head->bit_id == the_bit) {
3051 + head->bit_counter--;
3053 + printk("BITMASK: bit %u is now set to %d\n",
3054 + the_bit, head->bit_counter);
3056 + if(head->bit_counter == 1) {
3057 +	  /* We can now delete this entry: the single remaining
3058 +	     reference is already represented by the bitmask bit itself */
3061 + selector->clashes = head->next;
3063 + prev->next = head->next;
3070 + prev = head; head = head->next;
3073 + BITMASK_CLR(idx, selector);
3074 + printk("BITMASK: bit %u is now reset\n", the_bit);
3078 +/* ********************************** */
3080 +/* Hash function */
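+/* This is the sdbm string hash, hash = hash*65599 + byte (with 65599 =
+   (1<<16)+(1<<6)-1), applied to the four bytes of 'value'. */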
3081 +static u_int32_t sdb_hash(u_int32_t value) {
3082 + u_int32_t hash = 0, i;
3083 + u_int8_t str[sizeof(value)];
3085 + memcpy(str, &value, sizeof(value));
3087 + for(i = 0; i < sizeof(value); i++) {
3088 + hash = str[i] + (hash << 6) + (hash << 16) - hash;
3094 +/* ********************************** */
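+/*
+ * Rules are ASCII strings whose first character selects add or remove and
+ * whose body is one of "vlan=<id>", "mac=<a:b:c:d:e:f>", "ip=<a.b.c.d>",
+ * "port=<num>" or "proto=<tcp|udp|icmp|num>". A minimal userspace sketch
+ * (the '+' prefix and the option level 0 are assumptions based on the
+ * set/clear branches and on ring_setsockopt()):
+ *
+ *   char rule[] = "+ip=192.168.0.1";
+ *   setsockopt(fd, 0, SO_SET_BLOOM, rule, sizeof(rule));
+ */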
3096 +static void handle_bloom_filter_rule(struct ring_opt *pfr, char *buf) {
3102 + count = strlen(buf);
3104 + printk("PF_RING: -> handle_bloom_filter_rule(%s)\n", buf);
3106 + if((buf[count-1] == '\n') || (buf[count-1] == '\r')) buf[count-1] = '\0';
3109 + u_int32_t the_bit;
3111 + if(!strncmp(&buf[1], "vlan=", 5)) {
3112 + sscanf(&buf[6], "%d", &the_bit);
3115 + set_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_add++;
3117 + clear_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_remove++;
3118 + } else if(!strncmp(&buf[1], "mac=", 4)) {
3119 + int a, b, c, d, e, f;
3121 +      if(sscanf(&buf[5], "%02x:%02x:%02x:%02x:%02x:%02x",
3122 + &a, &b, &c, &d, &e, &f) == 6) {
3123 + u_int32_t mac_addr = (a & 0xff) + (b & 0xff) + ((c & 0xff) << 24) + ((d & 0xff) << 16) + ((e & 0xff) << 8) + (f & 0xff);
3125 + /* printk("PF_RING: -> [%u][%u][%u][%u][%u][%u] -> [%u]\n", a, b, c, d, e, f, mac_addr); */
3128 + set_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_add++;
3130 + clear_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_remove++;
3132 + printk("PF_RING: -> Invalid MAC address '%s'\n", &buf[5]);
3133 + } else if(!strncmp(&buf[1], "ip=", 3)) {
3136 + if(sscanf(&buf[4], "%d.%d.%d.%d", &a, &b, &c, &d) == 4) {
3137 + u_int32_t ip_addr = ((a & 0xff) << 24) + ((b & 0xff) << 16) + ((c & 0xff) << 8) + (d & 0xff);
3140 +	  set_bit_bitmask(&pfr->ip_bitmask, ip_addr), set_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_add++; /* hashed bit in the twin bitmask, mirroring the clear path */
3142 + clear_bit_bitmask(&pfr->ip_bitmask, ip_addr), clear_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_remove++;
3144 + printk("PF_RING: -> Invalid IP address '%s'\n", &buf[4]);
3145 + } else if(!strncmp(&buf[1], "port=", 5)) {
3146 + sscanf(&buf[6], "%d", &the_bit);
3149 +	set_bit_bitmask(&pfr->port_bitmask, the_bit), set_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_add++; /* hashed bit in the twin bitmask, mirroring the clear path */
3151 + clear_bit_bitmask(&pfr->port_bitmask, the_bit), clear_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_remove++;
3152 + } else if(!strncmp(&buf[1], "proto=", 6)) {
3153 + if(!strncmp(&buf[7], "tcp", 3)) the_bit = 6;
3154 + else if(!strncmp(&buf[7], "udp", 3)) the_bit = 17;
3155 + else if(!strncmp(&buf[7], "icmp", 4)) the_bit = 1;
3156 + else sscanf(&buf[7], "%d", &the_bit);
3159 + set_bit_bitmask(&pfr->proto_bitmask, the_bit);
3161 + clear_bit_bitmask(&pfr->proto_bitmask, the_bit);
3163 + printk("PF_RING: -> Unknown rule type '%s'\n", buf);
3167 +/* ********************************** */
3169 +static void reset_bloom_filters(struct ring_opt *pfr) {
3170 + reset_bitmask(&pfr->mac_bitmask);
3171 + reset_bitmask(&pfr->vlan_bitmask);
3172 + reset_bitmask(&pfr->ip_bitmask); reset_bitmask(&pfr->twin_ip_bitmask);
3173 + reset_bitmask(&pfr->port_bitmask); reset_bitmask(&pfr->twin_port_bitmask);
3174 + reset_bitmask(&pfr->proto_bitmask);
3176 + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3177 + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3178 + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3179 + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3180 + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3182 + printk("PF_RING: rules have been reset\n");
3185 +/* ********************************** */
3187 +static void init_blooms(struct ring_opt *pfr) {
3188 + alloc_bitmask(4096, &pfr->mac_bitmask);
3189 + alloc_bitmask(4096, &pfr->vlan_bitmask);
3190 + alloc_bitmask(32768, &pfr->ip_bitmask); alloc_bitmask(32768, &pfr->twin_ip_bitmask);
3191 + alloc_bitmask(4096, &pfr->port_bitmask); alloc_bitmask(4096, &pfr->twin_port_bitmask);
3192 + alloc_bitmask(4096, &pfr->proto_bitmask);
3194 + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3195 + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3196 + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3197 + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3198 + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3200 + reset_bloom_filters(pfr);
3203 +/* ********************************** */
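+/* Callback required by acsmSearch2(): only the search's integer return
+   value (the number of matches) is used in add_skb_to_ring(), so the
+   callback just returns 0 to let the scan continue. */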
3205 +inline int MatchFound (void* id, int index, void *data) { return(0); }
3207 +/* ********************************** */
3209 +static void add_skb_to_ring(struct sk_buff *skb,
3210 + struct ring_opt *pfr,
3211 + u_char recv_packet,
3212 + u_char real_skb /* 1=skb 0=faked skb */) {
3213 + FlowSlot *theSlot;
3214 + int idx, displ, fwd_pkt = 0;
3217 + /* Hack for identifying a packet received by the e1000 */
3219 + displ = SKB_DISPLACEMENT;
3221 + displ = 0; /* Received by the e1000 wrapper */
3225 + write_lock(&pfr->ring_index_lock);
3226 + pfr->slots_info->tot_pkts++;
3227 + write_unlock(&pfr->ring_index_lock);
3229 + /* BPF Filtering (from af_packet.c) */
3230 + if(pfr->bpfFilter != NULL) {
3231 + unsigned res = 1, len;
3233 + len = skb->len-skb->data_len;
3235 + write_lock(&pfr->ring_index_lock);
3236 + skb->data -= displ;
3237 + res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
3238 + skb->data += displ;
3239 + write_unlock(&pfr->ring_index_lock);
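+    /* sk_run_filter() returns the BPF program's accept length (0 = drop);
+       skb->data is rewound above so that the filter sees the MAC header. */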
3242 + /* Filter failed */
3244 +#if defined(RING_DEBUG)
3245 + printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
3246 + "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3247 + (int)skb->len, pfr->slots_info->tot_pkts,
3248 + pfr->slots_info->insert_idx,
3249 + skb->pkt_type, skb->cloned);
3256 + /* ************************** */
3258 + if(pfr->sample_rate > 1) {
3259 + if(pfr->pktToSample == 0) {
3260 + write_lock(&pfr->ring_index_lock);
3261 + pfr->pktToSample = pfr->sample_rate;
3262 + write_unlock(&pfr->ring_index_lock);
3264 + write_lock(&pfr->ring_index_lock);
3265 + pfr->pktToSample--;
3266 + write_unlock(&pfr->ring_index_lock);
3268 +#if defined(RING_DEBUG)
3269 + printk("add_skb_to_ring(skb): sampled packet [len=%d]"
3270 + "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3271 + (int)skb->len, pfr->slots_info->tot_pkts,
3272 + pfr->slots_info->insert_idx,
3273 + skb->pkt_type, skb->cloned);
3279 + /* ************************************* */
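+  /* Reflector mode: if a device was set via SO_SET_REFLECTOR the packet is
+     retransmitted on that device and, success or failure, the function
+     returns without inserting anything into the ring. */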
3281 + if((pfr->reflector_dev != NULL)
3282 + && (!netif_queue_stopped(pfr->reflector_dev))) {
3283 + int cpu = smp_processor_id();
3285 + /* increase reference counter so that this skb is not freed */
3286 + atomic_inc(&skb->users);
3288 + skb->data -= displ;
3291 + if (netdev_get_tx_queue(pfr->reflector_dev, 0)->xmit_lock_owner != cpu) {
3292 + /* Patch below courtesy of Matthew J. Roth <mroth@imminc.com> */
3293 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3294 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3295 + pfr->reflector_dev->xmit_lock_owner = cpu;
3296 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3298 + netif_tx_lock_bh(pfr->reflector_dev);
3300 + if (pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev) == 0) {
3301 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3302 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3303 + pfr->reflector_dev->xmit_lock_owner = -1;
3304 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3306 + netif_tx_unlock_bh(pfr->reflector_dev);
3308 + skb->data += displ;
3309 +#if defined(RING_DEBUG)
3310 + printk("++ hard_start_xmit succeeded\n");
3315 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3316 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3317 + pfr->reflector_dev->xmit_lock_owner = -1;
3318 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3320 + netif_tx_unlock_bh(pfr->reflector_dev);
3324 +#if defined(RING_DEBUG)
3325 + printk("++ hard_start_xmit failed\n");
3327 + skb->data += displ;
3328 + return; /* -ENETDOWN */
3331 + /* ************************************* */
3333 +#if defined(RING_DEBUG)
3334 + printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
3335 + "[pkt_type=%d][cloned=%d]\n",
3336 + (int)skb->len, pfr->slots_info->tot_pkts,
3337 + pfr->slots_info->insert_idx,
3338 + skb->pkt_type, skb->cloned);
3341 + idx = pfr->slots_info->insert_idx;
3342 + theSlot = get_insert_slot(pfr);
3344 + if((theSlot != NULL) && (theSlot->slot_state == 0)) {
3345 + struct pcap_pkthdr *hdr;
3347 + int is_ip_pkt, debug = 0;
3349 + /* Update Index */
3352 + bucket = &theSlot->bucket;
3353 + hdr = (struct pcap_pkthdr*)bucket;
3355 + /* BD - API changed for time keeping */
3356 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3357 + if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
3359 + hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
3361 + if(skb->tstamp.tv64 == 0) __net_timestamp(skb);
3363 + struct timeval tv = ktime_to_timeval(skb->tstamp);
3364 + hdr->ts.tv_sec = tv.tv_sec, hdr->ts.tv_usec = tv.tv_usec;
3366 + hdr->caplen = skb->len+displ;
3368 + if(hdr->caplen > pfr->slots_info->data_len)
3369 + hdr->caplen = pfr->slots_info->data_len;
3371 + hdr->len = skb->len+displ;
3374 + is_ip_pkt = parse_pkt(skb, displ,
3382 + &hdr->l4_src_port,
3383 + &hdr->l4_dst_port,
3384 + &hdr->payload_offset);
3386 + if(is_ip_pkt && pfr->bitmask_enabled) {
3387 + int vlan_match = 0;
3393 + printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d][sport=%d][dport=%d][src=%u][dst=%u]\n",
3394 + hdr->l3_proto, hdr->vlan_id, hdr->l4_src_port, hdr->l4_dst_port, hdr->ipv4_src, hdr->ipv4_dst);
3396 + printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d]\n", hdr->l3_proto, hdr->vlan_id);
3399 + if(hdr->vlan_id != (u_int16_t)-1) {
3400 + vlan_match = is_set_bit_bitmask(&pfr->vlan_bitmask, hdr->vlan_id);
3405 + struct ethhdr *eh = (struct ethhdr*)(skb->data);
3406 + u_int32_t src_mac = (eh->h_source[0] & 0xff) + (eh->h_source[1] & 0xff) + ((eh->h_source[2] & 0xff) << 24)
3407 + + ((eh->h_source[3] & 0xff) << 16) + ((eh->h_source[4] & 0xff) << 8) + (eh->h_source[5] & 0xff);
3409 + if(debug) printk(KERN_INFO "PF_RING: [src_mac=%u]\n", src_mac);
3411 + fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, src_mac);
3414 + u_int32_t dst_mac = (eh->h_dest[0] & 0xff) + (eh->h_dest[1] & 0xff) + ((eh->h_dest[2] & 0xff) << 24)
3415 + + ((eh->h_dest[3] & 0xff) << 16) + ((eh->h_dest[4] & 0xff) << 8) + (eh->h_dest[5] & 0xff);
3417 + if(debug) printk(KERN_INFO "PF_RING: [dst_mac=%u]\n", dst_mac);
3419 + fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, dst_mac);
3421 + if(is_ip_pkt && (!fwd_pkt)) {
3422 + fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_src);
3425 + fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_dst);
3427 + if((!fwd_pkt) && ((hdr->l3_proto == IPPROTO_TCP)
3428 + || (hdr->l3_proto == IPPROTO_UDP))) {
3429 + fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_src_port);
3430 + if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_dst_port);
3433 + if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->proto_bitmask, hdr->l3_proto);
3441 + if(fwd_pkt && (pfr->acsm != NULL)) {
3442 + if((hdr->payload_offset > 0) && ((skb->len+skb->mac_len) > hdr->payload_offset)) {
3443 + char *payload = (skb->data-displ+hdr->payload_offset);
3444 + int payload_len = skb->len /* + skb->mac_len */ - hdr->payload_offset;
3446 + if((payload_len > 0)
3447 + && ((hdr->l4_src_port == 80) || (hdr->l4_dst_port == 80))) {
3453 + memcpy(buf, payload, payload_len);
3454 + buf[payload_len] = '\0';
3455 + printk("[%s]\n", payload);
3458 + /* printk("Tring to match pattern [len=%d][%s]\n", payload_len, payload); */
3459 + rc = acsmSearch2(pfr->acsm, payload, payload_len, MatchFound, (void *)0) ? 1 : 0;
3461 +	    /* printk("Match result: %d\n", fwd_pkt); */
3463 + printk("Pattern matched!\n");
3474 + memcpy(&bucket[sizeof(struct pcap_pkthdr)], skb->data-displ, hdr->caplen);
3476 +#if defined(RING_DEBUG)
3478 + static unsigned int lastLoss = 0;
3480 + if(pfr->slots_info->tot_lost
3481 + && (lastLoss != pfr->slots_info->tot_lost)) {
3482 + printk("add_skb_to_ring(%d): [data_len=%d]"
3483 + "[hdr.caplen=%d][skb->len=%d]"
3484 + "[pcap_pkthdr=%d][removeIdx=%d]"
3485 + "[loss=%lu][page=%u][slot=%u]\n",
3486 + idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len,
3487 + sizeof(struct pcap_pkthdr),
3488 + pfr->slots_info->remove_idx,
3489 + (long unsigned int)pfr->slots_info->tot_lost,
3490 + pfr->insert_page_id, pfr->insert_slot_id);
3492 + lastLoss = pfr->slots_info->tot_lost;
3497 + write_lock(&pfr->ring_index_lock);
3498 + if(idx == pfr->slots_info->tot_slots)
3499 + pfr->slots_info->insert_idx = 0;
3501 + pfr->slots_info->insert_idx = idx;
3503 + pfr->slots_info->tot_insert++;
3504 + theSlot->slot_state = 1;
3505 + write_unlock(&pfr->ring_index_lock);
3508 + write_lock(&pfr->ring_index_lock);
3509 + pfr->slots_info->tot_lost++;
3510 + write_unlock(&pfr->ring_index_lock);
3512 +#if defined(RING_DEBUG)
3513 + printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
3514 + "[removeIdx=%u][insertIdx=%u]\n",
3515 + (long unsigned int)pfr->slots_info->tot_lost,
3516 + pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
3522 + /* wakeup in case of poll() */
3523 + if(waitqueue_active(&pfr->ring_slots_waitqueue))
3524 + wake_up_interruptible(&pfr->ring_slots_waitqueue);
3528 +/* ********************************** */
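+/*
+ * Picks the cluster member that receives this packet. Round-robin simply
+ * rotates hashing_id; per-flow folds saddr+daddr+protocol (plus the ports
+ * for TCP/UDP), and since the sum is commutative both directions of a flow
+ * are delivered to the same ring.
+ */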
3530 +static u_int hash_skb(struct ring_cluster *cluster_ptr,
3531 + struct sk_buff *skb, u_char recv_packet) {
3536 + if(cluster_ptr->hashing_mode == cluster_round_robin) {
3537 + idx = cluster_ptr->hashing_id++;
3539 + /* Per-flow clustering */
3540 + if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
3544 + displ = SKB_DISPLACEMENT;
3549 +	 Always points to the IP part of the packet
3552 + ip = (struct iphdr*)(skb->data+displ);
3554 + idx = ip->saddr+ip->daddr+ip->protocol;
3556 + if(ip->protocol == IPPROTO_TCP) {
3557 + struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
3558 + +sizeof(struct iphdr));
3559 + idx += tcp->source+tcp->dest;
3560 + } else if(ip->protocol == IPPROTO_UDP) {
3561 + struct udphdr *udp = (struct udphdr*)(skb->data+displ
3562 + +sizeof(struct iphdr));
3563 + idx += udp->source+udp->dest;
3569 + return(idx % cluster_ptr->num_cluster_elements);
3572 +/* ********************************** */
3574 +static int skb_ring_handler(struct sk_buff *skb,
3575 + u_char recv_packet,
3576 + u_char real_skb /* 1=skb 0=faked skb */) {
3577 + struct sock *skElement;
3579 + struct list_head *ptr;
3580 + struct ring_cluster *cluster_ptr;
3583 + uint64_t rdt = _rdtsc(), rdt1, rdt2;
3586 + if((!skb) /* Invalid skb */
3587 + || ((!enable_tx_capture) && (!recv_packet))) {
3589 + An outgoing packet is about to be sent out
3590 + but we decided not to handle transmitted
3596 +#if defined(RING_DEBUG)
3598 + printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
3599 + skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
3607 + /* [1] Check unclustered sockets */
3608 + for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
3609 + struct ring_opt *pfr;
3610 + struct ring_element *entry;
3612 + entry = list_entry(ptr, struct ring_element, list);
3614 + read_lock(&ring_mgmt_lock);
3615 + skElement = entry->sk;
3616 + pfr = ring_sk(skElement);
3617 + read_unlock(&ring_mgmt_lock);
3620 + && (pfr->cluster_id == 0 /* No cluster */)
3621 + && (pfr->ring_slots != NULL)
3622 + && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3623 + /* We've found the ring where the packet can be stored */
3624 + read_lock(&ring_mgmt_lock);
3625 + add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3626 + read_unlock(&ring_mgmt_lock);
3628 + rc = 1; /* Ring found: we've done our job */
3632 + /* [2] Check socket clusters */
3633 + cluster_ptr = ring_cluster_list;
3635 + while(cluster_ptr != NULL) {
3636 + struct ring_opt *pfr;
3638 + if(cluster_ptr->num_cluster_elements > 0) {
3639 + u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
3641 + read_lock(&ring_mgmt_lock);
3642 + skElement = cluster_ptr->sk[skb_hash];
3643 + read_unlock(&ring_mgmt_lock);
3645 + if(skElement != NULL) {
3646 + pfr = ring_sk(skElement);
3649 + && (pfr->ring_slots != NULL)
3650 + && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3651 + /* We've found the ring where the packet can be stored */
3652 + read_lock(&ring_mgmt_lock);
3653 + add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3654 + read_unlock(&ring_mgmt_lock);
3656 + rc = 1; /* Ring found: we've done our job */
3661 + cluster_ptr = cluster_ptr->next;
3665 + rdt1 = _rdtsc()-rdt1;
3672 + if(transparent_mode) rc = 0;
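+  /* In transparent mode rc is forced to 0 so the skb is not freed below and
+     the packet also continues up the regular stack; otherwise a packet that
+     landed in a ring is consumed here. */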
3674 + if((rc != 0) && real_skb)
3675 + dev_kfree_skb(skb); /* Free the skb */
3678 + rdt2 = _rdtsc()-rdt2;
3679 + rdt = _rdtsc()-rdt;
3681 +#if defined(RING_DEBUG)
3682 + printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
3683 + (int)rdt, rdt-rdt1,
3684 + (int)((float)((rdt-rdt1)*100)/(float)rdt),
3686 + (int)((float)(rdt2*100)/(float)rdt));
3690 + return(rc); /* 0 = packet not handled */
3693 +/* ********************************** */
3695 +struct sk_buff skb;
3697 +static int buffer_ring_handler(struct net_device *dev,
3698 + char *data, int len) {
3700 +#if defined(RING_DEBUG)
3701 + printk("buffer_ring_handler: [dev=%s][len=%d]\n",
3702 + dev->name == NULL ? "<NULL>" : dev->name, len);
3705 + /* BD - API changed for time keeping */
3706 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3707 + skb.dev = dev, skb.len = len, skb.data = data,
3708 + skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */
3710 + skb.dev = dev, skb.len = len, skb.data = data,
3711 + skb.data_len = len, skb.tstamp.tv64 = 0; /* Calculate the time */
3714 + skb_ring_handler(&skb, 1, 0 /* fake skb */);
3719 +/* ********************************** */
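+/*
+ * Userspace counterpart (sketch) of the checks performed below: a ring
+ * socket can only be created, by a CAP_NET_ADMIN process, as
+ *
+ *   int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
+ */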
3721 +static int ring_create(struct net *net, struct socket *sock, int protocol) {
3723 + struct ring_opt *pfr;
3726 +#if defined(RING_DEBUG)
3727 + printk("RING: ring_create()\n");
3730 + /* Are you root, superuser or so ? */
3731 + if(!capable(CAP_NET_ADMIN))
3734 + if(sock->type != SOCK_RAW)
3735 + return -ESOCKTNOSUPPORT;
3737 + if(protocol != htons(ETH_P_ALL))
3738 + return -EPROTONOSUPPORT;
3740 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3741 + MOD_INC_USE_COUNT;
3746 + // BD: -- broke this out to keep it more simple and clear as to what the
3748 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3749 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3750 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
3752 + // BD: API changed in 2.6.12, ref:
3753 + // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
3754 + sk = sk_alloc(net, PF_RING, GFP_ATOMIC, &ring_proto);
3758 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1);
3764 + sock->ops = &ring_ops;
3765 + sock_init_data(sock, sk);
3766 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3767 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3768 + sk_set_owner(sk, THIS_MODULE);
3773 + ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
3775 + if (!(pfr = ring_sk(sk))) {
3779 + memset(pfr, 0, sizeof(*pfr));
3780 + init_waitqueue_head(&pfr->ring_slots_waitqueue);
3781 + pfr->ring_index_lock = RW_LOCK_UNLOCKED;
3782 + atomic_set(&pfr->num_ring_slots_waiters, 0);
3786 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3787 + sk->sk_family = PF_RING;
3788 + sk->sk_destruct = ring_sock_destruct;
3790 + sk->family = PF_RING;
3791 + sk->destruct = ring_sock_destruct;
3792 + sk->num = protocol;
3797 +#if defined(RING_DEBUG)
3798 + printk("RING: ring_create() - created\n");
3803 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3804 + MOD_DEC_USE_COUNT;
3809 +/* *********************************************** */
3811 +static int ring_release(struct socket *sock)
3813 + struct sock *sk = sock->sk;
3814 + struct ring_opt *pfr = ring_sk(sk);
3818 +#if defined(RING_DEBUG)
3819 + printk("RING: called ring_release\n");
3822 +#if defined(RING_DEBUG)
3823 + printk("RING: ring_release entered\n");
3827 + The calls below must be placed outside the
3828 + write_lock_irq...write_unlock_irq block.
3831 + ring_proc_remove(ring_sk(sk));
3833 + write_lock_irq(&ring_mgmt_lock);
3837 + /* Free the ring buffer */
3838 + if(pfr->ring_memory) {
3839 + struct page *page, *page_end;
3841 + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3842 + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3843 + ClearPageReserved(page);
3845 + free_pages(pfr->ring_memory, pfr->order);
3848 + free_bitmask(&pfr->mac_bitmask);
3849 + free_bitmask(&pfr->vlan_bitmask);
3850 + free_bitmask(&pfr->ip_bitmask); free_bitmask(&pfr->twin_ip_bitmask);
3851 + free_bitmask(&pfr->port_bitmask); free_bitmask(&pfr->twin_port_bitmask);
3852 + free_bitmask(&pfr->proto_bitmask);
3854 + if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
3857 + ring_sk(sk) = NULL;
3859 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3860 + skb_queue_purge(&sk->sk_write_queue);
3864 + write_unlock_irq(&ring_mgmt_lock);
3866 +#if defined(RING_DEBUG)
3867 + printk("RING: ring_release leaving\n");
3873 +/* ********************************** */
3875 + * We create a ring for this socket and bind it to the specified device
3877 +static int packet_ring_bind(struct sock *sk, struct net_device *dev)
3879 + u_int the_slot_len;
3880 + u_int32_t tot_mem;
3881 + struct ring_opt *pfr = ring_sk(sk);
3882 + struct page *page, *page_end;
3884 + if(!dev) return(-1);
3886 +#if defined(RING_DEBUG)
3887 + printk("RING: packet_ring_bind(%s) called\n", dev->name);
3890 + /* **********************************************
3892 +    *************************************
3893 +    *                                   *
3894 +    *           FlowSlotInfo            *
3895 +    *                                   *
3896 +    ************************************* <-+
3897 +    *           FlowSlot                *   |
3898 +    *************************************   |
3899 +    *           FlowSlot                *   |
3900 +    *************************************   +- num_slots
3901 +    *           FlowSlot                *   |
3902 +    *************************************   |
3903 +    *           FlowSlot                *   |
3904 +    ************************************* <-+
3906 +    ********************************************** */
3908 + the_slot_len = sizeof(u_char) /* flowSlot.slot_state */
3912 + + sizeof(struct pcap_pkthdr)
3913 + + bucket_len /* flowSlot.bucket */;
3915 + tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
3918 + Calculate the value of the order parameter used later.
3919 + See http://www.linuxjournal.com/article.php?sid=1133
3921 +  for(pfr->order = 0; (PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ;
3924 + We now try to allocate the memory as required. If we fail
3925 +     we try to allocate a smaller amount of memory (hence a
3928 + while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0)
3929 + if(pfr->order-- == 0)
3932 + if(pfr->order == 0) {
3933 + printk("RING: ERROR not enough memory for ring\n");
3936 + printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n",
3937 + PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order);
3940 + tot_mem = PAGE_SIZE << pfr->order;
3941 + memset((char*)pfr->ring_memory, 0, tot_mem);
3943 + /* Now we need to reserve the pages */
3944 + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3945 + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3946 + SetPageReserved(page);
3948 + pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
3949 + pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
3951 + pfr->slots_info->version = RING_FLOWSLOT_VERSION;
3952 + pfr->slots_info->slot_len = the_slot_len;
3953 + pfr->slots_info->data_len = bucket_len;
3954 + pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
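+  /* tot_slots is recomputed from the power-of-two allocation actually
+     obtained, so it may differ from the requested num_slots. */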
3955 + pfr->slots_info->tot_mem = tot_mem;
3956 + pfr->slots_info->sample_rate = sample_rate;
3958 + printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
3959 + pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
3960 + pfr->slots_info->tot_mem);
3966 + for(i=0; i<pfr->slots_info->tot_slots; i++) {
3967 + unsigned long idx = i*pfr->slots_info->slot_len;
3968 + FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
3969 + slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
3974 + pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
3978 +     Leave this statement here as the last one: once
3979 +     ring_netdev != NULL the socket is ready to be used.
3981 + pfr->ring_netdev = dev;
3986 +/* ************************************* */
3988 +/* Bind to a device */
3989 +static int ring_bind(struct socket *sock,
3990 + struct sockaddr *sa, int addr_len)
3992 + struct sock *sk=sock->sk;
3993 + struct net_device *dev = NULL;
3995 +#if defined(RING_DEBUG)
3996 + printk("RING: ring_bind() called\n");
4002 + if (addr_len != sizeof(struct sockaddr))
4004 + if (sa->sa_family != PF_RING)
4007 + /* Safety check: add trailing zero if missing */
4008 + sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
4010 +#if defined(RING_DEBUG)
4011 + printk("RING: searching device %s\n", sa->sa_data);
4014 + if((dev = __dev_get_by_name(&init_net, sa->sa_data)) == NULL) {
4015 +#if defined(RING_DEBUG)
4016 + printk("RING: search failed\n");
4020 + return(packet_ring_bind(sk, dev));
4023 +/* ************************************* */
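+/*
+ * Userspace sketch: the whole ring is consumed through a single shared
+ * mapping whose length must be a PAGE_SIZE multiple no larger than
+ * FlowSlotInfo.tot_mem (both checked below), with FlowSlotInfo at offset 0:
+ *
+ *   char *base = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ *   FlowSlotInfo *fsi = (FlowSlotInfo*)base;
+ */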
4025 +static int ring_mmap(struct file *file,
4026 + struct socket *sock,
4027 + struct vm_area_struct *vma)
4029 + struct sock *sk = sock->sk;
4030 + struct ring_opt *pfr = ring_sk(sk);
4031 + unsigned long size, start;
4035 +#if defined(RING_DEBUG)
4036 + printk("RING: ring_mmap() called\n");
4039 + if(pfr->ring_memory == 0) {
4040 +#if defined(RING_DEBUG)
4041 + printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
4046 + size = (unsigned long)(vma->vm_end-vma->vm_start);
4048 + if(size % PAGE_SIZE) {
4049 +#if defined(RING_DEBUG)
4050 + printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
4055 + /* if userspace tries to mmap beyond end of our buffer, fail */
4056 + if(size > pfr->slots_info->tot_mem) {
4057 +#if defined(RING_DEBUG)
4058 + printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem);
4063 + pagesToMap = size/PAGE_SIZE;
4065 +#if defined(RING_DEBUG)
4066 + printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
4069 +#if defined(RING_DEBUG)
4070 + printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
4071 + pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
4072 + pfr->ring_netdev->name);
4075 + /* we do not want to have this area swapped out, lock it */
4076 + vma->vm_flags |= VM_LOCKED;
4077 + start = vma->vm_start;
4079 + /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */
4080 + ptr = (char*)(start+PAGE_SIZE);
4082 + if(remap_page_range(
4083 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4087 + __pa(pfr->ring_memory),
4088 + PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
4089 +#if defined(RING_DEBUG)
4090 + printk("remap_page_range() failed\n");
4095 +#if defined(RING_DEBUG)
4096 + printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap);
4102 +/* ************************************* */
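+/*
+ * No packet data travels through recvmsg(): payloads are read directly from
+ * the mmap()ed ring. This call only waits until at least MIN_QUEUED_PKTS
+ * slots are ready (giving up after MAX_QUEUE_LOOPS polls) and returns the
+ * queue depth.
+ */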
4104 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4105 +static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
4106 + struct msghdr *msg, size_t len, int flags)
4108 + static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
4109 + int flags, struct scm_cookie *scm)
4113 + struct ring_opt *pfr = ring_sk(sock->sk);
4114 + u_int32_t queued_pkts, num_loops = 0;
4116 +#if defined(RING_DEBUG)
4117 + printk("ring_recvmsg called\n");
4120 + slot = get_remove_slot(pfr);
4122 + while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
4123 + wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
4125 +#if defined(RING_DEBUG)
4126 + printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
4127 + slot->slot_state, queued_pkts, num_loops);
4130 + if(queued_pkts > 0) {
4131 + if(num_loops++ > MAX_QUEUE_LOOPS)
4136 +#if defined(RING_DEBUG)
4138 + printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
4139 + queued_pkts, num_loops);
4142 + return(queued_pkts);
4145 +/* ************************************* */
4147 +unsigned int ring_poll(struct file * file,
4148 + struct socket *sock, poll_table *wait)
4151 + struct ring_opt *pfr = ring_sk(sock->sk);
4153 +#if defined(RING_DEBUG)
4154 + printk("poll called\n");
4157 + slot = get_remove_slot(pfr);
4159 + if((slot != NULL) && (slot->slot_state == 0))
4160 + poll_wait(file, &pfr->ring_slots_waitqueue, wait);
4162 +#if defined(RING_DEBUG)
4163 + printk("poll returning %d\n", slot->slot_state);
4166 + if((slot != NULL) && (slot->slot_state == 1))
4167 + return(POLLIN | POLLRDNORM);
4172 +/* ************************************* */
4174 +int add_to_cluster_list(struct ring_cluster *el,
4175 + struct sock *sock) {
4177 + if(el->num_cluster_elements == CLUSTER_LEN)
4178 + return(-1); /* Cluster full */
4180 + ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
4181 + el->sk[el->num_cluster_elements] = sock;
4182 + el->num_cluster_elements++;
4186 +/* ************************************* */
4188 +int remove_from_cluster_list(struct ring_cluster *el,
4189 + struct sock *sock) {
4192 + for(i=0; i<CLUSTER_LEN; i++)
4193 + if(el->sk[i] == sock) {
4194 + el->num_cluster_elements--;
4196 + if(el->num_cluster_elements > 0) {
4197 + /* The cluster contains other elements */
4198 + for(j=i; j<CLUSTER_LEN-1; j++)
4199 + el->sk[j] = el->sk[j+1];
4201 + el->sk[CLUSTER_LEN-1] = NULL;
4203 + /* Empty cluster */
4204 + memset(el->sk, 0, sizeof(el->sk));
4210 + return(-1); /* Not found */
4213 +/* ************************************* */
4215 +static int remove_from_cluster(struct sock *sock,
4216 + struct ring_opt *pfr)
4218 + struct ring_cluster *el;
4220 +#if defined(RING_DEBUG)
4221 + printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
4224 + if(pfr->cluster_id == 0 /* 0 = No Cluster */)
4225 +    return(0); /* Nothing to do */
4227 + el = ring_cluster_list;
4229 + while(el != NULL) {
4230 + if(el->cluster_id == pfr->cluster_id) {
4231 + return(remove_from_cluster_list(el, sock));
4236 + return(-EINVAL); /* Not found */
4239 +/* ************************************* */
4241 +static int add_to_cluster(struct sock *sock,
4242 + struct ring_opt *pfr,
4243 + u_short cluster_id)
4245 + struct ring_cluster *el;
4248 + printk("--> add_to_cluster(%d)\n", cluster_id);
4251 + if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
4253 + if(pfr->cluster_id != 0)
4254 + remove_from_cluster(sock, pfr);
4256 + el = ring_cluster_list;
4258 + while(el != NULL) {
4259 + if(el->cluster_id == cluster_id) {
4260 + return(add_to_cluster_list(el, sock));
4265 + /* There's no existing cluster. We need to create one */
4266 + if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL)
4269 + el->cluster_id = cluster_id;
4270 + el->num_cluster_elements = 1;
4271 + el->hashing_mode = cluster_per_flow; /* Default */
4272 + el->hashing_id = 0;
4274 + memset(el->sk, 0, sizeof(el->sk));
4276 + el->next = ring_cluster_list;
4277 + ring_cluster_list = el;
4278 + pfr->cluster_id = cluster_id;
4280 + return(0); /* 0 = OK */
4283 +/* ************************************* */
4285 +/* Code taken/inspired from core/sock.c */
4286 +static int ring_setsockopt(struct socket *sock,
4287 + int level, int optname,
4288 + char *optval, int optlen)
4290 + struct ring_opt *pfr = ring_sk(sock->sk);
4291 + int val, found, ret = 0;
4292 + u_int cluster_id, do_enable;
4293 + char devName[8], bloom_filter[256], aho_pattern[256];
4295 + if(pfr == NULL) return(-EINVAL);
4297 + if (get_user(val, (int *)optval))
4304 + case SO_ATTACH_FILTER:
4306 + if (optlen == sizeof(struct sock_fprog)) {
4307 + unsigned int fsize;
4308 + struct sock_fprog fprog;
4309 + struct sk_filter *filter;
4316 + Do not call copy_from_user within a held
4317 +	 spinlock (e.g. ring_mgmt_lock) as this caused
4318 + problems when certain debugging was enabled under
4319 + 2.6.5 -- including hard lockups of the machine.
4321 + if(copy_from_user(&fprog, optval, sizeof(fprog)))
4324 + fsize = sizeof(struct sock_filter) * fprog.len;
4325 +	  filter = kmalloc(fsize + sizeof(struct sk_filter), GFP_KERNEL); /* room for the sk_filter header as well as the insns */
4327 + if(filter == NULL) {
4332 + if(copy_from_user(filter->insns, fprog.filter, fsize))
4335 + filter->len = fprog.len;
4337 + if(sk_chk_filter(filter->insns, filter->len) != 0) {
4338 + /* Bad filter specified */
4340 + pfr->bpfFilter = NULL;
4344 + /* get the lock, set the filter, release the lock */
4345 + write_lock(&ring_mgmt_lock);
4346 + pfr->bpfFilter = filter;
4347 + write_unlock(&ring_mgmt_lock);
4352 + case SO_DETACH_FILTER:
4353 + write_lock(&ring_mgmt_lock);
4355 + if(pfr->bpfFilter != NULL) {
4356 + kfree(pfr->bpfFilter);
4357 + pfr->bpfFilter = NULL;
4358 + write_unlock(&ring_mgmt_lock);
4364 + case SO_ADD_TO_CLUSTER:
4365 + if (optlen!=sizeof(val))
4368 + if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
4371 + write_lock(&ring_mgmt_lock);
4372 + ret = add_to_cluster(sock->sk, pfr, cluster_id);
4373 + write_unlock(&ring_mgmt_lock);
4376 + case SO_REMOVE_FROM_CLUSTER:
4377 + write_lock(&ring_mgmt_lock);
4378 + ret = remove_from_cluster(sock->sk, pfr);
4379 + write_unlock(&ring_mgmt_lock);
4382 + case SO_SET_REFLECTOR:
4383 + if(optlen >= (sizeof(devName)-1))
4387 + if(copy_from_user(devName, optval, optlen))
4391 + devName[optlen] = '\0';
4393 +#if defined(RING_DEBUG)
4394 + printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
4397 + write_lock(&ring_mgmt_lock);
4398 + pfr->reflector_dev = dev_get_by_name(&init_net, devName);
4399 + write_unlock(&ring_mgmt_lock);
4401 +#if defined(RING_DEBUG)
4402 + if(pfr->reflector_dev != NULL)
4403 + printk("SO_SET_REFLECTOR(%s): succeded\n", devName);
4405 + printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
4409 + case SO_SET_BLOOM:
4410 + if(optlen >= (sizeof(bloom_filter)-1))
4414 + if(copy_from_user(bloom_filter, optval, optlen))
4418 + bloom_filter[optlen] = '\0';
4420 + write_lock(&ring_mgmt_lock);
4421 + handle_bloom_filter_rule(pfr, bloom_filter);
4422 + write_unlock(&ring_mgmt_lock);
4425 + case SO_SET_STRING:
4426 + if(optlen >= (sizeof(aho_pattern)-1))
4430 + if(copy_from_user(aho_pattern, optval, optlen))
4434 + aho_pattern[optlen] = '\0';
4436 + write_lock(&ring_mgmt_lock);
4437 + if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
4440 + if((pfr->acsm = acsmNew2()) != NULL) {
4441 +      int nc=1 /* 1 = case-insensitive (the 'nocase' flag) */, i = 0;
4443 + pfr->acsm->acsmFormat = ACF_BANDED;
4444 + acsmAddPattern2(pfr->acsm, (unsigned char*)aho_pattern,
4445 + (int)strlen(aho_pattern), nc, 0, 0,(void*)aho_pattern, i);
4446 + acsmCompile2(pfr->acsm);
4449 + pfr->acsm = kmalloc (10, GFP_KERNEL); /* TEST */
4452 + write_unlock(&ring_mgmt_lock);
4455 + case SO_TOGGLE_BLOOM_STATE:
4456 +      if(optlen != sizeof(do_enable)) /* anything else would overflow the u_int below */
4460 + if(copy_from_user(&do_enable, optval, optlen))
4464 + write_lock(&ring_mgmt_lock);
4466 + pfr->bitmask_enabled = 1;
4468 + pfr->bitmask_enabled = 0;
4469 + write_unlock(&ring_mgmt_lock);
4470 + printk("SO_TOGGLE_BLOOM_STATE: bloom bitmask %s\n",
4471 + pfr->bitmask_enabled ? "enabled" : "disabled");
4474 + case SO_RESET_BLOOM_FILTERS:
4475 +      if(optlen != sizeof(do_enable))
4479 + if(copy_from_user(&do_enable, optval, optlen))
4483 + write_lock(&ring_mgmt_lock);
4484 + reset_bloom_filters(pfr);
4485 + write_unlock(&ring_mgmt_lock);
4496 + return(sock_setsockopt(sock, level, optname, optval, optlen));
4499 +/* ************************************* */
4501 +static int ring_ioctl(struct socket *sock,
4502 + unsigned int cmd, unsigned long arg)
4507 + case SIOCGIFFLAGS:
4508 + case SIOCSIFFLAGS:
4510 + case SIOCGIFMETRIC:
4511 + case SIOCSIFMETRIC:
4517 + case SIOCGIFHWADDR:
4518 + case SIOCSIFHWADDR:
4521 + case SIOCSIFSLAVE:
4522 + case SIOCGIFSLAVE:
4523 + case SIOCGIFINDEX:
4525 + case SIOCGIFCOUNT:
4526 + case SIOCSIFHWBROADCAST:
4527 + return(inet_dgram_ops.ioctl(sock, cmd, arg));
4531 + return -ENOIOCTLCMD;
4537 +/* ************************************* */
4539 +static struct proto_ops ring_ops = {
4540 + .family = PF_RING,
4541 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4542 + .owner = THIS_MODULE,
4545 + /* Operations that make no sense on ring sockets. */
4546 + .connect = sock_no_connect,
4547 + .socketpair = sock_no_socketpair,
4548 + .accept = sock_no_accept,
4549 + .getname = sock_no_getname,
4550 + .listen = sock_no_listen,
4551 + .shutdown = sock_no_shutdown,
4552 + .sendpage = sock_no_sendpage,
4553 + .sendmsg = sock_no_sendmsg,
4554 + .getsockopt = sock_no_getsockopt,
4556 + /* Now the operations that really occur. */
4557 + .release = ring_release,
4558 + .bind = ring_bind,
4559 + .mmap = ring_mmap,
4560 + .poll = ring_poll,
4561 + .setsockopt = ring_setsockopt,
4562 + .ioctl = ring_ioctl,
4563 + .recvmsg = ring_recvmsg,
4566 +/* ************************************ */
4568 +static struct net_proto_family ring_family_ops = {
4569 + .family = PF_RING,
4570 + .create = ring_create,
4571 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4572 + .owner = THIS_MODULE,
4576 +// BD: API changed in 2.6.12, ref:
4577 +// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
4578 +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
4579 +static struct proto ring_proto = {
4580 + .name = "PF_RING",
4581 + .owner = THIS_MODULE,
4582 + .obj_size = sizeof(struct sock),
4586 +/* ************************************ */
4588 +static void __exit ring_exit(void)
4590 + struct list_head *ptr;
4591 + struct ring_element *entry;
4593 + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
4594 + entry = list_entry(ptr, struct ring_element, list);
4598 + while(ring_cluster_list != NULL) {
4599 + struct ring_cluster *next = ring_cluster_list->next;
4600 + kfree(ring_cluster_list);
4601 + ring_cluster_list = next;
4604 + set_skb_ring_handler(NULL);
4605 + set_buffer_ring_handler(NULL);
4606 + sock_unregister(PF_RING);
4608 + printk("PF_RING shut down.\n");
4611 +/* ************************************ */
4613 +static int __init ring_init(void)
4615 + printk("Welcome to PF_RING %s\n(C) 2004-07 L.Deri <deri@ntop.org>\n",
4618 + INIT_LIST_HEAD(&ring_table);
4619 + ring_cluster_list = NULL;
4621 + sock_register(&ring_family_ops);
4623 + set_skb_ring_handler(skb_ring_handler);
4624 + set_buffer_ring_handler(buffer_ring_handler);
4626 + if(get_buffer_ring_handler() != buffer_ring_handler) {
4627 + printk("PF_RING: set_buffer_ring_handler FAILED\n");
4629 + set_skb_ring_handler(NULL);
4630 + set_buffer_ring_handler(NULL);
4631 + sock_unregister(PF_RING);
4634 + printk("PF_RING: bucket length %d bytes\n", bucket_len);
4635 + printk("PF_RING: ring slots %d\n", num_slots);
4636 + printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate);
4637 + printk("PF_RING: capture TX %s\n",
4638 + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
4639 + printk("PF_RING: transparent mode %s\n",
4640 + transparent_mode ? "Yes" : "No");
4642 + printk("PF_RING initialized correctly.\n");
4649 +module_init(ring_init);
4650 +module_exit(ring_exit);
4651 +MODULE_LICENSE("GPL");
4653 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4654 +MODULE_ALIAS_NETPROTO(PF_RING);