diff --unified --recursive --new-file linux-2.6.21.4/include/linux/ring.h linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h
--- linux-2.6.21.4/include/linux/ring.h 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h 2007-06-10 16:43:04.346421348 +0000
@@ -0,0 +1,240 @@
+/*
+ * Definitions for packet ring
+ *
+ * 2004-07 Luca Deri <deri@ntop.org>
+ */
+#ifndef __RING_H
+#define __RING_H
+
+#define INCLUDE_MAC_INFO
+
+#ifdef INCLUDE_MAC_INFO
+#define SKB_DISPLACEMENT 14 /* Include MAC address information */
+#else
+#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */
+#endif
+
+#define RING_MAGIC
+#define RING_MAGIC_VALUE 0x88
+#define RING_FLOWSLOT_VERSION 6
+#define RING_VERSION "3.4.1"
+
+#define SO_ADD_TO_CLUSTER 99
+#define SO_REMOVE_FROM_CLUSTER 100
+#define SO_SET_REFLECTOR 101
+#define SO_SET_BLOOM 102
+#define SO_SET_STRING 103
+#define SO_TOGGLE_BLOOM_STATE 104
+#define SO_RESET_BLOOM_FILTERS 105
+
+#define BITMASK_SET(n, p) (((char*)p->bits_memory)[n/8] |= (1<<(n % 8)))
+#define BITMASK_CLR(n, p) (((char*)p->bits_memory)[n/8] &= ~(1<<(n % 8)))
+#define BITMASK_ISSET(n, p) (((char*)p->bits_memory)[n/8] & (1<<(n % 8)))
+
+/* *********************************** */
+
+/*
+ Aho-Corasick code taken from Snort
+ under GPL license
+*/
+/*
+ * DEFINES and Typedef's
+ */
+#define MAX_ALPHABET_SIZE 256
+
+/*
+ FAIL STATE for 1,2,or 4 bytes for state transitions
+
+ Uncomment this define to use 32 bit state values
+ #define AC32
+*/
+
+typedef unsigned short acstate_t;
+#define ACSM_FAIL_STATE2 0xffff
+
+/*
+ *
+ */
+typedef
+struct _acsm_pattern2
+{
+ struct _acsm_pattern2 *next;
+
+ unsigned char *patrn;
+ unsigned char *casepatrn;
+ int n;
+ int nocase;
+ int offset;
+ int depth;
+ void * id;
+ int iid;
+
+} ACSM_PATTERN2;
+
+/*
+ * transition nodes - either 8 or 12 bytes
+ */
+typedef
+struct trans_node_s {
+
+ acstate_t key; /* The character that got us here - sized to keep structure aligned on 4 bytes */
+ /* to better the caching opportunities. A value that crosses the cache line */
+ /* forces an expensive reconstruction, typing this as acstate_t stops that. */
+ acstate_t next_state; /* */
+ struct trans_node_s * next; /* next transition for this state */
+
+} trans_node_t;
+
+
+/*
+ * User specified final storage type for the state transitions
+ */
+enum {
+ ACF_FULL,
+ ACF_SPARSE,
+ ACF_BANDED,
+ ACF_SPARSEBANDS,
+};
+
+/*
+ * User specified machine types
+ *
+ * TRIE : Keyword trie
+ * NFA :
+ * DFA :
+ */
+enum {
+ FSA_TRIE,
+ FSA_NFA,
+ FSA_DFA,
+};
+
+/*
+ * Aho-Corasick State Machine Struct - one per group of patterns
+ */
+typedef struct {
+ int acsmMaxStates;
+ int acsmNumStates;
+
+ ACSM_PATTERN2 * acsmPatterns;
+ acstate_t * acsmFailState;
+ ACSM_PATTERN2 ** acsmMatchList;
+
+ /* list of transitions in each state, this is used to build the nfa & dfa */
+ /* after construction we convert to sparse or full format matrix and free */
+ /* the transition lists */
+ trans_node_t ** acsmTransTable;
+
+ acstate_t ** acsmNextState;
+ int acsmFormat;
+ int acsmSparseMaxRowNodes;
+ int acsmSparseMaxZcnt;
+
+ int acsmNumTrans;
+ int acsmAlphabetSize;
+ int acsmFSA;
+
+} ACSM_STRUCT2;
+
+/* *********************************** */
+
+#ifndef HAVE_PCAP
+struct pcap_pkthdr {
+ struct timeval ts; /* time stamp */
+ u_int32_t caplen; /* length of portion present */
+ u_int32_t len; /* length this packet (off wire) */
+ /* packet parsing info */
+ u_int16_t eth_type; /* Ethernet type */
+ u_int16_t vlan_id; /* VLAN Id or -1 for no vlan */
+ u_int8_t l3_proto; /* Layer 3 protocol */
+ u_int16_t l3_offset, l4_offset, payload_offset; /* Offsets of L3/L4/payload elements */
+ u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
+ u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
+};
+#endif
+
+/* *********************************** */
+
+typedef struct _counter_list {
+ u_int32_t bit_id;
+ u_int32_t bit_counter;
+ struct _counter_list *next;
+} bitmask_counter_list;
+
+typedef struct {
+ u_int32_t num_bits, order, num_pages;
+ unsigned long bits_memory;
+ bitmask_counter_list *clashes;
+} bitmask_selector;
+
+/* *********************************** */
+
+enum cluster_type {
+ cluster_per_flow = 0,
+ cluster_round_robin
+};
+
+/* *********************************** */
+
+#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr))
+#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr))
+
+/* *********************************** */
+
+typedef struct flowSlotInfo {
+ u_int16_t version, sample_rate;
+ u_int32_t tot_slots, slot_len, data_len, tot_mem;
+
+ u_int64_t tot_pkts, tot_lost;
+ u_int64_t tot_insert, tot_read;
+ u_int32_t insert_idx, remove_idx;
+} FlowSlotInfo;
+
+/* *********************************** */
+
+typedef struct flowSlot {
+#ifdef RING_MAGIC
+ u_char magic; /* It must always be zero */
+#endif
+ u_char slot_state; /* 0=empty, 1=full */
+ u_char bucket; /* bucket[bucketLen] */
+} FlowSlot;
+
+/* *********************************** */
+
+#ifdef __KERNEL__
+
+FlowSlotInfo* getRingPtr(void);
+int allocateRing(char *deviceName, u_int numSlots,
+ u_int bucketLen, u_int sampleRate);
+unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
+void deallocateRing(void);
+
+/* ************************* */
+
+typedef int (*handle_ring_skb)(struct sk_buff *skb,
+ u_char recv_packet, u_char real_skb);
+extern handle_ring_skb get_skb_ring_handler(void);
+extern void set_skb_ring_handler(handle_ring_skb the_handler);
+extern void do_skb_ring_handler(struct sk_buff *skb,
+ u_char recv_packet, u_char real_skb);
+
+typedef int (*handle_ring_buffer)(struct net_device *dev,
+ char *data, int len);
+extern handle_ring_buffer get_buffer_ring_handler(void);
+extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
+extern int do_buffer_ring_handler(struct net_device *dev,
+ char *data, int len);
+#endif /* __KERNEL__ */
+
+/* *********************************** */
+
+#define PF_RING 27 /* Packet Ring */
+#define SOCK_RING PF_RING
+
+/* ioctl() */
+#define SIORINGPOLL 0x8888
+
+/* *********************************** */
+
+#endif /* __RING_H */
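
(Usage sketch, not part of the patch; patch(1) skips free text placed between
file diffs. It shows how a kernel module might register a capture callback
through the hooks declared in ring.h above. The module and function names are
hypothetical; a real handler would return non-zero once it has copied the
packet into a ring slot.)

	#include <linux/module.h>
	#include <linux/skbuff.h>
	#include <linux/ring.h>

	/* hypothetical callback, called for every packet the stack sees */
	static int example_ring_handler(struct sk_buff *skb,
	                                u_char recv_packet, u_char real_skb)
	{
		/* copy skb into a ring slot here */
		return 0; /* 0 = not consumed, normal processing continues */
	}

	static int __init example_init(void)
	{
		set_skb_ring_handler(example_ring_handler);
		return 0;
	}

	static void __exit example_exit(void)
	{
		set_skb_ring_handler(NULL); /* unhook before unloading */
	}

	module_init(example_init);
	module_exit(example_exit);
	MODULE_LICENSE("GPL");
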
diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/Kconfig
--- linux-2.6.21.4/net/Kconfig 2007-06-07 21:27:31.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/Kconfig 2007-06-10 16:43:04.402423771 +0000
@@ -39,6 +39,7 @@
 source "net/xfrm/Kconfig"
 source "net/iucv/Kconfig"
 
+source "net/ring/Kconfig"
 config INET
 bool "TCP/IP networking"
 ---help---
diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile
--- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000
@@ -45,6 +45,7 @@
 ifneq ($(CONFIG_VLAN_8021Q),)
 obj-y += 8021q/
 endif
+obj-$(CONFIG_RING) += ring/
 obj-$(CONFIG_IP_DCCP) += dccp/
 obj-$(CONFIG_IP_SCTP) += sctp/
 obj-y += wireless/
diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c
--- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000
@@ -133,6 +133,56 @@
 
 #include "net-sysfs.h"
 
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+
+/* #define RING_DEBUG */
+
+#include <linux/ring.h>
+#include <linux/version.h>
+
+static handle_ring_skb ring_handler = NULL;
+
+handle_ring_skb get_skb_ring_handler() { return(ring_handler); }
+
+void set_skb_ring_handler(handle_ring_skb the_handler) {
+ ring_handler = the_handler;
+}
+
+void do_skb_ring_handler(struct sk_buff *skb,
+ u_char recv_packet, u_char real_skb) {
+ if(ring_handler)
+ ring_handler(skb, recv_packet, real_skb);
+}
+
+/* ******************* */
+
+static handle_ring_buffer buffer_ring_handler = NULL;
+
+handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); }
+
+void set_buffer_ring_handler(handle_ring_buffer the_handler) {
+ buffer_ring_handler = the_handler;
+}
+
+int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
+ if(buffer_ring_handler) {
+ buffer_ring_handler(dev, data, len);
+ return(1);
+ } else
+ return(0);
+}
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+EXPORT_SYMBOL(get_skb_ring_handler);
+EXPORT_SYMBOL(set_skb_ring_handler);
+EXPORT_SYMBOL(do_skb_ring_handler);
+
+EXPORT_SYMBOL(get_buffer_ring_handler);
+EXPORT_SYMBOL(set_buffer_ring_handler);
+EXPORT_SYMBOL(do_buffer_ring_handler);
+#endif
+
+#endif
 /*
 * The list of packet types we will receive (as opposed to discard)
 * and the routines to invoke.
@@ -1809,6 +1859,9 @@
 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
 if (q->enqueue) {
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+ if(ring_handler) ring_handler(skb, 0, 1);
+#endif /* CONFIG_RING */
 spinlock_t *root_lock = qdisc_lock(q);
 
 spin_lock(root_lock);
@@ -1908,6 +1961,13 @@
 unsigned long flags;
 
 /* if netpoll wants it, pretend we never saw it */
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+ if(ring_handler && ring_handler(skb, 1, 1)) {
+ /* The packet has been copied into a ring */
+ return(NET_RX_SUCCESS);
+ }
+#endif /* CONFIG_RING */
+
 if (netpoll_rx(skb))
 return NET_RX_DROP;
 
@@ -2193,6 +2253,13 @@
 struct net_device *null_or_orig;
 int ret = NET_RX_DROP;
 __be16 type;
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
+ if(ring_handler && ring_handler(skb, 1, 1)) {
+ /* The packet has been copied into a ring */
+ return(NET_RX_SUCCESS);
+ }
+#endif /* CONFIG_RING */
+
+
 /* if we've gotten here through NAPI, check netpoll */
 if (netpoll_receive_skb(skb))
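
(Sketch, not part of the patch: the driver-side counterpart of the dev.c glue
above. do_buffer_ring_handler() returns 1 when a ring handler is registered
and has taken the raw buffer, so a NIC driver can skip skb allocation; the
variable names below are hypothetical.)

	/* inside a hypothetical NIC driver RX routine */
	if (do_buffer_ring_handler(netdev, pkt_data, pkt_len))
		return; /* consumed by PF_RING: no skb, no netif_rx() */
	/* otherwise fall through to the normal skb receive path */
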
40fd095b 365diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig
366--- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000
367+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000
368@@ -0,0 +1,14 @@
369+config RING
370+ tristate "PF_RING sockets (EXPERIMENTAL)"
371+ depends on EXPERIMENTAL
372+ ---help---
373+ PF_RING socket family, optimized for packet capture.
+ If a PF_RING socket is bound to an adapter (via the bind() system
+ call), that adapter will be used in read-only mode until the socket
+ is destroyed. Whenever an incoming packet is received from the adapter,
+ it is not passed to the upper layers; instead, it is copied to a ring
+ buffer, which in turn is exported to user space applications via mmap().
+ Please refer to http://luca.ntop.org/Ring.pdf for more information.
+
+ Say N unless you know what you are doing.
+
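
(To make the help text concrete, a rough user-space capture setup might look
as follows. This is not part of the patch; the bind() address format and the
ring length come from ring_packet.c, which is only partially shown here, so
treat those details as assumptions.)

	#include <arpa/inet.h>      /* htons() */
	#include <linux/if_ether.h> /* ETH_P_ALL */
	#include <linux/ring.h>     /* PF_RING, FlowSlotInfo */
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/socket.h>

	int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));

	struct sockaddr sa;          /* assumed layout: family + device name */
	sa.sa_family = PF_RING;
	strncpy(sa.sa_data, "eth0", sizeof(sa.sa_data));
	bind(fd, &sa, sizeof(sa));

	/* the mapping starts with FlowSlotInfo; the ring slots follow it
	   (ring_len is assumed to equal tot_mem, learned out of band) */
	FlowSlotInfo *info = (FlowSlotInfo *) mmap(NULL, ring_len,
	                        PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
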
diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile
--- linux-2.6.21.4/net/ring/Makefile 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile 2007-06-10 16:43:04.350421521 +0000
@@ -0,0 +1,7 @@
+#
+# Makefile for the ring driver.
+#
+
+obj-m += ring.o
+
+ring-objs := ring_packet.o
diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c
--- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000
@@ -0,0 +1,4258 @@
+/* ***************************************************************
+ *
+ * (C) 2004-07 - Luca Deri <deri@ntop.org>
+ *
+ * This code includes contributions courtesy of
+ * - Jeff Randall <jrandall@nexvu.com>
+ * - Helmut Manck <helmut.manck@secunet.com>
+ * - Brad Doctor <brad@stillsecure.com>
+ * - Amit D. Chaudhary <amit_ml@rajgad.com>
+ * - Francesco Fusco <fusco@ntop.org>
+ * - Michael Stiller <ms@2scale.net>
+ *
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <linux/version.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
+#include <linux/autoconf.h>
+#else
+#include <linux/config.h>
+#endif
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/socket.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/in.h>
+#include <linux/inet.h>
+#include <linux/in6.h>
+#include <linux/init.h>
+#include <linux/filter.h>
+#include <linux/ring.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/list.h>
+#include <linux/proc_fs.h>
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+#include <net/xfrm.h>
+#else
+#include <linux/poll.h>
+#endif
+#include <net/sock.h>
+#include <asm/io.h> /* needed for virt_to_phys() */
+#ifdef CONFIG_INET
+#include <net/inet_common.h>
+#endif
+
+/* #define RING_DEBUG */
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
+static inline int remap_page_range(struct vm_area_struct *vma,
+ unsigned long uvaddr,
+ unsigned long paddr,
+ unsigned long size,
+ pgprot_t prot) {
+ return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT,
+ size, prot));
+}
+#endif
+
+/* ************************************************* */
+
+#define CLUSTER_LEN 8
+
+struct ring_cluster {
+ u_short cluster_id; /* 0 = no cluster */
+ u_short num_cluster_elements;
+ enum cluster_type hashing_mode;
+ u_short hashing_id;
+ struct sock *sk[CLUSTER_LEN];
+ struct ring_cluster *next; /* NULL = last element of the cluster */
+};
+
+/* ************************************************* */
+
+struct ring_element {
+ struct list_head list;
+ struct sock *sk;
+};
+
+/* ************************************************* */
+
+struct ring_opt {
+ struct net_device *ring_netdev;
+
+ u_short ring_pid;
+
+ /* Cluster */
+ u_short cluster_id; /* 0 = no cluster */
+
+ /* Reflector */
+ struct net_device *reflector_dev;
+
+ /* Packet buffers */
+ unsigned long order;
+
+ /* Ring Slots */
+ unsigned long ring_memory;
+ FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
+ char *ring_slots; /* Basically it points to ring_memory
+ +sizeof(FlowSlotInfo) */
+
+ /* Packet Sampling */
+ u_int pktToSample, sample_rate;
+
+ /* BPF Filter */
+ struct sk_filter *bpfFilter;
+
+ /* Aho-Corasick */
+ ACSM_STRUCT2 * acsm;
+
+ /* Locks */
+ atomic_t num_ring_slots_waiters;
+ wait_queue_head_t ring_slots_waitqueue;
+ rwlock_t ring_index_lock;
+
+ /* Bloom Filters */
+ u_char bitmask_enabled;
+ bitmask_selector mac_bitmask, vlan_bitmask, ip_bitmask, twin_ip_bitmask,
+ port_bitmask, twin_port_bitmask, proto_bitmask;
+ u_int32_t num_mac_bitmask_add, num_mac_bitmask_remove;
+ u_int32_t num_vlan_bitmask_add, num_vlan_bitmask_remove;
+ u_int32_t num_ip_bitmask_add, num_ip_bitmask_remove;
+ u_int32_t num_port_bitmask_add, num_port_bitmask_remove;
+ u_int32_t num_proto_bitmask_add, num_proto_bitmask_remove;
+
+ /* Indexes (Internal) */
+ u_int insert_page_id, insert_slot_id;
+};
+
+/* ************************************************* */
+
+/* List of all ring sockets. */
+static struct list_head ring_table;
+static u_int ring_table_size;
+
+/* List of all clusters */
+static struct ring_cluster *ring_cluster_list;
+
+static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
+
+/* ********************************** */
+
+/* /proc entry for ring module */
+struct proc_dir_entry *ring_proc_dir = NULL;
+struct proc_dir_entry *ring_proc = NULL;
+
+static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
+static void ring_proc_add(struct ring_opt *pfr);
+static void ring_proc_remove(struct ring_opt *pfr);
+static void ring_proc_init(void);
+static void ring_proc_term(void);
+
+/* ********************************** */
+
+/* Forward */
+static struct proto_ops ring_ops;
+
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
+static struct proto ring_proto;
+#endif
+
+static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
+ u_char real_skb);
+static int buffer_ring_handler(struct net_device *dev, char *data, int len);
+static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
+
+/* Extern */
+
+/* ********************************** */
+
+/* Defaults */
+static unsigned int bucket_len = 128, num_slots = 4096, sample_rate = 1,
+ transparent_mode = 1, enable_tx_capture = 1;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
+module_param(bucket_len, uint, 0644);
+module_param(num_slots, uint, 0644);
+module_param(sample_rate, uint, 0644);
+module_param(transparent_mode, uint, 0644);
+module_param(enable_tx_capture, uint, 0644);
+#else
+MODULE_PARM(bucket_len, "i");
+MODULE_PARM(num_slots, "i");
+MODULE_PARM(sample_rate, "i");
+MODULE_PARM(transparent_mode, "i");
+MODULE_PARM(enable_tx_capture, "i");
+#endif
+
+MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
+MODULE_PARM_DESC(num_slots, "Number of ring slots");
+MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
+MODULE_PARM_DESC(transparent_mode,
+ "Set to 1 to set transparent mode "
+ "(slower but backwards compatible)");
+
+MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
+
+/* ********************************** */
+
+#define MIN_QUEUED_PKTS 64
+#define MAX_QUEUE_LOOPS 64
+
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
+#define ring_sk(__sk) ((__sk)->sk_protinfo)
+#else
+#define ring_sk_datatype(a) (a)
+#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
+#endif
+
+#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
+
+/*
+ int dev_queue_xmit(struct sk_buff *skb)
+ skb->dev;
+ struct net_device *dev_get_by_name(const char *name)
+*/
+
+/* ********************************** */
+
+/*
+** $Id$
+**
+** acsmx2.c
+**
+** Multi-Pattern Search Engine
+**
+** Aho-Corasick State Machine - version 2.0
+**
+** Supports both Non-Deterministic and Deterministic Finite Automata
+**
+**
+** Reference - Efficient String matching: An Aid to Bibliographic Search
+** Alfred V Aho and Margaret J Corasick
+** Bell Laboratories
+** Copyright(C) 1975 Association for Computing Machinery,Inc
+**
+** +++
+** +++ Version 1.0 notes - Marc Norton:
+** +++
+**
+** Original implementation based on the 4 algorithms in the paper by Aho & Corasick,
+** some implementation ideas from 'Practical Algorithms in C', and some
+** of my own.
+**
+** 1) Finds all occurrences of all patterns within a text.
+**
+** +++
+** +++ Version 2.0 Notes - Marc Norton/Dan Roelker:
+** +++
+**
+** New implementation modifies the state table storage and access model to use
+** compacted sparse vector storage. Dan Roelker and I hammered this strategy out
+** amongst many others in order to reduce memory usage and improve caching performance.
+** The memory usage is greatly reduced, we only use 1/4 of what we used to. The caching
+** performance is better in pure benchmarking tests, but does not show overall improvement
+** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing
+** many other things before we get back to a pattern match test, so the cache is voided.
+**
+** This version has better caching performance characteristics, reduced memory,
+** more state table storage options, and requires no a priori case conversions.
+** It does maintain the same public interface. (Snort only used banded storage).
+**
+** 1) Supports NFA and DFA state machines, and basic keyword state machines
+** 2) Initial transition table uses Linked Lists
+** 3) Improved state table memory options. NFA and DFA state
+** transition tables are converted to one of 4 formats during compilation.
+** a) Full matrix
+** b) Sparse matrix
+** c) Banded matrix (Default-this is the only one used in snort)
+** d) Sparse-Banded matrix
+** 4) Added support for acstate_t in .h file so we can compile states as
+** 16, or 32 bit state values for another reduction in memory consumption,
+** smaller states allows more of the state table to be cached, and improves
+** performance on x86-P4. Your mileage may vary, especially on risc systems.
+** 5) Added a bool to each state transition list to indicate if there is a matching
+** pattern in the state. This prevents us from accessing another data array
+** and can improve caching/performance.
+** 6) The search functions are very sensitive, don't change them without extensive testing,
+** or you'll just spoil the caching and prefetching opportunities.
+**
+** Extras for fellow pattern matchers:
+** The table below explains the storage format used at each step.
+** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.
+** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.
+** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster
+** than the previous version, due to caching performance. This cannot be fully realized in Snort yet,
+** but other applications may have better caching opportunities.
+** Snort only needs to use the banded or full storage.
+**
+** Transition table format at each processing stage.
+** -------------------------------------------------
+** Patterns -> Keyword State Table (List)
+** Keyword State Table -> NFA (List)
+** NFA -> DFA (List)
+** DFA (List)-> Sparse Rows O(m-avg # transitions per state)
+** -> Banded Rows O(1)
+** -> Sparse-Banded Rows O(nb-# bands)
+** -> Full Matrix O(1)
+**
+** Copyright(C) 2002,2003,2004 Marc Norton
+** Copyright(C) 2003,2004 Daniel Roelker
+** Copyright(C) 2002,2003,2004 Sourcefire,Inc.
+**
+** This program is free software; you can redistribute it and/or modify
+** it under the terms of the GNU General Public License as published by
+** the Free Software Foundation; either version 2 of the License, or
+** (at your option) any later version.
+**
+** This program is distributed in the hope that it will be useful,
+** but WITHOUT ANY WARRANTY; without even the implied warranty of
+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+** GNU General Public License for more details.
+**
+** You should have received a copy of the GNU General Public License
+** along with this program; if not, write to the Free Software
+** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+*
+*/
+
+/*
+ *
+ */
+#define MEMASSERT(p,s) if(!p){printk("ACSM-No Memory: %s!\n",s);}
+
+/*
+ *
+ */
+static int max_memory = 0;
+
+/*
+ *
+ */
+typedef struct acsm_summary_s
+{
+ unsigned num_states;
+ unsigned num_transitions;
+ ACSM_STRUCT2 acsm;
+
+}acsm_summary_t;
+
+/*
+ *
+ */
+static acsm_summary_t summary={0,0};
+
+/*
+** Case Translation Table
+*/
+static unsigned char xlatcase[256];
+/*
+ *
+ */
+
+inline int toupper(int ch) {
+ if ( (unsigned int)(ch - 'a') < 26u )
+ ch += 'A' - 'a';
+ return ch;
+}
+
+static void init_xlatcase(void)
+{
+ int i;
+ for (i = 0; i < 256; i++)
+ {
+ xlatcase[i] = toupper(i);
+ }
+}
+
+/*
+ * Case Conversion
+ */
+static
+inline
+void
+ConvertCaseEx (unsigned char *d, unsigned char *s, int m)
+{
+ int i;
+#ifdef XXXX
+ int n;
+ n = m & 3;
+ m >>= 2;
+
+ for (i = 0; i < m; i++ )
+ {
+ d[0] = xlatcase[ s[0] ];
+ d[2] = xlatcase[ s[2] ];
+ d[1] = xlatcase[ s[1] ];
+ d[3] = xlatcase[ s[3] ];
+ d+=4;
+ s+=4;
+ }
+
+ for (i=0; i < n; i++)
+ {
+ d[i] = xlatcase[ s[i] ];
+ }
+#else
+ for (i=0; i < m; i++)
+ {
+ d[i] = xlatcase[ s[i] ];
+ }
+
+#endif
+}
+
+
+/*
+ *
+ */
+static void *
+AC_MALLOC (int n)
+{
+ void *p;
+ p = kmalloc (n, GFP_KERNEL);
+ if (p)
+ max_memory += n;
+ return p;
+}
+
+
+/*
+ *
+ */
+static void
+AC_FREE (void *p)
+{
+ if (p)
+ kfree (p);
+}
+
+
+/*
+ * Simple QUEUE NODE
+ */
+typedef struct _qnode
+{
+ int state;
+ struct _qnode *next;
+}
+ QNODE;
+
+/*
+ * Simple QUEUE Structure
+ */
+typedef struct _queue
+{
+ QNODE * head, *tail;
+ int count;
+}
+ QUEUE;
+
+/*
+ * Initialize the queue
+ */
+static void
+queue_init (QUEUE * s)
+{
+ s->head = s->tail = 0;
+ s->count= 0;
+}
+
+/*
+ * Find a State in the queue
+ */
+static int
+queue_find (QUEUE * s, int state)
+{
+ QNODE * q;
+ q = s->head;
+ while( q )
+ {
+ if( q->state == state ) return 1;
+ q = q->next;
+ }
+ return 0;
+}
+
+/*
+ * Add Tail Item to queue (FiFo/LiLo)
+ */
+static void
+queue_add (QUEUE * s, int state)
+{
+ QNODE * q;
+
+ if( queue_find( s, state ) ) return;
+
+ if (!s->head)
+ {
+ q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
+ MEMASSERT (q, "queue_add");
+ q->state = state;
+ q->next = 0;
+ }
+ else
+ {
+ q = (QNODE *) AC_MALLOC (sizeof (QNODE));
+ q->state = state;
+ q->next = 0;
+ s->tail->next = q;
+ s->tail = q;
+ }
+ s->count++;
+}
+
+
+/*
+ * Remove Head Item from queue
+ */
+static int
+queue_remove (QUEUE * s)
+{
+ int state = 0;
+ QNODE * q;
+ if (s->head)
+ {
+ q = s->head;
+ state = q->state;
+ s->head = s->head->next;
+ s->count--;
+
+ if( !s->head )
+ {
+ s->tail = 0;
+ s->count = 0;
+ }
+ AC_FREE (q);
+ }
+ return state;
+}
+
+
+/*
+ * Return items in the queue
+ */
+static int
+queue_count (QUEUE * s)
+{
+ return s->count;
+}
+
+
+/*
+ * Free the queue
+ */
+static void
+queue_free (QUEUE * s)
+{
+ while (queue_count (s))
+ {
+ queue_remove (s);
+ }
+}
+
+/*
+ * Get Next State-NFA
+ */
+static
+int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
+{
+ trans_node_t * t = acsm->acsmTransTable[state];
+
+ while( t )
+ {
+ if( t->key == input )
+ {
+ return t->next_state;
+ }
+ t=t->next;
+ }
+
+ if( state == 0 ) return 0;
+
+ return ACSM_FAIL_STATE2; /* Fail state ??? */
+}
+
+/*
+ * Get Next State-DFA
+ */
+static
+int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
+{
+ trans_node_t * t = acsm->acsmTransTable[state];
+
+ while( t )
+ {
+ if( t->key == input )
+ {
+ return t->next_state;
+ }
+ t = t->next;
+ }
+
+ return 0; /* default state */
+}
+/*
+ * Put Next State - Head insertion, and transition updates
+ */
+static
+int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
+{
+ trans_node_t * p;
+ trans_node_t * tnew;
+
+ // printk(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state);
+
+
+ /* Check if the transition already exists, if so just update the next_state */
+ p = acsm->acsmTransTable[state];
+ while( p )
+ {
+ if( p->key == input ) /* transition already exists- reset the next state */
+ {
+ p->next_state = next_state;
+ return 0;
+ }
+ p=p->next;
+ }
+
+ /* Definitely not an existing transition - add it */
+ tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
+ if( !tnew ) return -1;
+
+ tnew->key = input;
+ tnew->next_state = next_state;
+ tnew->next = 0;
+
+ tnew->next = acsm->acsmTransTable[state];
+ acsm->acsmTransTable[state] = tnew;
+
+ acsm->acsmNumTrans++;
+
+ return 0;
+}
+/*
+ * Free the entire transition table
+ */
+static
+int List_FreeTransTable( ACSM_STRUCT2 * acsm )
+{
+ int i;
+ trans_node_t * t, *p;
+
+ if( !acsm->acsmTransTable ) return 0;
+
+ for(i=0;i< acsm->acsmMaxStates;i++)
+ {
+ t = acsm->acsmTransTable[i];
+
+ while( t )
+ {
+ p = t->next;
+ kfree(t);
+ t = p;
+ max_memory -= sizeof(trans_node_t);
+ }
+ }
+
+ kfree(acsm->acsmTransTable);
+
+ max_memory -= sizeof(void*) * acsm->acsmMaxStates;
+
+ acsm->acsmTransTable = 0;
+
+ return 0;
+}
+
+/*
+ *
+ */
+/*
+ static
+ int List_FreeList( trans_node_t * t )
+ {
+ int tcnt=0;
+
+ trans_node_t *p;
+
+ while( t )
+ {
+ p = t->next;
+ kfree(t);
+ t = p;
+ max_memory -= sizeof(trans_node_t);
+ tcnt++;
+ }
+
+ return tcnt;
+ }
+*/
+
+/*
+ * Converts a row of states from list to a full vector format
+ */
+static
+int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
+{
+ int tcnt = 0;
+ trans_node_t * t = acsm->acsmTransTable[ state ];
+
+ memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
+
+ if( !t ) return 0;
+
+ while(t)
+ {
+ full[ t->key ] = t->next_state;
+ tcnt++;
+ t = t->next;
+ }
+ return tcnt;
+}
+
+/*
+ * Copy a Match List Entry - don't dup the pattern data
+ */
+static ACSM_PATTERN2*
+CopyMatchListEntry (ACSM_PATTERN2 * px)
+{
+ ACSM_PATTERN2 * p;
+
+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+ MEMASSERT (p, "CopyMatchListEntry");
+
+ memcpy (p, px, sizeof (ACSM_PATTERN2));
+
+ p->next = 0;
+
+ return p;
+}
+
+/*
+ * Check if a pattern is in the list already,
+ * validate it using the 'id' field. This must be unique
+ * for every pattern.
+ */
+/*
+ static
+ int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
+ {
+ ACSM_PATTERN2 * p;
+
+ p = acsm->acsmMatchList[state];
+ while( p )
+ {
+ if( p->id == px->id ) return 1;
+ p = p->next;
+ }
+
+ return 0;
+ }
+*/
+
+
+/*
+ * Add a pattern to the list of patterns terminated at this state.
+ * Insert at front of list.
+ */
+static void
+AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
+{
+ ACSM_PATTERN2 * p;
+
+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+
+ MEMASSERT (p, "AddMatchListEntry");
+
+ memcpy (p, px, sizeof (ACSM_PATTERN2));
+
+ p->next = acsm->acsmMatchList[state];
+
+ acsm->acsmMatchList[state] = p;
+}
+
+
+static void
+AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p)
+{
+ int state, next, n;
+ unsigned char *pattern;
+
+ n = p->n;
+ pattern = p->patrn;
+ state = 0;
+
+ /*
+ * Match up pattern with existing states
+ */
+ for (; n > 0; pattern++, n--)
+ {
+ next = List_GetNextState(acsm,state,*pattern);
+ if (next == ACSM_FAIL_STATE2 || next == 0)
+ {
+ break;
+ }
+ state = next;
+ }
+
+ /*
+ * Add new states for the rest of the pattern bytes, 1 state per byte
+ */
+ for (; n > 0; pattern++, n--)
+ {
+ acsm->acsmNumStates++;
+ List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
+ state = acsm->acsmNumStates;
+ }
+
+ AddMatchListEntry (acsm, state, p );
+}
+
+/*
+ * Build A Non-Deterministic Finite Automata
+ * The keyword state table must already be built, via AddPatternStates().
+ */
+static void
+Build_NFA (ACSM_STRUCT2 * acsm)
+{
+ int r, s, i;
+ QUEUE q, *queue = &q;
+ acstate_t * FailState = acsm->acsmFailState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+ ACSM_PATTERN2 * mlist,* px;
+
+ /* Init a Queue */
+ queue_init (queue);
+
+
+ /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ {
+ s = List_GetNextState2(acsm,0,i);
+ if( s )
+ {
+ queue_add (queue, s);
+ FailState[s] = 0;
+ }
+ }
+
+ /* Build the fail state successive layer of transitions */
+ while (queue_count (queue) > 0)
+ {
+ r = queue_remove (queue);
+
+ /* Find Final States for any Failure */
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ {
+ int fs, next;
+
+ s = List_GetNextState(acsm,r,i);
+
+ if( s != ACSM_FAIL_STATE2 )
+ {
+ queue_add (queue, s);
+
+ fs = FailState[r];
+
+ /*
+ * Locate the next valid state for 'i' starting at fs
+ */
+ while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
+ {
+ fs = FailState[fs];
+ }
+
+ /*
+ * Update 's' state failure state to point to the next valid state
+ */
+ FailState[s] = next;
+
+ /*
+ * Copy 'next'states MatchList to 's' states MatchList,
+ * we copy them so each list can be AC_FREE'd later,
+ * else we could just manipulate pointers to fake the copy.
+ */
+ for( mlist = MatchList[next];
+ mlist;
+ mlist = mlist->next)
+ {
+ px = CopyMatchListEntry (mlist);
+
+ /* Insert at front of MatchList */
+ px->next = MatchList[s];
+ MatchList[s] = px;
+ }
+ }
+ }
+ }
+
+ /* Clean up the queue */
+ queue_free (queue);
+}
+
+/*
+ * Build Deterministic Finite Automata from the NFA
+ */
+static void
+Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm)
+{
+ int i, r, s, cFailState;
+ QUEUE q, *queue = &q;
+ acstate_t * FailState = acsm->acsmFailState;
+
+ /* Init a Queue */
+ queue_init (queue);
+
+ /* Add the state 0 transitions 1st */
+ for(i=0; i<acsm->acsmAlphabetSize; i++)
+ {
+ s = List_GetNextState(acsm,0,i);
+ if ( s != 0 )
+ {
+ queue_add (queue, s);
+ }
+ }
+
+ /* Start building the next layer of transitions */
+ while( queue_count(queue) > 0 )
+ {
+ r = queue_remove(queue);
+
+ /* Process this states layer */
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ {
+ s = List_GetNextState(acsm,r,i);
+
+ if( s != ACSM_FAIL_STATE2 && s!= 0)
+ {
+ queue_add (queue, s);
+ }
+ else
+ {
+ cFailState = List_GetNextState(acsm,FailState[r],i);
+
+ if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
+ {
+ List_PutNextState(acsm,r,i,cFailState);
+ }
+ }
+ }
+ }
+
+ /* Clean up the queue */
+ queue_free (queue);
+}
+
+/*
+ *
+ * Convert the row lists for the state table to a full vector format
+ *
+ */
+static int
+Conv_List_To_Full(ACSM_STRUCT2 * acsm)
+{
+ int tcnt, k;
+ acstate_t * p;
+ acstate_t ** NextState = acsm->acsmNextState;
+
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ {
+ p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
+ if(!p) return -1;
+
+ tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
+
+ p[0] = ACF_FULL;
+ p[1] = 0; /* no matches yet */
+
+ NextState[k] = p; /* now we have a full format row vector */
+ }
+
+ return 0;
+}
+
+/*
+ * Convert DFA memory usage from list based storage to a sparse-row storage.
+ *
+ * The Sparse format allows each row to be either full or sparse formatted. If the sparse row has
+ * too many transitions, performance or space may dictate that we use the standard full formatting
+ * for the row. More than 5 or 10 transitions per state ought to really whack performance. So the
+ * user can specify the max state transitions per state allowed in the sparse format.
+ *
+ * Standard Full Matrix Format
+ * ---------------------------
+ * acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)
+ *
+ * example:
+ *
+ * events -> a b c d e f g h i j k l m n o p
+ * states
+ * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0
+ *
+ * Sparse Format, each row : Words Value
+ * 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands)
+ * 2-2 bool match flag (indicates this state has pattern matches)
+ * 3-3 sparse state count ( # of input/next-state pairs )
+ * 4-3+2*cnt 'input,next-state' pairs... each sizeof(acstate_t)
+ *
+ * above example case yields:
+ * Full Format: 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ...
+ * Sparse format: 1, 3, 'a',1,'b',7,'f',3 - uses 2+2*ntransitions (non-default transitions)
+ */
+static int
+Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm)
+{
+ int cnt, m, k, i;
+ acstate_t * p, state, maxstates=0;
+ acstate_t ** NextState = acsm->acsmNextState;
+ acstate_t full[MAX_ALPHABET_SIZE];
+
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ {
+ cnt=0;
+
+ List_ConvToFull(acsm, (acstate_t)k, full );
+
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ {
+ state = full[i];
+ if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
+ }
+
+ if( cnt > 0 ) maxstates++;
+
+ if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes )
+ {
+ p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
+ if(!p) return -1;
+
+ p[0] = ACF_FULL;
+ p[1] = 0;
+ memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));
+ }
+ else
+ {
+ p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
+ if(!p) return -1;
+
+ m = 0;
+ p[m++] = ACF_SPARSE;
+ p[m++] = 0; /* no matches */
+ p[m++] = cnt;
+
+ for(i = 0; i < acsm->acsmAlphabetSize ; i++)
+ {
+ state = full[i];
+ if( state != 0 && state != ACSM_FAIL_STATE2 )
+ {
+ p[m++] = i;
+ p[m++] = state;
+ }
+ }
+ }
+
+ NextState[k] = p; /* now we are a sparse formatted state transition array */
+ }
+
+ return 0;
+}
+/*
+ Convert Full matrix to Banded row format.
+
+ Word values
+ 1 2 -> banded
+ 2 n number of values
+ 3 i index of 1st value (0-256)
+ 4 - 3+n next-state values at each index
+
+*/
+static int
+Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm)
+{
+ int first = -1, last;
+ acstate_t * p, state, full[MAX_ALPHABET_SIZE];
+ acstate_t ** NextState = acsm->acsmNextState;
+ int cnt,m,k,i;
+
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ {
+ cnt=0;
+
+ List_ConvToFull(acsm, (acstate_t)k, full );
+
+ first=-1;
+ last =-2;
+
+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
+ {
+ state = full[i];
+
+ if( state !=0 && state != ACSM_FAIL_STATE2 )
+ {
+ if( first < 0 ) first = i;
+ last = i;
+ }
+ }
+
+ /* calc band width */
+ cnt= last - first + 1;
+
+ p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));
+
+ if(!p) return -1;
+
+ m = 0;
+ p[m++] = ACF_BANDED;
+ p[m++] = 0; /* no matches */
+ p[m++] = cnt;
+ p[m++] = first;
+
+ for(i = first; i <= last; i++)
+ {
+ p[m++] = full[i];
+ }
+
+ NextState[k] = p; /* now we are a banded formatted state transition array */
+ }
+
+ return 0;
+}
+
+/*
+ * Convert full matrix to Sparse Band row format.
+ *
+ * next - Full formatted row of next states
+ * asize - size of alphabet
+ * zcnt - max number of zeros in a run of zeros in any given band.
+ *
+ * Word Values
+ * 1 ACF_SPARSEBANDS
+ * 2 number of bands
+ * repeat 3 - 5+ ....once for each band in this row.
+ * 3 number of items in this band, 4 start index of this band
+ * 5- next-state values in this band...
+ */
+static
+int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
+{
+ int i, nbands,zcnt,last=0;
+ acstate_t state;
+
+ nbands=0;
+ for( i=0; i<asize; i++ )
+ {
+ state = next[i];
+
+ if( state !=0 && state != ACSM_FAIL_STATE2 )
+ {
+ begin[nbands] = i;
+ zcnt=0;
+
+ for( ; i< asize; i++ )
+ {
+ state = next[i];
+ if( state ==0 || state == ACSM_FAIL_STATE2 )
+ {
+ zcnt++;
+ if( zcnt > zmax ) break;
+ }
+ else
+ {
+ zcnt=0;
+ last = i;
+ }
+ }
+
+ end[nbands++] = last;
+
+ }
+ }
+
+ return nbands;
+}
+
+
+/*
+ * Sparse Bands
+ *
+ * Row Format:
+ * Word
+ * 1 SPARSEBANDS format indicator
+ * 2 bool indicates a pattern match in this state
+ * 3 number of sparse bands
+ * 4 number of elements in this band
+ * 5 start index of this band
+ * 6- list of next states
+ *
+ * m number of elements in this band
+ * m+1 start index of this band
+ * m+2- list of next states
+ */
+static int
+Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm)
+{
+ acstate_t * p;
+ acstate_t ** NextState = acsm->acsmNextState;
+ int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;
+
+ int band_begin[MAX_ALPHABET_SIZE];
+ int band_end[MAX_ALPHABET_SIZE];
+ int nbands,j;
+ acstate_t full[MAX_ALPHABET_SIZE];
+
+ for(k=0;k<acsm->acsmMaxStates;k++)
+ {
+ cnt=0;
+
+ List_ConvToFull(acsm, (acstate_t)k, full );
+
+ nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );
+
+ /* calc band width space*/
+ cnt = 3;
+ for(i=0;i<nbands;i++)
+ {
+ cnt += 2;
+ cnt += band_end[i] - band_begin[i] + 1;
+
+ /*printk("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */
+ }
+
+ p = AC_MALLOC(sizeof(acstate_t)*(cnt));
+
+ if(!p) return -1;
+
+ m = 0;
+ p[m++] = ACF_SPARSEBANDS;
+ p[m++] = 0; /* no matches */
+ p[m++] = nbands;
+
+ for( i=0;i<nbands;i++ )
+ {
+ p[m++] = band_end[i] - band_begin[i] + 1; /* # states in this band */
+ p[m++] = band_begin[i]; /* start index */
+
+ for( j=band_begin[i]; j<=band_end[i]; j++ )
+ {
+ p[m++] = full[j]; /* some states may be state zero */
+ }
+ }
+
+ NextState[k] = p; /* now we are a sparse-banded formatted state transition array */
+ }
+
+ return 0;
+}
+
+/*
+ *
+ * Convert an NFA or DFA row from sparse to full format
+ * and store into the 'full' buffer.
+ *
+ * returns:
+ * 0 - failed, no state transitions
+ * *p - pointer to 'full' buffer
+ *
+ */
+/*
+ static
+ acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full )
+ {
+ int i;
+ acstate_t * p, n, fmt, index, nb, bmatch;
+ acstate_t ** NextState = acsm->acsmNextState;
+
+ p = NextState[k];
+
+ if( !p ) return 0;
+
+ fmt = *p++;
+
+ bmatch = *p++;
+
+ if( fmt ==ACF_SPARSE )
+ {
+ n = *p++;
+ for( ; n>0; n--, p+=2 )
+ {
+ full[ p[0] ] = p[1];
+ }
+ }
+ else if( fmt ==ACF_BANDED )
+ {
+
+ n = *p++;
+ index = *p++;
+
+ for( ; n>0; n--, p++ )
+ {
+ full[ index++ ] = p[0];
+ }
+ }
+ else if( fmt ==ACF_SPARSEBANDS )
+ {
+ nb = *p++;
+ for(i=0;i<nb;i++)
+ {
+ n = *p++;
+ index = *p++;
+ for( ; n>0; n--, p++ )
+ {
+ full[ index++ ] = p[0];
+ }
+ }
+ }
+ else if( fmt == ACF_FULL )
+ {
+ memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t));
+ }
+
+ return full;
+ }
+*/
+
+/*
+ * Select the desired storage mode
+ */
+int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
+{
+ switch( m )
+ {
+ case ACF_FULL:
+ case ACF_SPARSE:
+ case ACF_BANDED:
+ case ACF_SPARSEBANDS:
+ acsm->acsmFormat = m;
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+/*
+ *
+ */
+void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
+{
+ acsm->acsmSparseMaxZcnt = n;
+}
+/*
+ *
+ */
+void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
+{
+ acsm->acsmSparseMaxRowNodes = n;
+}
+/*
+ *
+ */
+int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
+{
+ switch( m )
+ {
+ case FSA_TRIE:
+ case FSA_NFA:
+ case FSA_DFA:
+ acsm->acsmFSA = m; return 0;
+ default:
+ return -1;
+ }
+}
+/*
+ *
+ */
+int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
+{
+ if( n <= MAX_ALPHABET_SIZE )
+ {
+ acsm->acsmAlphabetSize = n;
+ }
+ else
+ {
+ return -1;
+ }
+ return 0;
+}
+/*
+ * Create a new AC state machine
+ */
+static ACSM_STRUCT2 * acsmNew2 (void)
+{
+ ACSM_STRUCT2 * p;
+
+ init_xlatcase ();
+
+ p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
+ MEMASSERT (p, "acsmNew");
+
+ if (p)
+ {
+ memset (p, 0, sizeof (ACSM_STRUCT2));
+
+ /* Some defaults */
+ p->acsmFSA = FSA_DFA;
+ p->acsmFormat = ACF_BANDED;
+ p->acsmAlphabetSize = 256;
+ p->acsmSparseMaxRowNodes = 256;
+ p->acsmSparseMaxZcnt = 10;
+ }
+
+ return p;
+}
+/*
+ * Add a pattern to the list of patterns for this state machine
+ *
+ */
+int
+acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
+ int offset, int depth, void * id, int iid)
+{
+ ACSM_PATTERN2 * plist;
+
+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+ MEMASSERT (plist, "acsmAddPattern");
+
+ plist->patrn = (unsigned char *) AC_MALLOC ( n );
+ MEMASSERT (plist->patrn, "acsmAddPattern");
+
+ ConvertCaseEx(plist->patrn, pat, n);
+
+ plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
+ MEMASSERT (plist->casepatrn, "acsmAddPattern");
+
+ memcpy (plist->casepatrn, pat, n);
+
+ plist->n = n;
+ plist->nocase = nocase;
+ plist->offset = offset;
+ plist->depth = depth;
+ plist->id = id;
+ plist->iid = iid;
+
+ plist->next = p->acsmPatterns;
+ p->acsmPatterns = plist;
+
+ return 0;
+}
+/*
+ * Add a Key to the list of key+data pairs
+ */
+int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
+{
+ ACSM_PATTERN2 * plist;
+
+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
+ MEMASSERT (plist, "acsmAddPattern");
+
+ plist->patrn = (unsigned char *) AC_MALLOC (klen);
+ memcpy (plist->patrn, key, klen);
+
+ plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
+ memcpy (plist->casepatrn, key, klen);
+
+ plist->n = klen;
+ plist->nocase = nocase;
+ plist->offset = 0;
+ plist->depth = 0;
+ plist->id = 0;
+ plist->iid = 0;
+
+ plist->next = p->acsmPatterns;
+ p->acsmPatterns = plist;
+
+ return 0;
+}
+
+/*
+ * Copy a boolean match flag into the NextState table, for caching purposes.
+ */
+static
+void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
+{
+ acstate_t state;
+ acstate_t ** NextState = acsm->acsmNextState;
+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
+
+ for( state=0; state<acsm->acsmNumStates; state++ )
+ {
+ if( MatchList[state] )
+ {
+ NextState[state][1] = 1;
+ }
+ else
+ {
+ NextState[state][1] = 0;
+ }
+ }
+}
+
+/*
+ * Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands
+ */
+int
+acsmCompile2 (ACSM_STRUCT2 * acsm)
+{
+ int k;
+ ACSM_PATTERN2 * plist;
+
+ /* Count number of states */
+ for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
+ {
+ acsm->acsmMaxStates += plist->n;
+ /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
+ }
+ acsm->acsmMaxStates++; /* one extra */
+
+ /* Alloc a List based State Transition table */
+ acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
+ MEMASSERT (acsm->acsmTransTable, "acsmCompile");
+
+ memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);
+
+ /* Alloc a failure table - this has a failure state, and a match list for each state */
+ acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
+ MEMASSERT (acsm->acsmFailState, "acsmCompile");
+
+ memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );
+
+ /* Alloc a MatchList table - this has a list of pattern matches for each state, if any */
+ acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
+ MEMASSERT (acsm->acsmMatchList, "acsmCompile");
+
+ memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
+
+ /* Alloc a separate state transition table == in state 's' due to event 'k', transition to 'next' state */
+ acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
+ MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");
+
+ for (k = 0; k < acsm->acsmMaxStates; k++)
+ {
+ acsm->acsmNextState[k]=(acstate_t*)0;
+ }
+
+ /* Initialize state zero as a branch */
+ acsm->acsmNumStates = 0;
+
+ /* Add the 0'th state, */
+ //acsm->acsmNumStates++;
+
+ /* Add each Pattern to the State Table - This forms a keywords state table */
+ for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
+ {
+ AddPatternStates (acsm, plist);
+ }
+
+ acsm->acsmNumStates++;
+
+ if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
+ {
+ /* Build the NFA */
+ Build_NFA (acsm);
+ }
+
+ if( acsm->acsmFSA == FSA_DFA )
+ {
+ /* Convert the NFA to a DFA */
+ Convert_NFA_To_DFA (acsm);
+ }
+
+ /*
+ *
+ * Select Final Transition Table Storage Mode
+ *
+ */
+ if( acsm->acsmFormat == ACF_SPARSE )
+ {
+ /* Convert DFA Full matrix to a Sparse matrix */
+ if( Conv_Full_DFA_To_Sparse(acsm) )
+ return -1;
+ }
+
+ else if( acsm->acsmFormat == ACF_BANDED )
+ {
+ /* Convert DFA Full matrix to a Banded matrix */
+ if( Conv_Full_DFA_To_Banded(acsm) )
+ return -1;
+ }
+
+ else if( acsm->acsmFormat == ACF_SPARSEBANDS )
+ {
+ /* Convert DFA Full matrix to a Sparse-Banded matrix */
1998+ if( Conv_Full_DFA_To_SparseBands(acsm) )
1999+ return -1;
2000+ }
2001+ else if( acsm->acsmFormat == ACF_FULL )
2002+ {
2003+ if( Conv_List_To_Full( acsm ) )
2004+ return -1;
2005+ }
2006+
2007+ acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */
2008+
2009+ /* Free up the Table Of Transition Lists */
2010+ List_FreeTransTable( acsm );
2011+
2012+ /* For now -- show this info */
2013+ /*
2014+ * acsmPrintInfo( acsm );
2015+ */
2016+
2017+
2018+ /* Accrue Summary State Stats */
2019+ summary.num_states += acsm->acsmNumStates;
2020+ summary.num_transitions += acsm->acsmNumTrans;
2021+
2022+ memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));
2023+
2024+ return 0;
2025+}
2026+
2027+/*
2028+ * Get the NextState from the NFA, all NFA storage formats use this
2029+ */
2030+inline
2031+acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
2032+{
2033+ acstate_t fmt;
2034+ acstate_t n;
2035+ int index;
2036+ int nb;
2037+
2038+ fmt = *ps++;
2039+
2040+ ps++; /* skip bMatchState */
2041+
2042+ switch( fmt )
2043+ {
2044+ case ACF_BANDED:
2045+ {
2046+ n = ps[0];
2047+ index = ps[1];
2048+
2049+ if( input < index )
2050+ {
2051+ if(state==0)
2052+ {
2053+ return 0;
2054+ }
2055+ else
2056+ {
2057+ return (acstate_t)ACSM_FAIL_STATE2;
2058+ }
2059+ }
2060+ if( input >= index + n )
2061+ {
2062+ if(state==0)
2063+ {
2064+ return 0;
2065+ }
2066+ else
2067+ {
2068+ return (acstate_t)ACSM_FAIL_STATE2;
2069+ }
2070+ }
2071+ if( ps[input-index] == 0 )
2072+ {
2073+ if( state != 0 )
2074+ {
2075+ return ACSM_FAIL_STATE2;
2076+ }
2077+ }
2078+
2079+ return (acstate_t) ps[input-index];
2080+ }
2081+
2082+ case ACF_SPARSE:
2083+ {
2084+ n = *ps++; /* number of sparse index-value entries */
2085+
2086+ for( ; n>0 ; n-- )
2087+ {
2088+ if( ps[0] > input ) /* cannot match the input, already a higher value than the input */
2089+ {
2090+ return (acstate_t)ACSM_FAIL_STATE2; /* default state */
2091+ }
2092+ else if( ps[0] == input )
2093+ {
2094+ return ps[1]; /* next state */
2095+ }
2096+ ps+=2;
2097+ }
2098+ if( state == 0 )
2099+ {
2100+ return 0;
2101+ }
2102+ return ACSM_FAIL_STATE2;
2103+ }
2104+
2105+ case ACF_SPARSEBANDS:
2106+ {
2107+ nb = *ps++; /* number of bands */
2108+
2109+ while( nb > 0 ) /* for each band */
2110+ {
2111+ n = *ps++; /* number of elements */
2112+ index = *ps++; /* 1st element value */
2113+
2114+ if( input < index )
2115+ {
2116+ if( state != 0 )
2117+ {
2118+ return (acstate_t)ACSM_FAIL_STATE2;
2119+ }
2120+ return (acstate_t)0;
2121+ }
2122+ if( (input >= index) && (input < (index + n)) )
2123+ {
2124+ if( ps[input-index] == 0 )
2125+ {
2126+ if( state != 0 )
2127+ {
2128+ return ACSM_FAIL_STATE2;
2129+ }
2130+ }
2131+ return (acstate_t) ps[input-index];
2132+ }
2133+ nb--;
2134+ ps += n;
2135+ }
2136+ if( state != 0 )
2137+ {
2138+ return (acstate_t)ACSM_FAIL_STATE2;
2139+ }
2140+ return (acstate_t)0;
2141+ }
2142+
2143+ case ACF_FULL:
2144+ {
2145+ if( ps[input] == 0 )
2146+ {
2147+ if( state != 0 )
2148+ {
2149+ return ACSM_FAIL_STATE2;
2150+ }
2151+ }
2152+ return ps[input];
2153+ }
2154+ }
2155+
2156+ return 0;
2157+}
2158+
2159+
2160+
2161+/*
2162+ * Get the NextState from the DFA Next State Transition table
2163+ * Full and banded are supported separately, this is for
2164+ * sparse and sparse-bands
2165+ */
2166+inline
2167+acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
2168+{
2169+ acstate_t n, nb;
2170+ int index;
2171+
2172+ switch( ps[0] )
2173+ {
2174+ /* BANDED */
2175+ case ACF_BANDED:
2176+ {
2177+ /* n=ps[2] : number of entries in the band */
2178+ /* index=ps[3] : index of the 1st entry, sequential thereafter */
2179+
2180+ if( input < ps[3] ) return 0;
2181+ if( input >= (ps[3]+ps[2]) ) return 0;
2182+
2183+ return ps[4+input-ps[3]];
2184+ }
2185+
2186+ /* FULL */
2187+ case ACF_FULL:
2188+ {
2189+ return ps[2+input];
2190+ }
2191+
2192+ /* SPARSE */
2193+ case ACF_SPARSE:
2194+ {
2195+ n = ps[2]; /* number of entries/ key+next pairs */
2196+
2197+ ps += 3;
2198+
2199+ for( ; n>0 ; n-- )
2200+ {
2201+ if( input < ps[0] ) /* cannot match the input, already a higher value than the input */
2202+ {
2203+ return (acstate_t)0; /* default state */
2204+ }
2205+ else if( ps[0] == input )
2206+ {
2207+ return ps[1]; /* next state */
2208+ }
2209+ ps += 2;
2210+ }
2211+ return (acstate_t)0;
2212+ }
2213+
2214+
2215+ /* SPARSEBANDS */
2216+ case ACF_SPARSEBANDS:
2217+ {
2218+ nb = ps[2]; /* number of bands */
2219+
2220+ ps += 3;
2221+
2222+ while( nb > 0 ) /* for each band */
2223+ {
2224+ n = ps[0]; /* number of elements in this band */
2225+ index = ps[1]; /* start index/char of this band */
2226+ if( input < index )
2227+ {
2228+ return (acstate_t)0;
2229+ }
2230+ if( (input < (index + n)) )
2231+ {
2232+ return (acstate_t) ps[2+input-index];
2233+ }
2234+ nb--;
2235+ ps += n;
2236+ }
2237+ return (acstate_t)0;
2238+ }
2239+ }
2240+
2241+ return 0;
2242+}
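/*
 * Illustration of the banded row layout consumed above (the values are
 * made up). Per the layout notes in SparseGetNextStateDFA, a banded row
 * is ps[0]=fmt, ps[1]=match flag, ps[2]=n, ps[3]=first key,
 * ps[4..]=next states, so a state whose only transitions are on
 * 'a'..'c' can be encoded as:
 *
 *   acstate_t row[] = { ACF_BANDED, 0, 3, 'a', 5, 0, 7 };
 *
 * SparseGetNextStateDFA(row, state, 'a') returns 5, input 'b' returns
 * the stored 0 (the default state), and any input outside ['a','a'+3)
 * falls back to state 0 without scanning the row.
 */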
2243+/*
2244+ * Search Text or Binary Data for Pattern matches
2245+ *
2246+ * Sparse & Sparse-Banded Matrix search
2247+ */
2248+static
2249+inline
2250+int
2251+acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2252+ int (*Match) (void * id, int index, void *data),
2253+ void *data)
2254+{
2255+ acstate_t state;
2256+ ACSM_PATTERN2 * mlist;
2257+ unsigned char * Tend;
2258+ int nfound = 0;
2259+ unsigned char * T, * Tc;
2260+ int index;
2261+ acstate_t ** NextState = acsm->acsmNextState;
2262+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2263+
2264+ Tc = Tx;
2265+ T = Tx;
2266+ Tend = T + n;
2267+
2268+ for( state = 0; T < Tend; T++ )
2269+ {
2270+ state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
2271+
2272+ /* test if this state has any matching patterns */
2273+ if( NextState[state][1] )
2274+ {
2275+ for( mlist = MatchList[state];
2276+ mlist!= NULL;
2277+ mlist = mlist->next )
2278+ {
2279+ index = T - mlist->n - Tc;
2280+ if( mlist->nocase )
2281+ {
2282+ nfound++;
2283+ if (Match (mlist->id, index, data))
2284+ return nfound;
2285+ }
2286+ else
2287+ {
2288+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2289+ {
2290+ nfound++;
2291+ if (Match (mlist->id, index, data))
2292+ return nfound;
2293+ }
2294+ }
2295+ }
2296+ }
2297+ }
2298+ return nfound;
2299+}
2300+/*
2301+ * Full format DFA search
2302+ * Do not change anything here without testing; caching and prefetching
2303+ * performance is very sensitive to any changes.
2304+ *
2305+ * Perf-Notes:
2306+ * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
2307+ * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
2308+ * 3)
2309+ */
2310+static
2311+inline
2312+int
2313+acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2314+ int (*Match) (void * id, int index, void *data),
2315+ void *data)
2316+{
2317+ ACSM_PATTERN2 * mlist;
2318+ unsigned char * Tend;
2319+ unsigned char * T;
2320+ int index;
2321+ acstate_t state;
2322+ acstate_t * ps;
2323+ acstate_t sindex;
2324+ acstate_t ** NextState = acsm->acsmNextState;
2325+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2326+ int nfound = 0;
2327+
2328+ T = Tx;
2329+ Tend = Tx + n;
2330+
2331+ for( state = 0; T < Tend; T++ )
2332+ {
2333+ ps = NextState[ state ];
2334+
2335+ sindex = xlatcase[ T[0] ];
2336+
2337+ /* check the current state for a pattern match */
2338+ if( ps[1] )
2339+ {
2340+ for( mlist = MatchList[state];
2341+ mlist!= NULL;
2342+ mlist = mlist->next )
2343+ {
2344+ index = T - mlist->n - Tx;
2345+
2346+
2347+ if( mlist->nocase )
2348+ {
2349+ nfound++;
2350+ if (Match (mlist->id, index, data))
2351+ return nfound;
2352+ }
2353+ else
2354+ {
2355+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
2356+ {
2357+ nfound++;
2358+ if (Match (mlist->id, index, data))
2359+ return nfound;
2360+ }
2361+ }
2362+
2363+ }
2364+ }
2365+
2366+ state = ps[ 2u + sindex ];
2367+ }
2368+
2369+ /* Check the last state for a pattern match */
2370+ for( mlist = MatchList[state];
2371+ mlist!= NULL;
2372+ mlist = mlist->next )
2373+ {
2374+ index = T - mlist->n - Tx;
2375+
2376+ if( mlist->nocase )
2377+ {
2378+ nfound++;
2379+ if (Match (mlist->id, index, data))
2380+ return nfound;
2381+ }
2382+ else
2383+ {
2384+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2385+ {
2386+ nfound++;
2387+ if (Match (mlist->id, index, data))
2388+ return nfound;
2389+ }
2390+ }
2391+ }
2392+
2393+ return nfound;
2394+}
2395+/*
2396+ * Banded-Row format DFA search
2397+ * Do not change anything here; caching and prefetching
2398+ * performance is very sensitive to any changes.
2399+ *
2400+ * ps[0] = storage fmt
2401+ * ps[1] = bool match flag
2402+ * ps[2] = # elements in band
2403+ * ps[3] = index of 1st element
2404+ */
2405+static
2406+inline
2407+int
2408+acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2409+ int (*Match) (void * id, int index, void *data),
2410+ void *data)
2411+{
2412+ acstate_t state;
2413+ unsigned char * Tend;
2414+ unsigned char * T;
2415+ int sindex;
2416+ int index;
2417+ acstate_t ** NextState = acsm->acsmNextState;
2418+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2419+ ACSM_PATTERN2 * mlist;
2420+ acstate_t * ps;
2421+ int nfound = 0;
2422+
2423+ T = Tx;
2424+ Tend = T + n;
2425+
2426+ for( state = 0; T < Tend; T++ )
2427+ {
2428+ ps = NextState[state];
2429+
2430+ sindex = xlatcase[ T[0] ];
2431+
2432+ /* test if this state has any matching patterns */
2433+ if( ps[1] )
2434+ {
2435+ for( mlist = MatchList[state];
2436+ mlist!= NULL;
2437+ mlist = mlist->next )
2438+ {
2439+ index = T - mlist->n - Tx;
2440+
2441+ if( mlist->nocase )
2442+ {
2443+ nfound++;
2444+ if (Match (mlist->id, index, data))
2445+ return nfound;
2446+ }
2447+ else
2448+ {
2449+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2450+ {
2451+ nfound++;
2452+ if (Match (mlist->id, index, data))
2453+ return nfound;
2454+ }
2455+ }
2456+ }
2457+ }
2458+
2459+ if( sindex < ps[3] ) state = 0;
2460+ else if( sindex >= (ps[3] + ps[2]) ) state = 0;
2461+ else state = ps[ 4u + sindex - ps[3] ];
2462+ }
2463+
2464+ /* Check the last state for a pattern match */
2465+ for( mlist = MatchList[state];
2466+ mlist!= NULL;
2467+ mlist = mlist->next )
2468+ {
2469+ index = T - mlist->n - Tx;
2470+
2471+ if( mlist->nocase )
2472+ {
2473+ nfound++;
2474+ if (Match (mlist->id, index, data))
2475+ return nfound;
2476+ }
2477+ else
2478+ {
2479+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2480+ {
2481+ nfound++;
2482+ if (Match (mlist->id, index, data))
2483+ return nfound;
2484+ }
2485+ }
2486+ }
2487+
2488+ return nfound;
2489+}
2490+
2491+
2492+
2493+/*
2494+ * Search Text or Binary Data for Pattern matches
2495+ *
2496+ * Sparse Storage Version
2497+ */
2498+static
2499+inline
2500+int
2501+acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2502+ int (*Match) (void * id, int index, void *data),
2503+ void *data)
2504+{
2505+ acstate_t state;
2506+ ACSM_PATTERN2 * mlist;
2507+ unsigned char * Tend;
2508+ int nfound = 0;
2509+ unsigned char * T, *Tc;
2510+ int index;
2511+ acstate_t ** NextState= acsm->acsmNextState;
2512+ acstate_t * FailState= acsm->acsmFailState;
2513+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2514+ unsigned char Tchar;
2515+
2516+ Tc = Tx;
2517+ T = Tx;
2518+ Tend = T + n;
2519+
2520+ for( state = 0; T < Tend; T++ )
2521+ {
2522+ acstate_t nstate;
2523+
2524+ Tchar = xlatcase[ *T ];
2525+
2526+ while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
2527+ state = FailState[state];
2528+
2529+ state = nstate;
2530+
2531+ for( mlist = MatchList[state];
2532+ mlist!= NULL;
2533+ mlist = mlist->next )
2534+ {
2535+ index = T - mlist->n - Tx;
2536+ if( mlist->nocase )
2537+ {
2538+ nfound++;
2539+ if (Match (mlist->id, index, data))
2540+ return nfound;
2541+ }
2542+ else
2543+ {
2544+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2545+ {
2546+ nfound++;
2547+ if (Match (mlist->id, index, data))
2548+ return nfound;
2549+ }
2550+ }
2551+ }
2552+ }
2553+
2554+ return nfound;
2555+}
2556+
2557+/*
2558+ * Search Function
2559+ */
2560+int
2561+acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2562+ int (*Match) (void * id, int index, void *data),
2563+ void *data)
2564+{
2565+
2566+ switch( acsm->acsmFSA )
2567+ {
2568+ case FSA_DFA:
2569+
2570+ if( acsm->acsmFormat == ACF_FULL )
2571+ {
2572+ return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
2573+ }
2574+ else if( acsm->acsmFormat == ACF_BANDED )
2575+ {
2576+ return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
2577+ }
2578+ else
2579+ {
2580+ return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
2581+ }
2582+
2583+ case FSA_NFA:
2584+
2585+ return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
2586+
2587+ case FSA_TRIE:
2588+
2589+ return 0;
2590+ }
2591+ return 0;
2592+}
2593+
2594+
2595+/*
2596+ * Free all memory
2597+ */
2598+void
2599+acsmFree2 (ACSM_STRUCT2 * acsm)
2600+{
2601+ int i;
2602+ ACSM_PATTERN2 * mlist, *ilist;
2603+ for (i = 0; i < acsm->acsmMaxStates; i++)
2604+ {
2605+ mlist = acsm->acsmMatchList[i];
2606+
2607+ while (mlist)
2608+ {
2609+ ilist = mlist;
2610+ mlist = mlist->next;
2611+ AC_FREE (ilist);
2612+ }
2613+ AC_FREE(acsm->acsmNextState[i]);
2614+ }
2615+ AC_FREE(acsm->acsmFailState);
2616+ AC_FREE(acsm->acsmMatchList);
2617+}
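/*
 * A minimal usage sketch of this Aho-Corasick API. It assumes the
 * acsmNew2()/acsmAddPattern2()/acsmCompile2() constructors provided by
 * the Snort acsmx2 code earlier in this patch (their exact signatures
 * are an assumption here); the callback contract is the one used by
 * acsmSearch2() above, where a non-zero return stops the search.
 */
static int demo_match(void *id, int index, void *data) {
  return 0; /* keep searching */
}

static void acsm_demo(unsigned char *payload, int payload_len) {
  ACSM_STRUCT2 *acsm = acsmNew2();

  /* pattern, length, nocase, offset, depth, id, iid (assumed order) */
  acsmAddPattern2(acsm, (unsigned char*)"GET ", 4, 1, 0, 0, NULL, 0);
  acsmCompile2(acsm);

  if(acsmSearch2(acsm, payload, payload_len, demo_match, NULL) > 0)
    printk("acsm_demo: pattern found\n");

  acsmFree2(acsm);
}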
2618+
2619+/* ********************************** */
2620+
2621+static void ring_sock_destruct(struct sock *sk) {
2622+
2623+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
2624+ skb_queue_purge(&sk->sk_receive_queue);
2625+
2626+ if (!sock_flag(sk, SOCK_DEAD)) {
2627+#if defined(RING_DEBUG)
2628+ printk("Attempt to release alive ring socket: %p\n", sk);
2629+#endif
2630+ return;
2631+ }
2632+
36868e55 2633+ BUG_ON(atomic_read(&sk->sk_rmem_alloc));
2634+ BUG_ON(atomic_read(&sk->sk_wmem_alloc));
40fd095b 2635+#else
2636+
36868e55 2637+ BUG_ON(atomic_read(&sk->rmem_alloc) != 0);
2638+ BUG_ON(atomic_read(&sk->wmem_alloc) != 0);
40fd095b 2639+
2640+ if (!sk->dead) {
2641+#if defined(RING_DEBUG)
2642+ printk("Attempt to release alive ring socket: %p\n", sk);
2643+#endif
2644+ return;
2645+ }
2646+#endif
2647+
2648+ kfree(ring_sk(sk));
2649+
2650+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
2651+ MOD_DEC_USE_COUNT;
2652+#endif
2653+}
2654+
2655+/* ********************************** */
2656+
2657+static void ring_proc_add(struct ring_opt *pfr) {
2658+ if(ring_proc_dir != NULL) {
2659+ char name[16];
2660+
2661+ pfr->ring_pid = current->pid;
2662+
2663+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
2664+ create_proc_read_entry(name, 0, ring_proc_dir,
2665+ ring_proc_get_info, pfr);
2666+ /* printk("PF_RING: added /proc/net/pf_ring/%s\n", name); */
2667+ }
2668+}
2669+
2670+/* ********************************** */
2671+
2672+static void ring_proc_remove(struct ring_opt *pfr) {
2673+ if(ring_proc_dir != NULL) {
2674+ char name[16];
2675+
2676+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
2677+ remove_proc_entry(name, ring_proc_dir);
2678+ /* printk("PF_RING: removed /proc/net/pf_ring/%s\n", name); */
2679+ }
2680+}
2681+
2682+/* ********************************** */
2683+
2684+static int ring_proc_get_info(char *buf, char **start, off_t offset,
2685+ int len, int *unused, void *data)
2686+{
2687+ int rlen = 0;
2688+ struct ring_opt *pfr;
2689+ FlowSlotInfo *fsi;
2690+
2691+ if(data == NULL) {
2692+ /* /proc/net/pf_ring/info */
2693+ rlen = sprintf(buf,"Version : %s\n", RING_VERSION);
2694+ rlen += sprintf(buf + rlen,"Bucket length : %d bytes\n", bucket_len);
2695+ rlen += sprintf(buf + rlen,"Ring slots : %d\n", num_slots);
2696+ rlen += sprintf(buf + rlen,"Sample rate : %d [1=no sampling]\n", sample_rate);
2697+
2698+ rlen += sprintf(buf + rlen,"Capture TX : %s\n",
2699+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
2700+ rlen += sprintf(buf + rlen,"Transparent mode : %s\n",
2701+ transparent_mode ? "Yes" : "No");
2702+ rlen += sprintf(buf + rlen,"Total rings : %d\n", ring_table_size);
2703+ } else {
2704+ /* detailed statistics about a PF_RING */
2705+ pfr = (struct ring_opt*)data;
2706+
2707+ if(data) {
2708+ fsi = pfr->slots_info;
2709+
2710+ if(fsi) {
2711+ rlen = sprintf(buf, "Bound Device : %s\n",
2712+ pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
2713+ rlen += sprintf(buf + rlen,"Version : %d\n", fsi->version);
2714+ rlen += sprintf(buf + rlen,"Sampling Rate : %d\n", pfr->sample_rate);
2715+ rlen += sprintf(buf + rlen,"BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
2716+ rlen += sprintf(buf + rlen,"Bloom Filters : %s\n", pfr->bitmask_enabled ? "Enabled" : "Disabled");
2717+ rlen += sprintf(buf + rlen,"Pattern Search: %s\n", pfr->acsm ? "Enabled" : "Disabled");
2718+ rlen += sprintf(buf + rlen,"Cluster Id : %d\n", pfr->cluster_id);
2719+ rlen += sprintf(buf + rlen,"Tot Slots : %d\n", fsi->tot_slots);
2720+ rlen += sprintf(buf + rlen,"Slot Len : %d\n", fsi->slot_len);
2721+ rlen += sprintf(buf + rlen,"Data Len : %d\n", fsi->data_len);
2722+ rlen += sprintf(buf + rlen,"Tot Memory : %d\n", fsi->tot_mem);
2723+ rlen += sprintf(buf + rlen,"Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
2724+ rlen += sprintf(buf + rlen,"Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
2725+ rlen += sprintf(buf + rlen,"Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
2726+ rlen += sprintf(buf + rlen,"Tot Read : %lu\n", (unsigned long)fsi->tot_read);
2727+
2728+ } else
2729+ rlen = sprintf(buf, "WARNING fsi == NULL\n");
2730+ } else
2731+ rlen = sprintf(buf, "WARNING data == NULL\n");
2732+ }
2733+
2734+ return rlen;
2735+}
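/*
 * For reference, the sprintf() chain above renders /proc/net/pf_ring/info
 * roughly as follows (the values are illustrative, not captured output):
 *
 *   Version          : <RING_VERSION>
 *   Bucket length    : 128 bytes
 *   Ring slots       : 4096
 *   Sample rate      : 1 [1=no sampling]
 *   Capture TX       : No [RX only]
 *   Transparent mode : No
 *   Total rings      : 2
 */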
2736+
2737+/* ********************************** */
2738+
2739+static void ring_proc_init(void) {
143a4708 2740+ ring_proc_dir = proc_mkdir("pf_ring", init_net.proc_net);
40fd095b 2741+
2742+ if(ring_proc_dir) {
2743+ ring_proc_dir->owner = THIS_MODULE;
2744+ ring_proc = create_proc_read_entry("info", 0, ring_proc_dir,
2745+ ring_proc_get_info, NULL);
2746+ if(!ring_proc)
2747+ printk("PF_RING: unable to register proc file\n");
2748+ else {
2749+ ring_proc->owner = THIS_MODULE;
2750+ printk("PF_RING: registered /proc/net/pf_ring/\n");
2751+ }
2752+ } else
2753+ printk("PF_RING: unable to create /proc/net/pf_ring\n");
2754+}
2755+
2756+/* ********************************** */
2757+
2758+static void ring_proc_term(void) {
2759+ if(ring_proc != NULL) {
2760+ remove_proc_entry("info", ring_proc_dir);
143a4708 2761+ if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", init_net.proc_net);
40fd095b 2762+
2763+ printk("PF_RING: deregistered /proc/net/pf_ring\n");
2764+ }
2765+}
2766+
2767+/* ********************************** */
2768+
2769+/*
2770+ * ring_insert()
2771+ *
2772+ * store the sk in a new element and add it
2773+ * to the head of the list.
2774+ */
2775+static inline void ring_insert(struct sock *sk) {
2776+ struct ring_element *next;
2777+
2778+#if defined(RING_DEBUG)
2779+ printk("RING: ring_insert()\n");
2780+#endif
2781+
2782+ next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
2783+ if(next != NULL) {
2784+ next->sk = sk;
2785+ write_lock_irq(&ring_mgmt_lock);
2786+ list_add(&next->list, &ring_table);
2787+ write_unlock_irq(&ring_mgmt_lock);
2788+ } else {
2789+ if(net_ratelimit())
2790+ printk("RING: could not kmalloc slot!!\n");
2791+ }
2792+
2793+ ring_table_size++;
2794+ ring_proc_add(ring_sk(sk));
2795+}
2796+
2797+/* ********************************** */
2798+
2799+/*
2800+ * ring_remove()
2801+ *
2802+ * For each of the elements in the list:
2803+ * - check if this is the element we want to delete
2804+ * - if it is, remove it from the list, and free it.
2805+ *
2806+ * stop when we find the one we're looking for (break),
2807+ * or when we reach the end of the list.
2808+ */
2809+static inline void ring_remove(struct sock *sk) {
2810+ struct list_head *ptr;
2811+ struct ring_element *entry;
2812+
2813+ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
2814+ entry = list_entry(ptr, struct ring_element, list);
2815+
2816+ if(entry->sk == sk) {
2817+ list_del(ptr);
2818+ kfree(ptr);
2819+ ring_table_size--;
2820+ break;
2821+ }
2822+ }
2823+}
2824+
2825+/* ********************************** */
2826+
2827+static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
2828+
2829+ if(pfr->ring_slots != NULL) {
2830+
2831+ u_int32_t tot_insert = pfr->slots_info->insert_idx,
2832+#if defined(RING_DEBUG)
2833+ tot_read = pfr->slots_info->tot_read, tot_pkts;
2834+#else
2835+ tot_read = pfr->slots_info->tot_read;
2836+#endif
2837+
2838+ if(tot_insert >= tot_read) {
2839+#if defined(RING_DEBUG)
2840+ tot_pkts = tot_insert-tot_read;
2841+#endif
2842+ return(tot_insert-tot_read);
2843+ } else {
2844+#if defined(RING_DEBUG)
2845+ tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read;
2846+#endif
2847+ return(((u_int32_t)-1)+tot_insert-tot_read);
2848+ }
2849+
2850+#if defined(RING_DEBUG)
2851+ printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
2852+ tot_pkts, tot_insert, tot_read);
2853+#endif
2854+
2855+ } else
2856+ return(0);
2857+}
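/*
 * Wraparound example for num_queued_pkts(): tot_insert and tot_read are
 * free-running u_int32_t counters, so once tot_insert wraps (say
 * tot_insert=5, tot_read=0xfffffff0) the branch above yields
 * ((u_int32_t)-1)+5-0xfffffff0 = 20, one short of the exact
 * 2^32+5-0xfffffff0 = 21; harmless for the threshold checks this feeds.
 * Plain modulo-2^32 subtraction gives the exact count:
 */
static inline u_int32_t queued_delta(u_int32_t tot_insert, u_int32_t tot_read) {
  return(tot_insert - tot_read); /* unsigned arithmetic absorbs the wrap */
}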
2858+
2859+/* ********************************** */
2860+
2861+static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
2862+#if defined(RING_DEBUG)
2863+ printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
2864+#endif
2865+
2866+ if(pfr->ring_slots != NULL) {
2867+ FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
2868+ *pfr->slots_info->slot_len]);
2869+ return(slot);
2870+ } else
2871+ return(NULL);
2872+}
2873+
2874+/* ********************************** */
2875+
2876+static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
2877+#if defined(RING_DEBUG)
2878+ printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
2879+#endif
2880+
2881+ if(pfr->ring_slots != NULL)
2882+ return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
2883+ pfr->slots_info->slot_len]));
2884+ else
2885+ return(NULL);
2886+}
2887+
2888+/* ******************************************************* */
2889+
2890+static int parse_pkt(struct sk_buff *skb, u_int16_t skb_displ,
2891+ u_int8_t *l3_proto, u_int16_t *eth_type,
2892+ u_int16_t *l3_offset, u_int16_t *l4_offset,
2893+ u_int16_t *vlan_id, u_int32_t *ipv4_src,
2894+ u_int32_t *ipv4_dst,
2895+ u_int16_t *l4_src_port, u_int16_t *l4_dst_port,
2896+ u_int16_t *payload_offset) {
2897+ struct iphdr *ip;
2898+ struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
2899+ u_int16_t displ;
2900+
2901+ *l3_offset = *l4_offset = *l3_proto = *payload_offset = 0;
2902+ *eth_type = ntohs(eh->h_proto);
2903+
2904+ if(*eth_type == 0x8100 /* 802.1q (VLAN) */) {
2905+ (*vlan_id) = (skb->data[14] & 15)*256 + skb->data[15];
2906+ *eth_type = (skb->data[16])*256 + skb->data[17];
2907+ displ = 4;
2908+ } else {
2909+ displ = 0;
2910+ (*vlan_id) = (u_int16_t)-1;
2911+ }
2912+
2913+ if(*eth_type == 0x0800 /* IP */) {
2914+ *l3_offset = displ+sizeof(struct ethhdr);
2915+ ip = (struct iphdr*)(skb->data-skb_displ+(*l3_offset));
2916+
2917+ *ipv4_src = ntohl(ip->saddr), *ipv4_dst = ntohl(ip->daddr), *l3_proto = ip->protocol;
2918+
2919+ if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP)) {
2920+ *l4_offset = (*l3_offset)+(ip->ihl*4);
2921+
2922+ if(ip->protocol == IPPROTO_TCP) {
2923+ struct tcphdr *tcp = (struct tcphdr*)(skb->data-skb_displ+(*l4_offset));
2924+ *l4_src_port = ntohs(tcp->source), *l4_dst_port = ntohs(tcp->dest);
2925+ *payload_offset = (*l4_offset)+(tcp->doff * 4);
2926+ } else if(ip->protocol == IPPROTO_UDP) {
2927+ struct udphdr *udp = (struct udphdr*)(skb->data-skb_displ+(*l4_offset));
2928+ *l4_src_port = ntohs(udp->source), *l4_dst_port = ntohs(udp->dest);
2929+ *payload_offset = (*l4_offset)+sizeof(struct udphdr);
2930+ } else
2931+ *payload_offset = (*l4_offset);
2932+ } else
2933+ *l4_src_port = *l4_dst_port = 0;
2934+
2935+ return(1); /* IP */
2936+ } /* TODO: handle IPv6 */
2937+
2938+ return(0); /* No IP */
2939+}
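/*
 * Worked example for parse_pkt(): on an untagged IPv4/TCP frame with
 * minimal headers (ihl=5, doff=5) the offsets computed above are
 *
 *   l3_offset      = 0  + sizeof(struct ethhdr) = 14
 *   l4_offset      = 14 + 5*4                   = 34
 *   payload_offset = 34 + 5*4                   = 54
 *
 * and an 802.1q tag (displ=4) shifts each of them by four bytes.
 */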
2940+
2941+/* **************************************************************** */
2942+
2943+static void reset_bitmask(bitmask_selector *selector)
2944+{
2945+ memset((char*)selector->bits_memory, 0, selector->num_bits/8);
2946+
2947+ while(selector->clashes != NULL) {
2948+ bitmask_counter_list *next = selector->clashes->next;
2949+ kfree(selector->clashes);
2950+ selector->clashes = next;
2951+ }
2952+}
2953+
2954+/* **************************************************************** */
2955+
2956+static void alloc_bitmask(u_int32_t tot_bits, bitmask_selector *selector)
2957+{
2958+ u_int tot_mem = tot_bits/8;
2959+
2960+ if(tot_mem <= PAGE_SIZE)
2961+ selector->order = 1;
2962+ else {
2963+ for(selector->order = 0; (PAGE_SIZE << selector->order) < tot_mem; selector->order++)
2964+ ;
2965+ }
2966+
2967+ printk("BITMASK: [order=%d][tot_mem=%d]\n", selector->order, tot_mem);
2968+
2969+ while((selector->bits_memory = __get_free_pages(GFP_ATOMIC, selector->order)) == 0)
2970+ if(selector->order-- == 0)
2971+ break;
2972+
2973+ if(selector->order == 0) {
2974+ printk("BITMASK: ERROR not enough memory for bitmask\n");
2975+ selector->num_bits = 0;
2976+ return;
2977+ }
2978+
2979+ tot_mem = PAGE_SIZE << selector->order;
2980+ printk("BITMASK: succesfully allocated [tot_mem=%d][order=%d]\n",
2981+ tot_mem, selector->order);
2982+
2983+ selector->num_bits = tot_mem*8;
2984+ selector->clashes = NULL;
2985+ reset_bitmask(selector);
2986+}
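/*
 * Sizing example for alloc_bitmask(), assuming a 4 KB PAGE_SIZE: a
 * request for tot_bits=32768 needs tot_mem=4096 bytes, which is
 * <= PAGE_SIZE, so order is forced to 1 and two pages are allocated;
 * num_bits is then recomputed from the actual allocation as
 * (PAGE_SIZE << 1)*8 = 65536 bits. order==0 doubles as the allocation
 * failure marker above, hence the minimum order of 1.
 */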
2987+
2988+/* ********************************** */
2989+
2990+static void free_bitmask(bitmask_selector *selector)
2991+{
2992+ if(selector->bits_memory > 0)
2993+ free_pages(selector->bits_memory, selector->order);
2994+}
2995+
2996+/* ********************************** */
2997+
2998+static void set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
2999+ u_int32_t idx = the_bit % selector->num_bits;
3000+
3001+ if(BITMASK_ISSET(idx, selector)) {
3002+ bitmask_counter_list *head = selector->clashes;
3003+
3004+ printk("BITMASK: bit %u was already set\n", the_bit);
3005+
3006+ while(head != NULL) {
3007+ if(head->bit_id == the_bit) {
3008+ head->bit_counter++;
3009+ printk("BITMASK: bit %u is now set to %d\n", the_bit, head->bit_counter);
3010+ return;
3011+ }
3012+
3013+ head = head->next;
3014+ }
3015+
3016+ head = kmalloc(sizeof(bitmask_counter_list), GFP_KERNEL);
3017+ if(head) {
3018+ head->bit_id = the_bit;
3019+ head->bit_counter = 1 /* previous value */ + 1 /* the requested set */;
3020+ head->next = selector->clashes;
3021+ selector->clashes = head;
3022+ } else {
3023+ printk("BITMASK: not enough memory\n");
3024+ return;
3025+ }
3026+ } else {
3027+ BITMASK_SET(idx, selector);
3028+ printk("BITMASK: bit %u is now set\n", the_bit);
3029+ }
3030+}
3031+
3032+/* ********************************** */
3033+
3034+static u_char is_set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3035+ u_int32_t idx = the_bit % selector->num_bits;
3036+ return(BITMASK_ISSET(idx, selector));
3037+}
3038+
3039+/* ********************************** */
3040+
3041+static void clear_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3042+ u_int32_t idx = the_bit % selector->num_bits;
3043+
3044+ if(!BITMASK_ISSET(idx, selector))
3045+ printk("BITMASK: bit %u was not set\n", the_bit);
3046+ else {
3047+ bitmask_counter_list *head = selector->clashes, *prev = NULL;
3048+
3049+ while(head != NULL) {
3050+ if(head->bit_id == the_bit) {
3051+ head->bit_counter--;
3052+
3053+ printk("BITMASK: bit %u is now set to %d\n",
3054+ the_bit, head->bit_counter);
3055+
3056+ if(head->bit_counter == 1) {
3057+ /* We can now delete this entry as '1' can be
3058+ accommodated into the bitmask */
3059+
3060+ if(prev == NULL)
3061+ selector->clashes = head->next;
3062+ else
3063+ prev->next = head->next;
3064+
3065+ kfree(head);
3066+ }
3067+ return;
3068+ }
3069+
3070+ prev = head; head = head->next;
3071+ }
3072+
3073+ BITMASK_CLR(idx, selector);
3074+ printk("BITMASK: bit %u is now reset\n", the_bit);
3075+ }
3076+}
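/*
 * Behaviour sketch of the clash list maintained above: the bitmask can
 * only record "set", so repeated sets of one bit are counted in the
 * clashes list and clears unwind it before the bit itself is cleared:
 *
 *   set_bit_bitmask(sel, 42);    bit set, no clash entry
 *   set_bit_bitmask(sel, 42);    clash entry added, bit_counter=2
 *   clear_bit_bitmask(sel, 42);  bit_counter drops to 1, entry freed
 *   clear_bit_bitmask(sel, 42);  no entry left, BITMASK_CLR clears the bit
 */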
3077+
3078+/* ********************************** */
3079+
3080+/* Hash function */
3081+static u_int32_t sdb_hash(u_int32_t value) {
3082+ u_int32_t hash = 0, i;
3083+ u_int8_t str[sizeof(value)];
3084+
3085+ memcpy(str, &value, sizeof(value));
3086+
3087+ for(i = 0; i < sizeof(value); i++) {
3088+ hash = str[i] + (hash << 6) + (hash << 16) - hash;
3089+ }
3090+
3091+ return(hash);
3092+}
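/*
 * The shift/subtract expression above is the classic sdbm string hash
 * applied to the four bytes of 'value': (hash<<6)+(hash<<16)-hash is
 * hash*65599. An equivalent form with the explicit multiplier
 * (illustrative only):
 */
static u_int32_t sdb_hash_equiv(u_int32_t value) {
  u_int32_t hash = 0, i;
  u_int8_t str[sizeof(value)];

  memcpy(str, &value, sizeof(value));

  for(i = 0; i < sizeof(value); i++)
    hash = hash*65599 + str[i];

  return(hash);
}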
3093+
3094+/* ********************************** */
3095+
3096+static void handle_bloom_filter_rule(struct ring_opt *pfr, char *buf) {
3097+ u_int count;
3098+
3099+ if(buf == NULL)
3100+ return;
3101+ else
3102+ count = strlen(buf);
3103+
3104+ printk("PF_RING: -> handle_bloom_filter_rule(%s)\n", buf);
3105+
3106+ if((count > 0) && ((buf[count-1] == '\n') || (buf[count-1] == '\r'))) buf[count-1] = '\0';
3107+
3108+ if(count > 1) {
3109+ u_int32_t the_bit;
3110+
3111+ if(!strncmp(&buf[1], "vlan=", 5)) {
3112+ sscanf(&buf[6], "%d", &the_bit);
3113+
3114+ if(buf[0] == '+')
3115+ set_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_add++;
3116+ else
3117+ clear_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_remove++;
3118+ } else if(!strncmp(&buf[1], "mac=", 4)) {
3119+ int a, b, c, d, e, f;
3120+
3121+ if(sscanf(&buf[5], "%02x:%02x:%02x:%02x:%02x:%02x:",
3122+ &a, &b, &c, &d, &e, &f) == 6) {
3123+ u_int32_t mac_addr = (a & 0xff) + (b & 0xff) + ((c & 0xff) << 24) + ((d & 0xff) << 16) + ((e & 0xff) << 8) + (f & 0xff);
3124+
3125+ /* printk("PF_RING: -> [%u][%u][%u][%u][%u][%u] -> [%u]\n", a, b, c, d, e, f, mac_addr); */
3126+
3127+ if(buf[0] == '+')
3128+ set_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_add++;
3129+ else
3130+ clear_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_remove++;
3131+ } else
3132+ printk("PF_RING: -> Invalid MAC address '%s'\n", &buf[5]);
3133+ } else if(!strncmp(&buf[1], "ip=", 3)) {
3134+ int a, b, c, d;
3135+
3136+ if(sscanf(&buf[4], "%d.%d.%d.%d", &a, &b, &c, &d) == 4) {
3137+ u_int32_t ip_addr = ((a & 0xff) << 24) + ((b & 0xff) << 16) + ((c & 0xff) << 8) + (d & 0xff);
3138+
3139+ if(buf[0] == '+')
3140+ set_bit_bitmask(&pfr->ip_bitmask, ip_addr), set_bit_bitmask(&pfr->ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_add++;
3141+ else
3142+ clear_bit_bitmask(&pfr->ip_bitmask, ip_addr), clear_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_remove++;
3143+ } else
3144+ printk("PF_RING: -> Invalid IP address '%s'\n", &buf[4]);
3145+ } else if(!strncmp(&buf[1], "port=", 5)) {
3146+ sscanf(&buf[6], "%d", &the_bit);
3147+
3148+ if(buf[0] == '+')
3149+ set_bit_bitmask(&pfr->port_bitmask, the_bit), set_bit_bitmask(&pfr->port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_add++;
3150+ else
3151+ clear_bit_bitmask(&pfr->port_bitmask, the_bit), clear_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_remove++;
3152+ } else if(!strncmp(&buf[1], "proto=", 6)) {
3153+ if(!strncmp(&buf[7], "tcp", 3)) the_bit = 6;
3154+ else if(!strncmp(&buf[7], "udp", 3)) the_bit = 17;
3155+ else if(!strncmp(&buf[7], "icmp", 4)) the_bit = 1;
3156+ else sscanf(&buf[7], "%d", &the_bit);
3157+
3158+ if(buf[0] == '+')
3159+ set_bit_bitmask(&pfr->proto_bitmask, the_bit);
3160+ else
3161+ clear_bit_bitmask(&pfr->proto_bitmask, the_bit);
3162+ } else
3163+ printk("PF_RING: -> Unknown rule type '%s'\n", buf);
3164+ }
3165+}
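/*
 * Rule-string format accepted above, for reference: a '+' or '-' prefix
 * selects add/remove, followed by vlan=, mac=, ip=, port= or proto=
 * (tcp/udp/icmp or a numeric value). The buffer must be writable, since
 * any trailing newline is stripped in place. A short sketch:
 */
static void bloom_rules_demo(struct ring_opt *pfr) {
  char r1[] = "+ip=192.168.0.1";
  char r2[] = "+proto=tcp";
  char r3[] = "-port=80";

  handle_bloom_filter_rule(pfr, r1);
  handle_bloom_filter_rule(pfr, r2);
  handle_bloom_filter_rule(pfr, r3);
}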
3166+
3167+/* ********************************** */
3168+
3169+static void reset_bloom_filters(struct ring_opt *pfr) {
3170+ reset_bitmask(&pfr->mac_bitmask);
3171+ reset_bitmask(&pfr->vlan_bitmask);
3172+ reset_bitmask(&pfr->ip_bitmask); reset_bitmask(&pfr->twin_ip_bitmask);
3173+ reset_bitmask(&pfr->port_bitmask); reset_bitmask(&pfr->twin_port_bitmask);
3174+ reset_bitmask(&pfr->proto_bitmask);
3175+
3176+ pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3177+ pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3178+ pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3179+ pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3180+ pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3181+
3182+ printk("PF_RING: rules have been reset\n");
3183+}
3184+
3185+/* ********************************** */
3186+
3187+static void init_blooms(struct ring_opt *pfr) {
3188+ alloc_bitmask(4096, &pfr->mac_bitmask);
3189+ alloc_bitmask(4096, &pfr->vlan_bitmask);
3190+ alloc_bitmask(32768, &pfr->ip_bitmask); alloc_bitmask(32768, &pfr->twin_ip_bitmask);
3191+ alloc_bitmask(4096, &pfr->port_bitmask); alloc_bitmask(4096, &pfr->twin_port_bitmask);
3192+ alloc_bitmask(4096, &pfr->proto_bitmask);
3193+
3194+ pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3195+ pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3196+ pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3197+ pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3198+ pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3199+
3200+ reset_bloom_filters(pfr);
3201+}
3202+
3203+/* ********************************** */
3204+
3205+inline int MatchFound (void* id, int index, void *data) { return(0); }
3206+
3207+/* ********************************** */
3208+
3209+static void add_skb_to_ring(struct sk_buff *skb,
3210+ struct ring_opt *pfr,
3211+ u_char recv_packet,
3212+ u_char real_skb /* 1=skb 0=faked skb */) {
3213+ FlowSlot *theSlot;
3214+ int idx, displ, fwd_pkt = 0;
3215+
3216+ if(recv_packet) {
3217+ /* Hack for identifying a packet received by the e1000 */
3218+ if(real_skb) {
3219+ displ = SKB_DISPLACEMENT;
3220+ } else
3221+ displ = 0; /* Received by the e1000 wrapper */
3222+ } else
3223+ displ = 0;
3224+
3225+ write_lock(&pfr->ring_index_lock);
3226+ pfr->slots_info->tot_pkts++;
3227+ write_unlock(&pfr->ring_index_lock);
3228+
3229+ /* BPF Filtering (from af_packet.c) */
3230+ if(pfr->bpfFilter != NULL) {
3231+ unsigned res = 1, len;
3232+
3233+ len = skb->len-skb->data_len;
3234+
3235+ write_lock(&pfr->ring_index_lock);
3236+ skb->data -= displ;
3237+ res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
3238+ skb->data += displ;
3239+ write_unlock(&pfr->ring_index_lock);
3240+
3241+ if(res == 0) {
3242+ /* Filter failed */
3243+
3244+#if defined(RING_DEBUG)
3245+ printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
3246+ "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3247+ (int)skb->len, pfr->slots_info->tot_pkts,
3248+ pfr->slots_info->insert_idx,
3249+ skb->pkt_type, skb->cloned);
3250+#endif
3251+
3252+ return;
3253+ }
3254+ }
3255+
3256+ /* ************************** */
3257+
3258+ if(pfr->sample_rate > 1) {
3259+ if(pfr->pktToSample == 0) {
3260+ write_lock(&pfr->ring_index_lock);
3261+ pfr->pktToSample = pfr->sample_rate;
3262+ write_unlock(&pfr->ring_index_lock);
3263+ } else {
3264+ write_lock(&pfr->ring_index_lock);
3265+ pfr->pktToSample--;
3266+ write_unlock(&pfr->ring_index_lock);
3267+
3268+#if defined(RING_DEBUG)
3269+ printk("add_skb_to_ring(skb): sampled packet [len=%d]"
3270+ "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3271+ (int)skb->len, pfr->slots_info->tot_pkts,
3272+ pfr->slots_info->insert_idx,
3273+ skb->pkt_type, skb->cloned);
3274+#endif
3275+ return;
3276+ }
3277+ }
3278+
3279+ /* ************************************* */
3280+
3281+ if((pfr->reflector_dev != NULL)
3282+ && (!netif_queue_stopped(pfr->reflector_dev))) {
3283+ int cpu = smp_processor_id();
3284+
3285+ /* increase reference counter so that this skb is not freed */
3286+ atomic_inc(&skb->users);
3287+
3288+ skb->data -= displ;
3289+
3290+ /* send it */
36868e55 3291+ if (netdev_get_tx_queue(pfr->reflector_dev, 0)->xmit_lock_owner != cpu) {
40fd095b 3292+ /* Patch below courtesy of Matthew J. Roth <mroth@imminc.com> */
3293+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3294+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3295+ pfr->reflector_dev->xmit_lock_owner = cpu;
3296+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3297+#else
3298+ netif_tx_lock_bh(pfr->reflector_dev);
3299+#endif
3300+ if (pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev) == 0) {
3301+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3302+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3303+ pfr->reflector_dev->xmit_lock_owner = -1;
3304+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3305+#else
3306+ netif_tx_unlock_bh(pfr->reflector_dev);
3307+#endif
3308+ skb->data += displ;
3309+#if defined(RING_DEBUG)
3310+ printk("++ hard_start_xmit succeeded\n");
3311+#endif
3312+ return; /* OK */
3313+ }
3314+
3315+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3316+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3317+ pfr->reflector_dev->xmit_lock_owner = -1;
3318+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3319+#else
3320+ netif_tx_unlock_bh(pfr->reflector_dev);
3321+#endif
3322+ }
3323+
3324+#if defined(RING_DEBUG)
3325+ printk("++ hard_start_xmit failed\n");
3326+#endif
3327+ skb->data += displ;
3328+ return; /* -ENETDOWN */
3329+ }
3330+
3331+ /* ************************************* */
3332+
3333+#if defined(RING_DEBUG)
3334+ printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
3335+ "[pkt_type=%d][cloned=%d]\n",
3336+ (int)skb->len, pfr->slots_info->tot_pkts,
3337+ pfr->slots_info->insert_idx,
3338+ skb->pkt_type, skb->cloned);
3339+#endif
3340+
3341+ idx = pfr->slots_info->insert_idx;
3342+ theSlot = get_insert_slot(pfr);
3343+
3344+ if((theSlot != NULL) && (theSlot->slot_state == 0)) {
3345+ struct pcap_pkthdr *hdr;
3346+ char *bucket;
3347+ int is_ip_pkt, debug = 0;
3348+
3349+ /* Update Index */
3350+ idx++;
3351+
3352+ bucket = &theSlot->bucket;
3353+ hdr = (struct pcap_pkthdr*)bucket;
3354+
3355+ /* BD - API changed for time keeping */
3356+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3357+ if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
3358+
3359+ hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
3360+#else
c1c82508 3361+ if(skb->tstamp.tv64 == 0) __net_timestamp(skb);
40fd095b 3362+
e6761c74 3363+ struct timeval tv = ktime_to_timeval(skb->tstamp);
c1c82508 3364+ hdr->ts.tv_sec = tv.tv_sec, hdr->ts.tv_usec = tv.tv_usec;
40fd095b 3365+#endif
3366+ hdr->caplen = skb->len+displ;
3367+
3368+ if(hdr->caplen > pfr->slots_info->data_len)
3369+ hdr->caplen = pfr->slots_info->data_len;
3370+
3371+ hdr->len = skb->len+displ;
3372+
3373+ /* Extensions */
3374+ is_ip_pkt = parse_pkt(skb, displ,
3375+ &hdr->l3_proto,
3376+ &hdr->eth_type,
3377+ &hdr->l3_offset,
3378+ &hdr->l4_offset,
3379+ &hdr->vlan_id,
3380+ &hdr->ipv4_src,
3381+ &hdr->ipv4_dst,
3382+ &hdr->l4_src_port,
3383+ &hdr->l4_dst_port,
3384+ &hdr->payload_offset);
3385+
3386+ if(is_ip_pkt && pfr->bitmask_enabled) {
3387+ int vlan_match = 0;
3388+
3389+ fwd_pkt = 0;
3390+
3391+ if(debug) {
3392+ if(is_ip_pkt)
3393+ printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d][sport=%d][dport=%d][src=%u][dst=%u]\n",
3394+ hdr->l3_proto, hdr->vlan_id, hdr->l4_src_port, hdr->l4_dst_port, hdr->ipv4_src, hdr->ipv4_dst);
3395+ else
3396+ printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d]\n", hdr->l3_proto, hdr->vlan_id);
3397+ }
3398+
3399+ if(hdr->vlan_id != (u_int16_t)-1) {
3400+ vlan_match = is_set_bit_bitmask(&pfr->vlan_bitmask, hdr->vlan_id);
3401+ } else
3402+ vlan_match = 1;
3403+
3404+ if(vlan_match) {
3405+ struct ethhdr *eh = (struct ethhdr*)(skb->data);
3406+ u_int32_t src_mac = (eh->h_source[0] & 0xff) + (eh->h_source[1] & 0xff) + ((eh->h_source[2] & 0xff) << 24)
3407+ + ((eh->h_source[3] & 0xff) << 16) + ((eh->h_source[4] & 0xff) << 8) + (eh->h_source[5] & 0xff);
3408+
3409+ if(debug) printk(KERN_INFO "PF_RING: [src_mac=%u]\n", src_mac);
3410+
3411+ fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, src_mac);
3412+
3413+ if(!fwd_pkt) {
3414+ u_int32_t dst_mac = (eh->h_dest[0] & 0xff) + (eh->h_dest[1] & 0xff) + ((eh->h_dest[2] & 0xff) << 24)
3415+ + ((eh->h_dest[3] & 0xff) << 16) + ((eh->h_dest[4] & 0xff) << 8) + (eh->h_dest[5] & 0xff);
3416+
3417+ if(debug) printk(KERN_INFO "PF_RING: [dst_mac=%u]\n", dst_mac);
3418+
3419+ fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, dst_mac);
3420+
3421+ if(is_ip_pkt && (!fwd_pkt)) {
3422+ fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_src);
3423+
3424+ if(!fwd_pkt) {
3425+ fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_dst);
3426+
3427+ if((!fwd_pkt) && ((hdr->l3_proto == IPPROTO_TCP)
3428+ || (hdr->l3_proto == IPPROTO_UDP))) {
3429+ fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_src_port);
3430+ if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_dst_port);
3431+ }
3432+
3433+ if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->proto_bitmask, hdr->l3_proto);
3434+ }
3435+ }
3436+ }
3437+ }
3438+ } else
3439+ fwd_pkt = 1;
3440+
3441+ if(fwd_pkt && (pfr->acsm != NULL)) {
3442+ if((hdr->payload_offset > 0) && ((skb->len+skb->mac_len) > hdr->payload_offset)) {
3443+ char *payload = (skb->data-displ+hdr->payload_offset);
3444+ int payload_len = skb->len /* + skb->mac_len */ - hdr->payload_offset;
3445+
3446+ if((payload_len > 0)
3447+ && ((hdr->l4_src_port == 80) || (hdr->l4_dst_port == 80))) {
3448+ int rc;
3449+
3450+ if(0) {
3451+ char buf[1500];
3452+
3453+ memcpy(buf, payload, payload_len);
3454+ buf[payload_len] = '\0';
3455+ printk("[%s]\n", buf);
3456+ }
3457+
3458+ /* printk("Tring to match pattern [len=%d][%s]\n", payload_len, payload); */
3459+ rc = acsmSearch2(pfr->acsm, payload, payload_len, MatchFound, (void *)0) ? 1 : 0;
3460+
3461+ // printk("Match result: %d\n", fwd_pkt);
3462+ if(rc) {
3463+ printk("Pattern matched!\n");
3464+ } else {
3465+ fwd_pkt = 0;
3466+ }
3467+ } else
3468+ fwd_pkt = 0;
3469+ } else
3470+ fwd_pkt = 0;
3471+ }
3472+
3473+ if(fwd_pkt) {
3474+ memcpy(&bucket[sizeof(struct pcap_pkthdr)], skb->data-displ, hdr->caplen);
3475+
3476+#if defined(RING_DEBUG)
3477+ {
3478+ static unsigned int lastLoss = 0;
3479+
3480+ if(pfr->slots_info->tot_lost
3481+ && (lastLoss != pfr->slots_info->tot_lost)) {
3482+ printk("add_skb_to_ring(%d): [data_len=%d]"
3483+ "[hdr.caplen=%d][skb->len=%d]"
3484+ "[pcap_pkthdr=%d][removeIdx=%d]"
3485+ "[loss=%lu][page=%u][slot=%u]\n",
3486+ idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len,
3487+ sizeof(struct pcap_pkthdr),
3488+ pfr->slots_info->remove_idx,
3489+ (long unsigned int)pfr->slots_info->tot_lost,
3490+ pfr->insert_page_id, pfr->insert_slot_id);
3491+
3492+ lastLoss = pfr->slots_info->tot_lost;
3493+ }
3494+ }
3495+#endif
3496+
3497+ write_lock(&pfr->ring_index_lock);
3498+ if(idx == pfr->slots_info->tot_slots)
3499+ pfr->slots_info->insert_idx = 0;
3500+ else
3501+ pfr->slots_info->insert_idx = idx;
3502+
3503+ pfr->slots_info->tot_insert++;
3504+ theSlot->slot_state = 1;
3505+ write_unlock(&pfr->ring_index_lock);
3506+ }
3507+ } else {
3508+ write_lock(&pfr->ring_index_lock);
3509+ pfr->slots_info->tot_lost++;
3510+ write_unlock(&pfr->ring_index_lock);
3511+
3512+#if defined(RING_DEBUG)
3513+ printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
3514+ "[removeIdx=%u][insertIdx=%u]\n",
3515+ (long unsigned int)pfr->slots_info->tot_lost,
3516+ pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
3517+#endif
3518+ }
3519+
3520+ if(fwd_pkt) {
3521+
3522+ /* wakeup in case of poll() */
3523+ if(waitqueue_active(&pfr->ring_slots_waitqueue))
3524+ wake_up_interruptible(&pfr->ring_slots_waitqueue);
3525+ }
3526+}
3527+
3528+/* ********************************** */
3529+
3530+static u_int hash_skb(struct ring_cluster *cluster_ptr,
3531+ struct sk_buff *skb, u_char recv_packet) {
3532+ u_int idx;
3533+ int displ;
3534+ struct iphdr *ip;
3535+
3536+ if(cluster_ptr->hashing_mode == cluster_round_robin) {
3537+ idx = cluster_ptr->hashing_id++;
3538+ } else {
3539+ /* Per-flow clustering */
3540+ if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
3541+ if(recv_packet)
3542+ displ = 0;
3543+ else
3544+ displ = SKB_DISPLACEMENT;
3545+
3546+ /*
3547+ skb->data+displ
3548+
3549+ Always points to the IP part of the packet
3550+ */
3551+
3552+ ip = (struct iphdr*)(skb->data+displ);
3553+
3554+ idx = ip->saddr+ip->daddr+ip->protocol;
3555+
3556+ if(ip->protocol == IPPROTO_TCP) {
3557+ struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
3558+ +sizeof(struct iphdr));
3559+ idx += tcp->source+tcp->dest;
3560+ } else if(ip->protocol == IPPROTO_UDP) {
3561+ struct udphdr *udp = (struct udphdr*)(skb->data+displ
3562+ +sizeof(struct iphdr));
3563+ idx += udp->source+udp->dest;
3564+ }
3565+ } else
3566+ idx = skb->len;
3567+ }
3568+
3569+ return(idx % cluster_ptr->num_cluster_elements);
3570+}
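/*
 * Note on the per-flow branch above: saddr+daddr and source+dest are
 * commutative sums, so both directions of a flow (10.0.0.1:1234 ->
 * 10.0.0.2:80 and its reply) hash to the same value and are steered to
 * the same cluster element.
 */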
3571+
3572+/* ********************************** */
3573+
3574+static int skb_ring_handler(struct sk_buff *skb,
3575+ u_char recv_packet,
3576+ u_char real_skb /* 1=skb 0=faked skb */) {
3577+ struct sock *skElement;
3578+ int rc = 0;
3579+ struct list_head *ptr;
3580+ struct ring_cluster *cluster_ptr;
3581+
3582+#ifdef PROFILING
3583+ uint64_t rdt = _rdtsc(), rdt1, rdt2;
3584+#endif
3585+
3586+ if((!skb) /* Invalid skb */
3587+ || ((!enable_tx_capture) && (!recv_packet))) {
3588+ /*
3589+ An outgoing packet is about to be sent out
3590+ but we decided not to handle transmitted
3591+ packets.
3592+ */
3593+ return(0);
3594+ }
3595+
3596+#if defined(RING_DEBUG)
3597+ if(0) {
3598+ printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
3599+ skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
3600+ }
3601+#endif
3602+
3603+#ifdef PROFILING
3604+ rdt1 = _rdtsc();
3605+#endif
3606+
3607+ /* [1] Check unclustered sockets */
3608+ for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
3609+ struct ring_opt *pfr;
3610+ struct ring_element *entry;
3611+
3612+ entry = list_entry(ptr, struct ring_element, list);
3613+
3614+ read_lock(&ring_mgmt_lock);
3615+ skElement = entry->sk;
3616+ pfr = ring_sk(skElement);
3617+ read_unlock(&ring_mgmt_lock);
3618+
3619+ if((pfr != NULL)
3620+ && (pfr->cluster_id == 0 /* No cluster */)
3621+ && (pfr->ring_slots != NULL)
3622+ && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3623+ /* We've found the ring where the packet can be stored */
3624+ read_lock(&ring_mgmt_lock);
3625+ add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3626+ read_unlock(&ring_mgmt_lock);
3627+
3628+ rc = 1; /* Ring found: we've done our job */
3629+ }
3630+ }
3631+
3632+ /* [2] Check socket clusters */
3633+ cluster_ptr = ring_cluster_list;
3634+
3635+ while(cluster_ptr != NULL) {
3636+ struct ring_opt *pfr;
3637+
3638+ if(cluster_ptr->num_cluster_elements > 0) {
3639+ u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
3640+
3641+ read_lock(&ring_mgmt_lock);
3642+ skElement = cluster_ptr->sk[skb_hash];
3643+ read_unlock(&ring_mgmt_lock);
3644+
3645+ if(skElement != NULL) {
3646+ pfr = ring_sk(skElement);
3647+
3648+ if((pfr != NULL)
3649+ && (pfr->ring_slots != NULL)
3650+ && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3651+ /* We've found the ring where the packet can be stored */
3652+ read_lock(&ring_mgmt_lock);
3653+ add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3654+ read_unlock(&ring_mgmt_lock);
3655+
3656+ rc = 1; /* Ring found: we've done our job */
3657+ }
3658+ }
3659+ }
3660+
3661+ cluster_ptr = cluster_ptr->next;
3662+ }
3663+
3664+#ifdef PROFILING
3665+ rdt1 = _rdtsc()-rdt1;
3666+#endif
3667+
3668+#ifdef PROFILING
3669+ rdt2 = _rdtsc();
3670+#endif
3671+
3672+ if(transparent_mode) rc = 0;
3673+
3674+ if((rc != 0) && real_skb)
3675+ dev_kfree_skb(skb); /* Free the skb */
3676+
3677+#ifdef PROFILING
3678+ rdt2 = _rdtsc()-rdt2;
3679+ rdt = _rdtsc()-rdt;
3680+
3681+#if defined(RING_DEBUG)
3682+ printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
3683+ (int)rdt, rdt-rdt1,
3684+ (int)((float)((rdt-rdt1)*100)/(float)rdt),
3685+ rdt2,
3686+ (int)((float)(rdt2*100)/(float)rdt));
3687+#endif
3688+#endif
3689+
3690+ return(rc); /* 0 = packet not handled */
3691+}
3692+
3693+/* ********************************** */
3694+
3695+struct sk_buff skb;
3696+
3697+static int buffer_ring_handler(struct net_device *dev,
3698+ char *data, int len) {
3699+
3700+#if defined(RING_DEBUG)
3701+ printk("buffer_ring_handler: [dev=%s][len=%d]\n",
3702+ dev->name == NULL ? "<NULL>" : dev->name, len);
3703+#endif
3704+
3705+ /* BD - API changed for time keeping */
3706+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3707+ skb.dev = dev, skb.len = len, skb.data = data,
3708+ skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */
3709+#else
3710+ skb.dev = dev, skb.len = len, skb.data = data,
c1c82508 3711+ skb.data_len = len, skb.tstamp.tv64 = 0; /* Calculate the time */
40fd095b 3712+#endif
3713+
3714+ skb_ring_handler(&skb, 1, 0 /* fake skb */);
3715+
3716+ return(0);
3717+}
3718+
3719+/* ********************************** */
3720+
36868e55 3721+static int ring_create(struct net *net, struct socket *sock, int protocol) {
40fd095b 3722+ struct sock *sk;
3723+ struct ring_opt *pfr;
3724+ int err;
3725+
3726+#if defined(RING_DEBUG)
3727+ printk("RING: ring_create()\n");
3728+#endif
3729+
3730+ /* Are you root, superuser or so ? */
3731+ if(!capable(CAP_NET_ADMIN))
3732+ return -EPERM;
3733+
3734+ if(sock->type != SOCK_RAW)
3735+ return -ESOCKTNOSUPPORT;
3736+
3737+ if(protocol != htons(ETH_P_ALL))
3738+ return -EPROTONOSUPPORT;
3739+
3740+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3741+ MOD_INC_USE_COUNT;
3742+#endif
3743+
3744+ err = -ENOMEM;
3745+
3746+ // BD: -- broke this out to keep it more simple and clear as to what the
3747+ // options are.
3748+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3749+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3750+ sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
3751+#else
3752+ // BD: API changed in 2.6.12, ref:
3753+ // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
36868e55 3754+ sk = sk_alloc(net, PF_RING, GFP_ATOMIC, &ring_proto);
40fd095b 3755+#endif
3756+#else
3757+ /* Kernel 2.4 */
3758+ sk = sk_alloc(PF_RING, GFP_KERNEL, 1);
3759+#endif
3760+
3761+ if (sk == NULL)
3762+ goto out;
3763+
3764+ sock->ops = &ring_ops;
3765+ sock_init_data(sock, sk);
3766+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3767+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3768+ sk_set_owner(sk, THIS_MODULE);
3769+#endif
3770+#endif
3771+
3772+ err = -ENOMEM;
3773+ ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
3774+
3775+ if (!(pfr = ring_sk(sk))) {
3776+ sk_free(sk);
3777+ goto out;
3778+ }
3779+ memset(pfr, 0, sizeof(*pfr));
3780+ init_waitqueue_head(&pfr->ring_slots_waitqueue);
3781+ pfr->ring_index_lock = RW_LOCK_UNLOCKED;
3782+ atomic_set(&pfr->num_ring_slots_waiters, 0);
3783+ init_blooms(pfr);
3784+ pfr->acsm = NULL;
3785+
3786+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3787+ sk->sk_family = PF_RING;
3788+ sk->sk_destruct = ring_sock_destruct;
3789+#else
3790+ sk->family = PF_RING;
3791+ sk->destruct = ring_sock_destruct;
3792+ sk->num = protocol;
3793+#endif
3794+
3795+ ring_insert(sk);
3796+
3797+#if defined(RING_DEBUG)
3798+ printk("RING: ring_create() - created\n");
3799+#endif
3800+
3801+ return(0);
3802+ out:
3803+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3804+ MOD_DEC_USE_COUNT;
3805+#endif
3806+ return err;
3807+}
3808+
3809+/* *********************************************** */
3810+
3811+static int ring_release(struct socket *sock)
3812+{
3813+ struct sock *sk = sock->sk;
3814+ struct ring_opt *pfr = ring_sk(sk);
3815+
3816+ if(!sk) return 0;
3817+
3818+#if defined(RING_DEBUG)
3819+ printk("RING: called ring_release\n");
3820+#endif
3821+
3822+#if defined(RING_DEBUG)
3823+ printk("RING: ring_release entered\n");
3824+#endif
3825+
3826+ /*
3827+ The calls below must be placed outside the
3828+ write_lock_irq...write_unlock_irq block.
3829+ */
3830+ sock_orphan(sk);
3831+ ring_proc_remove(ring_sk(sk));
3832+
3833+ write_lock_irq(&ring_mgmt_lock);
3834+ ring_remove(sk);
3835+ sock->sk = NULL;
3836+
3837+ /* Free the ring buffer */
3838+ if(pfr->ring_memory) {
3839+ struct page *page, *page_end;
3840+
3841+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3842+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3843+ ClearPageReserved(page);
3844+
3845+ free_pages(pfr->ring_memory, pfr->order);
3846+ }
3847+
3848+ free_bitmask(&pfr->mac_bitmask);
3849+ free_bitmask(&pfr->vlan_bitmask);
3850+ free_bitmask(&pfr->ip_bitmask); free_bitmask(&pfr->twin_ip_bitmask);
3851+ free_bitmask(&pfr->port_bitmask); free_bitmask(&pfr->twin_port_bitmask);
3852+ free_bitmask(&pfr->proto_bitmask);
3853+
3854+ if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
3855+
3856+ kfree(pfr);
3857+ ring_sk(sk) = NULL;
3858+
3859+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3860+ skb_queue_purge(&sk->sk_write_queue);
3861+#endif
3862+
3863+ sock_put(sk);
3864+ write_unlock_irq(&ring_mgmt_lock);
3865+
3866+#if defined(RING_DEBUG)
3867+ printk("RING: ring_release leaving\n");
3868+#endif
3869+
3870+ return 0;
3871+}
3872+
3873+/* ********************************** */
3874+/*
3875+ * We create a ring for this socket and bind it to the specified device
3876+ */
3877+static int packet_ring_bind(struct sock *sk, struct net_device *dev)
3878+{
3879+ u_int the_slot_len;
3880+ u_int32_t tot_mem;
3881+ struct ring_opt *pfr = ring_sk(sk);
3882+ struct page *page, *page_end;
3883+
3884+ if(!dev) return(-1);
3885+
3886+#if defined(RING_DEBUG)
3887+ printk("RING: packet_ring_bind(%s) called\n", dev->name);
3888+#endif
3889+
3890+ /* **********************************************
3891+
3892+ *************************************
3893+ * *
3894+ * FlowSlotInfo *
3895+ * *
3896+ ************************************* <-+
3897+ * FlowSlot * |
3898+ ************************************* |
3899+ * FlowSlot * |
3900+ ************************************* +- num_slots
3901+ * FlowSlot * |
3902+ ************************************* |
3903+ * FlowSlot * |
3904+ ************************************* <-+
3905+
3906+ ********************************************** */
3907+
3908+ the_slot_len = sizeof(u_char) /* flowSlot.slot_state */
3909+#ifdef RING_MAGIC
3910+ + sizeof(u_char)
3911+#endif
3912+ + sizeof(struct pcap_pkthdr)
3913+ + bucket_len /* flowSlot.bucket */;
3914+
3915+ tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
3916+
3917+ /*
3918+ Calculate the value of the order parameter used later.
3919+ See http://www.linuxjournal.com/article.php?sid=1133
3920+ */
3921+ for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ;
3922+
3923+ /*
3924+ We now try to allocate the memory as required. If we fail
3925+ we try to allocate a smaller amount of memory (hence a
3926+ smaller ring).
3927+ */
3928+ while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0)
3929+ if(pfr->order-- == 0)
3930+ break;
3931+
3932+ if(pfr->order == 0) {
3933+ printk("RING: ERROR not enough memory for ring\n");
3934+ return(-1);
3935+ } else {
3936+ printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n",
3937+ PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order);
3938+ }
3939+
3940+ tot_mem = PAGE_SIZE << pfr->order;
3941+ memset((char*)pfr->ring_memory, 0, tot_mem);
3942+
3943+ /* Now we need to reserve the pages */
3944+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3945+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3946+ SetPageReserved(page);
3947+
3948+ pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
3949+ pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
3950+
3951+ pfr->slots_info->version = RING_FLOWSLOT_VERSION;
3952+ pfr->slots_info->slot_len = the_slot_len;
3953+ pfr->slots_info->data_len = bucket_len;
3954+ pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
3955+ pfr->slots_info->tot_mem = tot_mem;
3956+ pfr->slots_info->sample_rate = sample_rate;
3957+
3958+ printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
3959+ pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
3960+ pfr->slots_info->tot_mem);
3961+
3962+#ifdef RING_MAGIC
3963+ {
3964+ int i;
3965+
3966+ for(i=0; i<pfr->slots_info->tot_slots; i++) {
3967+ unsigned long idx = i*pfr->slots_info->slot_len;
3968+ FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
3969+ slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
3970+ }
3971+ }
3972+#endif
3973+
3974+ pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
3975+
3976+ /*
3977+ IMPORTANT
3978+ Leave this statement here as last one. In fact when
3979+ the ring_netdev != NULL the socket is ready to be used.
3980+ */
3981+ pfr->ring_netdev = dev;
3982+
3983+ return(0);
3984+}
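/*
 * Sizing example for packet_ring_bind(), with RING_MAGIC enabled:
 *
 *   the_slot_len = 1 (slot_state) + 1 (magic)
 *                  + sizeof(struct pcap_pkthdr) + bucket_len
 *   requested    = sizeof(FlowSlotInfo) + num_slots*the_slot_len
 *
 * The request is rounded up to a power-of-two number of pages via
 * 'order', and tot_slots is recomputed from the memory actually
 * obtained, so the effective slot count may exceed num_slots.
 */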
3985+
3986+/* ************************************* */
3987+
3988+/* Bind to a device */
3989+static int ring_bind(struct socket *sock,
3990+ struct sockaddr *sa, int addr_len)
3991+{
3992+ struct sock *sk=sock->sk;
3993+ struct net_device *dev = NULL;
3994+
3995+#if defined(RING_DEBUG)
3996+ printk("RING: ring_bind() called\n");
3997+#endif
3998+
3999+ /*
4000+ * Check legality
4001+ */
4002+ if (addr_len != sizeof(struct sockaddr))
4003+ return -EINVAL;
4004+ if (sa->sa_family != PF_RING)
4005+ return -EINVAL;
4006+
4007+ /* Safety check: add trailing zero if missing */
4008+ sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
4009+
4010+#if defined(RING_DEBUG)
4011+ printk("RING: searching device %s\n", sa->sa_data);
4012+#endif
4013+
8924bddf 4014+ if((dev = __dev_get_by_name(&init_net, sa->sa_data)) == NULL) {
40fd095b 4015+#if defined(RING_DEBUG)
4016+ printk("RING: search failed\n");
4017+#endif
4018+ return(-EINVAL);
4019+ } else
4020+ return(packet_ring_bind(sk, dev));
4021+}
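/*
 * A minimal userspace sketch of opening and binding a PF_RING socket,
 * matching the checks in ring_create() and ring_bind() above. The
 * PF_RING family value (27) is an assumption taken from PF_RING's
 * userland headers; CAP_NET_ADMIN is required.
 */
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <arpa/inet.h>      /* htons() */
#include <linux/if_ether.h> /* ETH_P_ALL */

#ifndef PF_RING
#define PF_RING 27 /* assumed family number */
#endif

int open_ring(const char *ifname) {
  struct sockaddr sa;
  int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));

  if(fd < 0) { perror("socket"); return(-1); }

  memset(&sa, 0, sizeof(sa));
  sa.sa_family = PF_RING;
  strncpy(sa.sa_data, ifname, sizeof(sa.sa_data)-1);

  if(bind(fd, &sa, sizeof(sa)) < 0) { perror("bind"); return(-1); }

  return(fd);
}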
4022+
4023+/* ************************************* */
4024+
4025+static int ring_mmap(struct file *file,
4026+ struct socket *sock,
4027+ struct vm_area_struct *vma)
4028+{
4029+ struct sock *sk = sock->sk;
4030+ struct ring_opt *pfr = ring_sk(sk);
4031+ unsigned long size, start;
4032+ u_int pagesToMap;
4033+ char *ptr;
4034+
4035+#if defined(RING_DEBUG)
4036+ printk("RING: ring_mmap() called\n");
4037+#endif
4038+
4039+ if(pfr->ring_memory == 0) {
4040+#if defined(RING_DEBUG)
4041+ printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
4042+#endif
4043+ return -EINVAL;
4044+ }
4045+
4046+ size = (unsigned long)(vma->vm_end-vma->vm_start);
4047+
4048+ if(size % PAGE_SIZE) {
4049+#if defined(RING_DEBUG)
4050+ printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
4051+#endif
4052+ return(-EINVAL);
4053+ }
4054+
4055+ /* if userspace tries to mmap beyond end of our buffer, fail */
4056+ if(size > pfr->slots_info->tot_mem) {
4057+#if defined(RING_DEBUG)
4058+ printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem);
4059+#endif
4060+ return(-EINVAL);
4061+ }
4062+
4063+ pagesToMap = size/PAGE_SIZE;
4064+
4065+#if defined(RING_DEBUG)
4066+ printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
4067+#endif
4068+
4069+#if defined(RING_DEBUG)
4070+ printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
4071+ pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
4072+ pfr->ring_netdev->name);
4073+#endif
4074+
4075+ /* we do not want to have this area swapped out, lock it */
4076+ vma->vm_flags |= VM_LOCKED;
4077+ start = vma->vm_start;
4078+
4079+ /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */
4080+ ptr = (char*)(start+PAGE_SIZE);
4081+
4082+ if(remap_page_range(
4083+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4084+ vma,
4085+#endif
4086+ start,
4087+ __pa(pfr->ring_memory),
4088+ PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
4089+#if defined(RING_DEBUG)
4090+ printk("remap_page_range() failed\n");
4091+#endif
4092+ return(-EAGAIN);
4093+ }
4094+
4095+#if defined(RING_DEBUG)
4096+ printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap);
4097+#endif
4098+
4099+ return 0;
4100+}
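/*
 * Userspace counterpart of ring_mmap(), as a sketch: the length must be
 * a PAGE_SIZE multiple no larger than the ring's tot_mem, and the
 * mapping starts with FlowSlotInfo followed by the slots, mirroring
 * packet_ring_bind(). "ring_len" is assumed to come from out of band
 * (e.g. the Tot Memory line in /proc/net/pf_ring/<pid>).
 */
#include <sys/mman.h>

static char *map_ring(int fd, size_t ring_len) {
  char *ring = (char*)mmap(NULL, ring_len, PROT_READ | PROT_WRITE,
                           MAP_SHARED, fd, 0);
  if(ring == MAP_FAILED) return(NULL);

  /* FlowSlotInfo *fsi  = (FlowSlotInfo*)ring;
     char        *slots = ring + sizeof(FlowSlotInfo); */
  return(ring);
}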
4101+
4102+/* ************************************* */
4103+
4104+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4105+static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
4106+ struct msghdr *msg, size_t len, int flags)
4107+#else
4108+ static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
4109+ int flags, struct scm_cookie *scm)
4110+#endif
4111+{
4112+ FlowSlot* slot;
4113+ struct ring_opt *pfr = ring_sk(sock->sk);
4114+ u_int32_t queued_pkts, num_loops = 0;
4115+
4116+#if defined(RING_DEBUG)
4117+ printk("ring_recvmsg called\n");
4118+#endif
4119+
4120+ slot = get_remove_slot(pfr);
4121+
4122+ while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
4123+ wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
4124+
4125+#if defined(RING_DEBUG)
4126+    if(slot != NULL) printk("-> ring_recvmsg [slot_state=%d][queued_pkts=%d][num_loops=%d]\n",
4127+	     slot->slot_state, queued_pkts, num_loops);
4128+#endif
4129+
4130+ if(queued_pkts > 0) {
4131+ if(num_loops++ > MAX_QUEUE_LOOPS)
4132+ break;
4133+ }
4134+ }
4135+
4136+#if defined(RING_DEBUG)
4137+ if(slot != NULL)
4138+ printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
4139+ queued_pkts, num_loops);
4140+#endif
4141+
4142+ return(queued_pkts);
4143+}
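+
+/*
+ * Note: recvmsg() on a ring socket does not copy packet data into msg;
+ * packets are consumed through the mmap()ed slots. The loop above only
+ * waits until at least MIN_QUEUED_PKTS packets are queued and returns the
+ * count. Because wait_event_interruptible() is given a constant-true
+ * condition it returns at once, so this is in effect a bounded polling
+ * loop (at most MAX_QUEUE_LOOPS extra iterations once packets appear).
+ */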
4144+
4145+/* ************************************* */
4146+
4147+unsigned int ring_poll(struct file * file,
4148+ struct socket *sock, poll_table *wait)
4149+{
4150+ FlowSlot* slot;
4151+ struct ring_opt *pfr = ring_sk(sock->sk);
4152+
4153+#if defined(RING_DEBUG)
4154+ printk("poll called\n");
4155+#endif
4156+
4157+ slot = get_remove_slot(pfr);
4158+
4159+ if((slot != NULL) && (slot->slot_state == 0))
4160+ poll_wait(file, &pfr->ring_slots_waitqueue, wait);
4161+
4162+#if defined(RING_DEBUG)
4163+  if(slot != NULL) printk("poll returning %d\n", slot->slot_state);
4164+#endif
4165+
4166+ if((slot != NULL) && (slot->slot_state == 1))
4167+ return(POLLIN | POLLRDNORM);
4168+ else
4169+ return(0);
4170+}
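+
+/*
+ * poll() semantics: the socket is readable (POLLIN | POLLRDNORM) as soon
+ * as the next slot to be read is full (slot_state == 1); while it is
+ * empty the caller is registered on ring_slots_waitqueue and woken when
+ * the capture path fills a slot.
+ */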
4171+
4172+/* ************************************* */
4173+
4174+int add_to_cluster_list(struct ring_cluster *el,
4175+ struct sock *sock) {
4176+
4177+ if(el->num_cluster_elements == CLUSTER_LEN)
4178+ return(-1); /* Cluster full */
4179+
4180+ ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
4181+ el->sk[el->num_cluster_elements] = sock;
4182+ el->num_cluster_elements++;
4183+ return(0);
4184+}
4185+
4186+/* ************************************* */
4187+
4188+int remove_from_cluster_list(struct ring_cluster *el,
4189+ struct sock *sock) {
4190+ int i, j;
4191+
4192+ for(i=0; i<CLUSTER_LEN; i++)
4193+ if(el->sk[i] == sock) {
4194+ el->num_cluster_elements--;
4195+
4196+ if(el->num_cluster_elements > 0) {
4197+ /* The cluster contains other elements */
4198+ for(j=i; j<CLUSTER_LEN-1; j++)
4199+ el->sk[j] = el->sk[j+1];
4200+
4201+ el->sk[CLUSTER_LEN-1] = NULL;
4202+ } else {
4203+ /* Empty cluster */
4204+ memset(el->sk, 0, sizeof(el->sk));
4205+ }
4206+
4207+ return(0);
4208+ }
4209+
4210+ return(-1); /* Not found */
4211+}
4212+
4213+/* ************************************* */
4214+
4215+static int remove_from_cluster(struct sock *sock,
4216+ struct ring_opt *pfr)
4217+{
4218+ struct ring_cluster *el;
4219+
4220+#if defined(RING_DEBUG)
4221+ printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
4222+#endif
4223+
4224+ if(pfr->cluster_id == 0 /* 0 = No Cluster */)
4225+    return(0); /* Nothing to do */
4226+
4227+ el = ring_cluster_list;
4228+
4229+ while(el != NULL) {
4230+ if(el->cluster_id == pfr->cluster_id) {
4231+ return(remove_from_cluster_list(el, sock));
4232+ } else
4233+ el = el->next;
4234+ }
4235+
4236+ return(-EINVAL); /* Not found */
4237+}
4238+
4239+/* ************************************* */
4240+
4241+static int add_to_cluster(struct sock *sock,
4242+ struct ring_opt *pfr,
4243+ u_short cluster_id)
4244+{
4245+ struct ring_cluster *el;
4246+
4247+#if defined(RING_DEBUG)
4248+ printk("--> add_to_cluster(%d)\n", cluster_id);
4249+#endif
4250+
4251+ if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
4252+
4253+ if(pfr->cluster_id != 0)
4254+ remove_from_cluster(sock, pfr);
4255+
4256+ el = ring_cluster_list;
4257+
4258+ while(el != NULL) {
4259+ if(el->cluster_id == cluster_id) {
4260+ return(add_to_cluster_list(el, sock));
4261+ } else
4262+ el = el->next;
4263+ }
4264+
4265+ /* There's no existing cluster. We need to create one */
4266+ if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL)
4267+ return(-ENOMEM);
4268+
4269+ el->cluster_id = cluster_id;
4270+ el->num_cluster_elements = 1;
4271+ el->hashing_mode = cluster_per_flow; /* Default */
4272+ el->hashing_id = 0;
4273+
4274+ memset(el->sk, 0, sizeof(el->sk));
4275+ el->sk[0] = sock;
4276+ el->next = ring_cluster_list;
4277+ ring_cluster_list = el;
4278+ pfr->cluster_id = cluster_id;
4279+
4280+ return(0); /* 0 = OK */
4281+}
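+
+/*
+ * Usage sketch (illustrative): a cluster lets several sockets share one
+ * capture stream, balanced per flow by default (cluster_per_flow). A
+ * socket joins cluster 7 with:
+ *
+ *   u_int cluster_id = 7;
+ *   setsockopt(fd, 0, SO_ADD_TO_CLUSTER, &cluster_id, sizeof(cluster_id));
+ *
+ * optlen must equal sizeof(int) or the call fails with -EINVAL; the
+ * level argument is not inspected by ring_setsockopt().
+ */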
4282+
4283+/* ************************************* */
4284+
4285+/* Code taken/inspired from core/sock.c */
4286+static int ring_setsockopt(struct socket *sock,
4287+ int level, int optname,
4288+ char *optval, int optlen)
4289+{
4290+ struct ring_opt *pfr = ring_sk(sock->sk);
4291+ int val, found, ret = 0;
4292+ u_int cluster_id, do_enable;
4293+ char devName[8], bloom_filter[256], aho_pattern[256];
4294+
4295+ if(pfr == NULL) return(-EINVAL);
4296+
4297+ if (get_user(val, (int *)optval))
4298+ return -EFAULT;
4299+
4300+ found = 1;
4301+
4302+ switch(optname)
4303+ {
4304+ case SO_ATTACH_FILTER:
4305+ ret = -EINVAL;
4306+ if (optlen == sizeof(struct sock_fprog)) {
4307+ unsigned int fsize;
4308+ struct sock_fprog fprog;
4309+ struct sk_filter *filter;
4310+
4311+ ret = -EFAULT;
4312+
4313+ /*
4314+ NOTE
4315+
4316+ Do not call copy_from_user within a held
4317+	   spinlock (e.g. ring_mgmt_lock), as this caused
4318+ problems when certain debugging was enabled under
4319+ 2.6.5 -- including hard lockups of the machine.
4320+ */
4321+ if(copy_from_user(&fprog, optval, sizeof(fprog)))
4322+ break;
4323+
4324+ fsize = sizeof(struct sock_filter) * fprog.len;
4325+ filter = kmalloc(fsize, GFP_KERNEL);
4326+
4327+ if(filter == NULL) {
4328+ ret = -ENOMEM;
4329+ break;
4330+ }
4331+
4332+	  if(copy_from_user(filter->insns, fprog.filter, fsize))
4333+	    { kfree(filter); break; } /* free the filter on failure to avoid a leak */
4334+
4335+ filter->len = fprog.len;
4336+
4337+ if(sk_chk_filter(filter->insns, filter->len) != 0) {
4338+ /* Bad filter specified */
4339+ kfree(filter);
4340+ pfr->bpfFilter = NULL;
4341+ break;
4342+ }
4343+
4344+ /* get the lock, set the filter, release the lock */
4345+ write_lock(&ring_mgmt_lock);
4346+ pfr->bpfFilter = filter;
4347+ write_unlock(&ring_mgmt_lock);
4348+ ret = 0;
4349+ }
4350+ break;
4351+
4352+ case SO_DETACH_FILTER:
4353+      write_lock(&ring_mgmt_lock);
4354+      found = 1;
4355+      if(pfr->bpfFilter != NULL) {
4356+	kfree(pfr->bpfFilter);
4357+	pfr->bpfFilter = NULL;
4358+      } else
4359+	ret = -ENONET; /* no filter attached */
4360+      /* unlock on both paths so the lock is never leaked */
4361+      write_unlock(&ring_mgmt_lock);
4362+      break;
4363+
4364+ case SO_ADD_TO_CLUSTER:
4365+ if (optlen!=sizeof(val))
4366+ return -EINVAL;
4367+
4368+ if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
4369+ return -EFAULT;
4370+
4371+ write_lock(&ring_mgmt_lock);
4372+ ret = add_to_cluster(sock->sk, pfr, cluster_id);
4373+ write_unlock(&ring_mgmt_lock);
4374+ break;
4375+
4376+ case SO_REMOVE_FROM_CLUSTER:
4377+ write_lock(&ring_mgmt_lock);
4378+ ret = remove_from_cluster(sock->sk, pfr);
4379+ write_unlock(&ring_mgmt_lock);
4380+ break;
4381+
4382+ case SO_SET_REFLECTOR:
4383+ if(optlen >= (sizeof(devName)-1))
4384+ return -EINVAL;
4385+
4386+ if(optlen > 0) {
4387+ if(copy_from_user(devName, optval, optlen))
4388+ return -EFAULT;
4389+ }
4390+
4391+ devName[optlen] = '\0';
4392+
4393+#if defined(RING_DEBUG)
4394+ printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
4395+#endif
4396+
4397+ write_lock(&ring_mgmt_lock);
c444bcac 4398+ pfr->reflector_dev = dev_get_by_name(&init_net, devName);
40fd095b 4399+ write_unlock(&ring_mgmt_lock);
4400+
4401+#if defined(RING_DEBUG)
4402+ if(pfr->reflector_dev != NULL)
4403+      printk("SO_SET_REFLECTOR(%s): succeeded\n", devName);
4404+ else
4405+ printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
4406+#endif
4407+ break;
4408+
4409+ case SO_SET_BLOOM:
4410+ if(optlen >= (sizeof(bloom_filter)-1))
4411+ return -EINVAL;
4412+
4413+ if(optlen > 0) {
4414+ if(copy_from_user(bloom_filter, optval, optlen))
4415+ return -EFAULT;
4416+ }
4417+
4418+ bloom_filter[optlen] = '\0';
4419+
4420+ write_lock(&ring_mgmt_lock);
4421+ handle_bloom_filter_rule(pfr, bloom_filter);
4422+ write_unlock(&ring_mgmt_lock);
4423+ break;
4424+
4425+ case SO_SET_STRING:
4426+ if(optlen >= (sizeof(aho_pattern)-1))
4427+ return -EINVAL;
4428+
4429+ if(optlen > 0) {
4430+ if(copy_from_user(aho_pattern, optval, optlen))
4431+ return -EFAULT;
4432+ }
4433+
4434+ aho_pattern[optlen] = '\0';
4435+
4436+ write_lock(&ring_mgmt_lock);
4437+ if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
4438+ if(optlen > 0) {
4439+#if 1
4440+ if((pfr->acsm = acsmNew2()) != NULL) {
4441+ int nc=1 /* case sensitive */, i = 0;
4442+
4443+ pfr->acsm->acsmFormat = ACF_BANDED;
4444+ acsmAddPattern2(pfr->acsm, (unsigned char*)aho_pattern,
4445+ (int)strlen(aho_pattern), nc, 0, 0,(void*)aho_pattern, i);
4446+ acsmCompile2(pfr->acsm);
4447+ }
4448+#else
4449+ pfr->acsm = kmalloc (10, GFP_KERNEL); /* TEST */
4450+#endif
4451+ }
4452+ write_unlock(&ring_mgmt_lock);
4453+ break;
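+
+      /*
+       * The SO_SET_STRING case above installs a single payload pattern
+       * into the Snort-derived Aho-Corasick matcher declared in ring.h,
+       * so the capture path can filter packets by content. Illustrative
+       * userspace call (fd as in the bind sketch above):
+       *
+       *   setsockopt(fd, 0, SO_SET_STRING, "GET /", 5);
+       */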
4454+
4455+ case SO_TOGGLE_BLOOM_STATE:
4456+ if(optlen >= (sizeof(bloom_filter)-1))
4457+ return -EINVAL;
4458+
4459+ if(optlen > 0) {
4460+	if(copy_from_user(&do_enable, optval, min((size_t)optlen, sizeof(do_enable)))) /* clamp: do_enable is a u_int */
4461+ return -EFAULT;
4462+ }
4463+
4464+ write_lock(&ring_mgmt_lock);
4465+ if(do_enable)
4466+ pfr->bitmask_enabled = 1;
4467+ else
4468+ pfr->bitmask_enabled = 0;
4469+ write_unlock(&ring_mgmt_lock);
4470+ printk("SO_TOGGLE_BLOOM_STATE: bloom bitmask %s\n",
4471+ pfr->bitmask_enabled ? "enabled" : "disabled");
4472+ break;
4473+
4474+ case SO_RESET_BLOOM_FILTERS:
4475+ if(optlen >= (sizeof(bloom_filter)-1))
4476+ return -EINVAL;
4477+
4478+ if(optlen > 0) {
4479+	if(copy_from_user(&do_enable, optval, min((size_t)optlen, sizeof(do_enable)))) /* clamp: do_enable is a u_int */
4480+ return -EFAULT;
4481+ }
4482+
4483+ write_lock(&ring_mgmt_lock);
4484+ reset_bloom_filters(pfr);
4485+ write_unlock(&ring_mgmt_lock);
4486+ break;
4487+
4488+ default:
4489+ found = 0;
4490+ break;
4491+ }
4492+
4493+ if(found)
4494+ return(ret);
4495+ else
4496+ return(sock_setsockopt(sock, level, optname, optval, optlen));
4497+}
4498+
4499+/* ************************************* */
4500+
4501+static int ring_ioctl(struct socket *sock,
4502+ unsigned int cmd, unsigned long arg)
4503+{
4504+ switch(cmd)
4505+ {
4506+#ifdef CONFIG_INET
4507+ case SIOCGIFFLAGS:
4508+ case SIOCSIFFLAGS:
4509+ case SIOCGIFCONF:
4510+ case SIOCGIFMETRIC:
4511+ case SIOCSIFMETRIC:
4512+ case SIOCGIFMEM:
4513+ case SIOCSIFMEM:
4514+ case SIOCGIFMTU:
4515+ case SIOCSIFMTU:
4516+ case SIOCSIFLINK:
4517+ case SIOCGIFHWADDR:
4518+ case SIOCSIFHWADDR:
4519+ case SIOCSIFMAP:
4520+ case SIOCGIFMAP:
4521+ case SIOCSIFSLAVE:
4522+ case SIOCGIFSLAVE:
4523+ case SIOCGIFINDEX:
4524+ case SIOCGIFNAME:
4525+ case SIOCGIFCOUNT:
4526+ case SIOCSIFHWBROADCAST:
4527+ return(inet_dgram_ops.ioctl(sock, cmd, arg));
4528+#endif
4529+
4530+ default:
4531+ return -ENOIOCTLCMD;
4532+ }
4533+
4534+ return 0;
4535+}
4536+
4537+/* ************************************* */
4538+
4539+static struct proto_ops ring_ops = {
4540+ .family = PF_RING,
4541+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4542+ .owner = THIS_MODULE,
4543+#endif
4544+
4545+ /* Operations that make no sense on ring sockets. */
4546+ .connect = sock_no_connect,
4547+ .socketpair = sock_no_socketpair,
4548+ .accept = sock_no_accept,
4549+ .getname = sock_no_getname,
4550+ .listen = sock_no_listen,
4551+ .shutdown = sock_no_shutdown,
4552+ .sendpage = sock_no_sendpage,
4553+ .sendmsg = sock_no_sendmsg,
4554+ .getsockopt = sock_no_getsockopt,
4555+
4556+ /* Now the operations that really occur. */
4557+ .release = ring_release,
4558+ .bind = ring_bind,
4559+ .mmap = ring_mmap,
4560+ .poll = ring_poll,
4561+ .setsockopt = ring_setsockopt,
4562+ .ioctl = ring_ioctl,
4563+ .recvmsg = ring_recvmsg,
4564+};
4565+
4566+/* ************************************ */
4567+
4568+static struct net_proto_family ring_family_ops = {
4569+ .family = PF_RING,
4570+ .create = ring_create,
4571+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4572+ .owner = THIS_MODULE,
4573+#endif
4574+};
4575+
4576+// BD: API changed in 2.6.12, ref:
4577+// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
4578+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
4579+static struct proto ring_proto = {
4580+ .name = "PF_RING",
4581+ .owner = THIS_MODULE,
4582+ .obj_size = sizeof(struct sock),
4583+};
4584+#endif
4585+
4586+/* ************************************ */
4587+
4588+static void __exit ring_exit(void)
4589+{
4590+ struct list_head *ptr;
4591+ struct ring_element *entry;
4592+
4593+  for(ptr = ring_table.next; ptr != &ring_table; ) {
4594+    entry = list_entry(ptr, struct ring_element, list);
4595+    ptr = ptr->next; kfree(entry); /* advance before freeing the node that embeds 'list' */
4596+  }
4597+
4598+ while(ring_cluster_list != NULL) {
4599+ struct ring_cluster *next = ring_cluster_list->next;
4600+ kfree(ring_cluster_list);
4601+ ring_cluster_list = next;
4602+ }
4603+
4604+ set_skb_ring_handler(NULL);
4605+ set_buffer_ring_handler(NULL);
4606+ sock_unregister(PF_RING);
4607+ ring_proc_term();
4608+ printk("PF_RING shut down.\n");
4609+}
4610+
4611+/* ************************************ */
4612+
4613+static int __init ring_init(void)
4614+{
4615+ printk("Welcome to PF_RING %s\n(C) 2004-07 L.Deri <deri@ntop.org>\n",
4616+ RING_VERSION);
4617+
4618+ INIT_LIST_HEAD(&ring_table);
4619+ ring_cluster_list = NULL;
4620+
4621+ sock_register(&ring_family_ops);
4622+
4623+ set_skb_ring_handler(skb_ring_handler);
4624+ set_buffer_ring_handler(buffer_ring_handler);
4625+
4626+ if(get_buffer_ring_handler() != buffer_ring_handler) {
4627+ printk("PF_RING: set_buffer_ring_handler FAILED\n");
4628+
4629+ set_skb_ring_handler(NULL);
4630+ set_buffer_ring_handler(NULL);
4631+ sock_unregister(PF_RING);
4632+ return -1;
4633+ } else {
4634+ printk("PF_RING: bucket length %d bytes\n", bucket_len);
4635+ printk("PF_RING: ring slots %d\n", num_slots);
4636+ printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate);
4637+ printk("PF_RING: capture TX %s\n",
4638+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
4639+ printk("PF_RING: transparent mode %s\n",
4640+ transparent_mode ? "Yes" : "No");
4641+
4642+ printk("PF_RING initialized correctly.\n");
4643+
4644+ ring_proc_init();
4645+ return 0;
4646+ }
4647+}
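+
+/*
+ * The values reported above (bucket_len, num_slots, sample_rate,
+ * enable_tx_capture, transparent_mode) are module parameters (presumably
+ * declared earlier in this patch), so a load line such as the following
+ * is expected to work (illustrative):
+ *
+ *   insmod ring.ko num_slots=4096 bucket_len=128 enable_tx_capture=0
+ */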
4648+
4649+module_init(ring_init);
4650+module_exit(ring_exit);
4651+MODULE_LICENSE("GPL");
4652+
4653+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4654+MODULE_ALIAS_NETPROTO(PF_RING);
4655+#endif