]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-PF_RING.patch
- updated for 2.6.25.4
[packages/kernel.git] / kernel-PF_RING.patch
CommitLineData
40fd095b 1diff --unified --recursive --new-file linux-2.6.21.4/include/linux/ring.h linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h
2--- linux-2.6.21.4/include/linux/ring.h 1970-01-01 00:00:00.000000000 +0000
3+++ linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h 2007-06-10 16:43:04.346421348 +0000
4@@ -0,0 +1,240 @@
5+/*
6+ * Definitions for packet ring
7+ *
8+ * 2004-07 Luca Deri <deri@ntop.org>
9+ */
10+#ifndef __RING_H
11+#define __RING_H
12+
13+#define INCLUDE_MAC_INFO
14+
15+#ifdef INCLUDE_MAC_INFO
16+#define SKB_DISPLACEMENT 14 /* Include MAC address information */
17+#else
18+#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */
19+#endif
20+
21+#define RING_MAGIC
22+#define RING_MAGIC_VALUE 0x88
23+#define RING_FLOWSLOT_VERSION 6
24+#define RING_VERSION "3.4.1"
25+
26+#define SO_ADD_TO_CLUSTER 99
27+#define SO_REMOVE_FROM_CLUSTER 100
28+#define SO_SET_REFLECTOR 101
29+#define SO_SET_BLOOM 102
30+#define SO_SET_STRING 103
31+#define SO_TOGGLE_BLOOM_STATE 104
32+#define SO_RESET_BLOOM_FILTERS 105
33+
34+#define BITMASK_SET(n, p) (((char*)p->bits_memory)[n/8] |= (1<<(n % 8)))
35+#define BITMASK_CLR(n, p) (((char*)p->bits_memory)[n/8] &= ~(1<<(n % 8)))
36+#define BITMASK_ISSET(n, p) (((char*)p->bits_memory)[n/8] & (1<<(n % 8)))
37+
38+/* *********************************** */
39+
40+/*
41+ Aho-Corasick code taken from Snort
42+ under GPL license
43+*/
44+/*
45+ * DEFINES and Typedef's
46+ */
47+#define MAX_ALPHABET_SIZE 256
48+
49+/*
50+ FAIL STATE for 1,2,or 4 bytes for state transitions
51+
52+ Uncomment this define to use 32 bit state values
53+ #define AC32
54+*/
55+
56+typedef unsigned short acstate_t;
57+#define ACSM_FAIL_STATE2 0xffff
58+
59+/*
60+ *
61+ */
62+typedef
63+struct _acsm_pattern2
64+{
65+ struct _acsm_pattern2 *next;
66+
67+ unsigned char *patrn;
68+ unsigned char *casepatrn;
69+ int n;
70+ int nocase;
71+ int offset;
72+ int depth;
73+ void * id;
74+ int iid;
75+
76+} ACSM_PATTERN2;
77+
78+/*
79+ * transition nodes - either 8 or 12 bytes
80+ */
81+typedef
82+struct trans_node_s {
83+
84+ acstate_t key; /* The character that got us here - sized to keep structure aligned on 4 bytes */
85+ /* to better the caching opportunities. A value that crosses the cache line */
86+ /* forces an expensive reconstruction, typing this as acstate_t stops that. */
87+ acstate_t next_state; /* */
88+ struct trans_node_s * next; /* next transition for this state */
89+
90+} trans_node_t;
91+
92+
93+/*
94+ * User specified final storage type for the state transitions
95+ */
96+enum {
97+ ACF_FULL,
98+ ACF_SPARSE,
99+ ACF_BANDED,
100+ ACF_SPARSEBANDS,
101+};
102+
103+/*
104+ * User specified machine types
105+ *
106+ * TRIE : Keyword trie
107+ * NFA :
108+ * DFA :
109+ */
110+enum {
111+ FSA_TRIE,
112+ FSA_NFA,
113+ FSA_DFA,
114+};
115+
116+/*
117+ * Aho-Corasick State Machine Struct - one per group of patterns
118+ */
119+typedef struct {
120+ int acsmMaxStates;
121+ int acsmNumStates;
122+
123+ ACSM_PATTERN2 * acsmPatterns;
124+ acstate_t * acsmFailState;
125+ ACSM_PATTERN2 ** acsmMatchList;
126+
127+ /* list of transitions in each state, this is used to build the nfa & dfa */
128+ /* after construction we convert to sparse or full format matrix and free */
129+ /* the transition lists */
130+ trans_node_t ** acsmTransTable;
131+
132+ acstate_t ** acsmNextState;
133+ int acsmFormat;
134+ int acsmSparseMaxRowNodes;
135+ int acsmSparseMaxZcnt;
136+
137+ int acsmNumTrans;
138+ int acsmAlphabetSize;
139+ int acsmFSA;
140+
141+} ACSM_STRUCT2;
142+
143+/* *********************************** */
144+
145+#ifndef HAVE_PCAP
146+struct pcap_pkthdr {
147+ struct timeval ts; /* time stamp */
148+ u_int32_t caplen; /* length of portion present */
149+ u_int32_t len; /* length this packet (off wire) */
150+ /* packet parsing info */
151+ u_int16_t eth_type; /* Ethernet type */
152+ u_int16_t vlan_id; /* VLAN Id or -1 for no vlan */
153+ u_int8_t l3_proto; /* Layer 3 protocol */
154+ u_int16_t l3_offset, l4_offset, payload_offset; /* Offsets of L3/L4/payload elements */
155+ u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
156+ u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
157+};
158+#endif
159+
160+/* *********************************** */
161+
162+typedef struct _counter_list {
163+ u_int32_t bit_id;
164+ u_int32_t bit_counter;
165+ struct _counter_list *next;
166+} bitmask_counter_list;
167+
168+typedef struct {
169+ u_int32_t num_bits, order, num_pages;
170+ unsigned long bits_memory;
171+ bitmask_counter_list *clashes;
172+} bitmask_selector;
173+
174+/* *********************************** */
175+
176+enum cluster_type {
177+ cluster_per_flow = 0,
178+ cluster_round_robin
179+};
180+
181+/* *********************************** */
182+
183+#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr))
184+#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr))
185+
186+/* *********************************** */
187+
188+typedef struct flowSlotInfo {
189+ u_int16_t version, sample_rate;
190+ u_int32_t tot_slots, slot_len, data_len, tot_mem;
191+
192+ u_int64_t tot_pkts, tot_lost;
193+ u_int64_t tot_insert, tot_read;
194+ u_int32_t insert_idx, remove_idx;
195+} FlowSlotInfo;
196+
197+/* *********************************** */
198+
199+typedef struct flowSlot {
200+#ifdef RING_MAGIC
201+ u_char magic; /* It must always be zero */
202+#endif
203+ u_char slot_state; /* 0=empty, 1=full */
204+ u_char bucket; /* bucket[bucketLen] */
205+} FlowSlot;
206+
207+/* *********************************** */
208+
209+#ifdef __KERNEL__
210+
211+FlowSlotInfo* getRingPtr(void);
212+int allocateRing(char *deviceName, u_int numSlots,
213+ u_int bucketLen, u_int sampleRate);
214+unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
215+void deallocateRing(void);
216+
217+/* ************************* */
218+
219+typedef int (*handle_ring_skb)(struct sk_buff *skb,
220+ u_char recv_packet, u_char real_skb);
221+extern handle_ring_skb get_skb_ring_handler(void);
222+extern void set_skb_ring_handler(handle_ring_skb the_handler);
223+extern void do_skb_ring_handler(struct sk_buff *skb,
224+ u_char recv_packet, u_char real_skb);
225+
226+typedef int (*handle_ring_buffer)(struct net_device *dev,
227+ char *data, int len);
228+extern handle_ring_buffer get_buffer_ring_handler(void);
229+extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
230+extern int do_buffer_ring_handler(struct net_device *dev,
231+ char *data, int len);
232+#endif /* __KERNEL__ */
233+
234+/* *********************************** */
235+
236+#define PF_RING 27 /* Packet Ring */
237+#define SOCK_RING PF_RING
238+
239+/* ioctl() */
240+#define SIORINGPOLL 0x8888
241+
242+/* *********************************** */
243+
244+#endif /* __RING_H */
245diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/Kconfig
246--- linux-2.6.21.4/net/Kconfig 2007-06-07 21:27:31.000000000 +0000
247+++ linux-2.6.21.4-1-686-smp-ring3/net/Kconfig 2007-06-10 16:43:04.402423771 +0000
248@@ -39,6 +39,7 @@
249 source "net/xfrm/Kconfig"
250 source "net/iucv/Kconfig"
251
252+source "net/ring/Kconfig"
253 config INET
254 bool "TCP/IP networking"
255 ---help---
256diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile
257--- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000
258+++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000
259@@ -42,6 +42,7 @@
260 obj-$(CONFIG_DECNET) += decnet/
261 obj-$(CONFIG_ECONET) += econet/
262 obj-$(CONFIG_VLAN_8021Q) += 8021q/
263+obj-$(CONFIG_RING) += ring/
264 obj-$(CONFIG_IP_DCCP) += dccp/
265 obj-$(CONFIG_IP_SCTP) += sctp/
266 obj-$(CONFIG_IEEE80211) += ieee80211/
40fd095b 267diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c
268--- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000
269+++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000
270@@ -117,6 +117,56 @@
271 #include <linux/err.h>
272 #include <linux/ctype.h>
273
274+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
275+
276+/* #define RING_DEBUG */
277+
278+#include <linux/ring.h>
279+#include <linux/version.h>
280+
281+static handle_ring_skb ring_handler = NULL;
282+
283+handle_ring_skb get_skb_ring_handler() { return(ring_handler); }
284+
285+void set_skb_ring_handler(handle_ring_skb the_handler) {
286+ ring_handler = the_handler;
287+}
288+
289+void do_skb_ring_handler(struct sk_buff *skb,
290+ u_char recv_packet, u_char real_skb) {
291+ if(ring_handler)
292+ ring_handler(skb, recv_packet, real_skb);
293+}
294+
295+/* ******************* */
296+
297+static handle_ring_buffer buffer_ring_handler = NULL;
298+
299+handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); }
300+
301+void set_buffer_ring_handler(handle_ring_buffer the_handler) {
302+ buffer_ring_handler = the_handler;
303+}
304+
305+int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
306+ if(buffer_ring_handler) {
307+ buffer_ring_handler(dev, data, len);
308+ return(1);
309+ } else
310+ return(0);
311+}
312+
313+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
314+EXPORT_SYMBOL(get_skb_ring_handler);
315+EXPORT_SYMBOL(set_skb_ring_handler);
316+EXPORT_SYMBOL(do_skb_ring_handler);
317+
318+EXPORT_SYMBOL(get_buffer_ring_handler);
319+EXPORT_SYMBOL(set_buffer_ring_handler);
320+EXPORT_SYMBOL(do_buffer_ring_handler);
321+#endif
322+
323+#endif
324 /*
325 * The list of packet types we will receive (as opposed to discard)
326 * and the routines to invoke.
327@@ -1474,6 +1524,10 @@
328 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
329 #endif
330 if (q->enqueue) {
331+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
332+ if(ring_handler) ring_handler(skb, 0, 1);
333+#endif /* CONFIG_RING */
334+
335 /* Grab device queue */
336 spin_lock(&dev->queue_lock);
337 q = dev->qdisc;
338@@ -1574,6 +1628,13 @@
339 unsigned long flags;
340
341 /* if netpoll wants it, pretend we never saw it */
342+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
343+ if(ring_handler && ring_handler(skb, 1, 1)) {
344+ /* The packet has been copied into a ring */
345+ return(NET_RX_SUCCESS);
346+ }
347+#endif /* CONFIG_RING */
348+
349 if (netpoll_rx(skb))
350 return NET_RX_DROP;
351
352@@ -1764,6 +1825,13 @@
353 struct net_device *orig_dev;
354 int ret = NET_RX_DROP;
355 __be16 type;
356+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
357+ if(ring_handler && ring_handler(skb, 1, 1)) {
358+ /* The packet has been copied into a ring */
359+ return(NET_RX_SUCCESS);
360+ }
361+#endif /* CONFIG_RING */
362+
363
364 /* if we've gotten here through NAPI, check netpoll */
365 if (skb->dev->poll && netpoll_rx(skb))
40fd095b 366diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig
367--- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000
368+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000
369@@ -0,0 +1,14 @@
370+config RING
371+ tristate "PF_RING sockets (EXPERIMENTAL)"
372+ depends on EXPERIMENTAL
373+ ---help---
374+ PF_RING socket family, optimized for packet capture.
375+ If a PF_RING socket is bound to an adapter (via the bind() system
376+ call), such adapter will be used in read-only mode until the socket
377+ is destroyed. Whenever an incoming packet is received from the adapter
378+ it will not be passed to upper layers, but instead it is copied to a ring
379+ buffer, which in turn is exported to user space applications via mmap.
380+ Please refer to http://luca.ntop.org/Ring.pdf for more.
381+
382+ Say N unless you know what you are doing.
383+
384diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile
385--- linux-2.6.21.4/net/ring/Makefile 1970-01-01 00:00:00.000000000 +0000
386+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile 2007-06-10 16:43:04.350421521 +0000
387@@ -0,0 +1,7 @@
388+#
389+# Makefile for the ring driver.
390+#
391+
392+obj-m += ring.o
393+
394+ring-objs := ring_packet.o
395diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c
396--- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000
397+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000
c1c82508 398@@ -0,0 +1,4258 @@
40fd095b 399+/* ***************************************************************
400+ *
401+ * (C) 2004-07 - Luca Deri <deri@ntop.org>
402+ *
403+ * This code includes contributions courtesy of
404+ * - Jeff Randall <jrandall@nexvu.com>
405+ * - Helmut Manck <helmut.manck@secunet.com>
406+ * - Brad Doctor <brad@stillsecure.com>
407+ * - Amit D. Chaudhary <amit_ml@rajgad.com>
408+ * - Francesco Fusco <fusco@ntop.org>
409+ * - Michael Stiller <ms@2scale.net>
410+ *
411+ *
412+ * This program is free software; you can redistribute it and/or modify
413+ * it under the terms of the GNU General Public License as published by
414+ * the Free Software Foundation; either version 2 of the License, or
415+ * (at your option) any later version.
416+ *
417+ * This program is distributed in the hope that it will be useful,
418+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
419+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
420+ * GNU General Public License for more details.
421+ *
422+ * You should have received a copy of the GNU General Public License
423+ * along with this program; if not, write to the Free Software Foundation,
424+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
425+ *
426+ */
427+
428+#include <linux/version.h>
429+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
430+#include <linux/autoconf.h>
431+#else
432+#include <linux/config.h>
433+#endif
434+#include <linux/module.h>
435+#include <linux/kernel.h>
436+#include <linux/socket.h>
437+#include <linux/skbuff.h>
438+#include <linux/rtnetlink.h>
439+#include <linux/in.h>
440+#include <linux/inet.h>
441+#include <linux/in6.h>
442+#include <linux/init.h>
443+#include <linux/filter.h>
444+#include <linux/ring.h>
445+#include <linux/ip.h>
446+#include <linux/tcp.h>
447+#include <linux/udp.h>
448+#include <linux/list.h>
449+#include <linux/proc_fs.h>
450+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
451+#include <net/xfrm.h>
452+#else
453+#include <linux/poll.h>
454+#endif
455+#include <net/sock.h>
456+#include <asm/io.h> /* needed for virt_to_phys() */
457+#ifdef CONFIG_INET
458+#include <net/inet_common.h>
459+#endif
460+
461+/* #define RING_DEBUG */
462+
463+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
464+static inline int remap_page_range(struct vm_area_struct *vma,
465+ unsigned long uvaddr,
466+ unsigned long paddr,
467+ unsigned long size,
468+ pgprot_t prot) {
469+ return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT,
470+ size, prot));
471+}
472+#endif
473+
474+/* ************************************************* */
475+
476+#define CLUSTER_LEN 8
477+
478+struct ring_cluster {
479+ u_short cluster_id; /* 0 = no cluster */
480+ u_short num_cluster_elements;
481+ enum cluster_type hashing_mode;
482+ u_short hashing_id;
483+ struct sock *sk[CLUSTER_LEN];
484+ struct ring_cluster *next; /* NULL = last element of the cluster */
485+};
486+
487+/* ************************************************* */
488+
489+struct ring_element {
490+ struct list_head list;
491+ struct sock *sk;
492+};
493+
494+/* ************************************************* */
495+
496+struct ring_opt {
497+ struct net_device *ring_netdev;
498+
499+ u_short ring_pid;
500+
501+ /* Cluster */
502+ u_short cluster_id; /* 0 = no cluster */
503+
504+ /* Reflector */
505+ struct net_device *reflector_dev;
506+
507+ /* Packet buffers */
508+ unsigned long order;
509+
510+ /* Ring Slots */
511+ unsigned long ring_memory;
512+ FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
513+ char *ring_slots; /* Basically it points to ring_memory
514+ +sizeof(FlowSlotInfo) */
515+
516+ /* Packet Sampling */
517+ u_int pktToSample, sample_rate;
518+
519+ /* BPF Filter */
520+ struct sk_filter *bpfFilter;
521+
522+ /* Aho-Corasick */
523+ ACSM_STRUCT2 * acsm;
524+
525+ /* Locks */
526+ atomic_t num_ring_slots_waiters;
527+ wait_queue_head_t ring_slots_waitqueue;
528+ rwlock_t ring_index_lock;
529+
530+ /* Bloom Filters */
531+ u_char bitmask_enabled;
532+ bitmask_selector mac_bitmask, vlan_bitmask, ip_bitmask, twin_ip_bitmask,
533+ port_bitmask, twin_port_bitmask, proto_bitmask;
534+ u_int32_t num_mac_bitmask_add, num_mac_bitmask_remove;
535+ u_int32_t num_vlan_bitmask_add, num_vlan_bitmask_remove;
536+ u_int32_t num_ip_bitmask_add, num_ip_bitmask_remove;
537+ u_int32_t num_port_bitmask_add, num_port_bitmask_remove;
538+ u_int32_t num_proto_bitmask_add, num_proto_bitmask_remove;
539+
540+ /* Indexes (Internal) */
541+ u_int insert_page_id, insert_slot_id;
542+};
543+
544+/* ************************************************* */
545+
546+/* List of all ring sockets. */
547+static struct list_head ring_table;
548+static u_int ring_table_size;
549+
550+/* List of all clusters */
551+static struct ring_cluster *ring_cluster_list;
552+
553+static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
554+
555+/* ********************************** */
556+
557+/* /proc entry for ring module */
558+struct proc_dir_entry *ring_proc_dir = NULL;
559+struct proc_dir_entry *ring_proc = NULL;
560+
561+static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
562+static void ring_proc_add(struct ring_opt *pfr);
563+static void ring_proc_remove(struct ring_opt *pfr);
564+static void ring_proc_init(void);
565+static void ring_proc_term(void);
566+
567+/* ********************************** */
568+
569+/* Forward */
570+static struct proto_ops ring_ops;
571+
572+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
573+static struct proto ring_proto;
574+#endif
575+
576+static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
577+ u_char real_skb);
578+static int buffer_ring_handler(struct net_device *dev, char *data, int len);
579+static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
580+
581+/* Extern */
582+
583+/* ********************************** */
584+
585+/* Defaults */
586+static unsigned int bucket_len = 128, num_slots = 4096, sample_rate = 1,
587+ transparent_mode = 1, enable_tx_capture = 1;
588+
589+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
590+module_param(bucket_len, uint, 0644);
591+module_param(num_slots, uint, 0644);
592+module_param(sample_rate, uint, 0644);
593+module_param(transparent_mode, uint, 0644);
594+module_param(enable_tx_capture, uint, 0644);
595+#else
596+MODULE_PARM(bucket_len, "i");
597+MODULE_PARM(num_slots, "i");
598+MODULE_PARM(sample_rate, "i");
599+MODULE_PARM(transparent_mode, "i");
600+MODULE_PARM(enable_tx_capture, "i");
601+#endif
602+
603+MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
604+MODULE_PARM_DESC(num_slots, "Number of ring slots");
605+MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
606+MODULE_PARM_DESC(transparent_mode,
607+ "Set to 1 to set transparent mode "
608+ "(slower but backwards compatible)");
609+
610+MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
611+
612+/* ********************************** */
613+
614+#define MIN_QUEUED_PKTS 64
615+#define MAX_QUEUE_LOOPS 64
616+
617+
618+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
619+#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
620+#define ring_sk(__sk) ((__sk)->sk_protinfo)
621+#else
622+#define ring_sk_datatype(a) (a)
623+#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
624+#endif
625+
626+#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
627+
628+/*
629+ int dev_queue_xmit(struct sk_buff *skb)
630+ skb->dev;
631+ struct net_device *dev_get_by_name(const char *name)
632+*/
633+
634+/* ********************************** */
635+
636+/*
637+** $Id$
638+**
639+** acsmx2.c
640+**
641+** Multi-Pattern Search Engine
642+**
643+** Aho-Corasick State Machine - version 2.0
644+**
645+** Supports both Non-Deterministic and Deterministic Finite Automata
646+**
647+**
648+** Reference - Efficient String matching: An Aid to Bibliographic Search
649+** Alfred V Aho and Margaret J Corasick
650+** Bell Laboratories
651+** Copyright(C) 1975 Association for Computing Machinery,Inc
652+**
653+** +++
654+** +++ Version 1.0 notes - Marc Norton:
655+** +++
656+**
657+** Original implementation based on the 4 algorithms in the paper by Aho & Corasick,
658+** some implementation ideas from 'Practical Algorithms in C', and some
659+** of my own.
660+**
661+** 1) Finds all occurrences of all patterns within a text.
662+**
663+** +++
664+** +++ Version 2.0 Notes - Marc Norton/Dan Roelker:
665+** +++
666+**
667+** New implementation modifies the state table storage and access model to use
668+** compacted sparse vector storage. Dan Roelker and I hammered this strategy out
669+** amongst many others in order to reduce memory usage and improve caching performance.
670+** The memory usage is greatly reduced, we only use 1/4 of what we use to. The caching
671+** performance is better in pure benchmarking tests, but does not show overall improvement
672+** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing
673+** many other things before we get back to a patteren match test, so the cache is voided.
674+**
675+** This version has better caching performance characteristics, reduced memory,
676+** more state table storage options, and requires no a priori case conversions.
677+** It does maintain the same public interface. (Snort only used banded storage).
678+**
679+** 1) Supports NFA and DFA state machines, and basic keyword state machines
680+** 2) Initial transition table uses Linked Lists
681+** 3) Improved state table memory options. NFA and DFA state
682+** transition tables are converted to one of 4 formats during compilation.
683+** a) Full matrix
684+** b) Sparse matrix
685+** c) Banded matrix (Default-this is the only one used in snort)
686+** d) Sparse-Banded matrix
687+** 4) Added support for acstate_t in .h file so we can compile states as
688+** 16, or 32 bit state values for another reduction in memory consumption,
689+** smaller states allows more of the state table to be cached, and improves
690+** performance on x86-P4. Your mileage may vary, especially on risc systems.
691+** 5) Added a bool to each state transition list to indicate if there is a matching
692+** pattern in the state. This prevents us from accessing another data array
693+** and can improve caching/performance.
694+** 6) The search functions are very sensitive, don't change them without extensive testing,
695+** or you'll just spoil the caching and prefetching opportunities.
696+**
697+** Extras for fellow pattern matchers:
698+** The table below explains the storage format used at each step.
699+** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.
700+** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.
701+** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster
702+** than the previous version, due to caching performance. This cannot be fully realized in Snort yet,
703+** but other applications may have better caching opportunities.
704+** Snort only needs to use the banded or full storage.
705+**
706+** Transition table format at each processing stage.
707+** -------------------------------------------------
708+** Patterns -> Keyword State Table (List)
709+** Keyword State Table -> NFA (List)
710+** NFA -> DFA (List)
711+** DFA (List)-> Sparse Rows O(m-avg # transitions per state)
712+** -> Banded Rows O(1)
713+** -> Sparse-Banded Rows O(nb-# bands)
714+** -> Full Matrix O(1)
715+**
716+** Copyright(C) 2002,2003,2004 Marc Norton
717+** Copyright(C) 2003,2004 Daniel Roelker
718+** Copyright(C) 2002,2003,2004 Sourcefire,Inc.
719+**
720+** This program is free software; you can redistribute it and/or modify
721+** it under the terms of the GNU General Public License as published by
722+** the Free Software Foundation; either version 2 of the License, or
723+** (at your option) any later version.
724+**
725+** This program is distributed in the hope that it will be useful,
726+** but WITHOUT ANY WARRANTY; without even the implied warranty of
727+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
728+** GNU General Public License for more details.
729+**
730+** You should have received a copy of the GNU General Public License
731+** along with this program; if not, write to the Free Software
732+** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
733+*
734+*/
735+
736+/*
737+ *
738+ */
739+#define MEMASSERT(p,s) if(!p){printk("ACSM-No Memory: %s!\n",s);}
740+
741+/*
742+ *
743+ */
744+static int max_memory = 0;
745+
746+/*
747+ *
748+ */
749+typedef struct acsm_summary_s
750+{
751+ unsigned num_states;
752+ unsigned num_transitions;
753+ ACSM_STRUCT2 acsm;
754+
755+}acsm_summary_t;
756+
757+/*
758+ *
759+ */
760+static acsm_summary_t summary={0,0};
761+
762+/*
763+** Case Translation Table
764+*/
765+static unsigned char xlatcase[256];
766+/*
767+ *
768+ */
769+
770+inline int toupper(int ch) {
771+ if ( (unsigned int)(ch - 'a') < 26u )
772+ ch += 'A' - 'a';
773+ return ch;
774+}
775+
776+static void init_xlatcase(void)
777+{
778+ int i;
779+ for (i = 0; i < 256; i++)
780+ {
781+ xlatcase[i] = toupper(i);
782+ }
783+}
784+
785+/*
786+ * Case Conversion
787+ */
788+static
789+inline
790+void
791+ConvertCaseEx (unsigned char *d, unsigned char *s, int m)
792+{
793+ int i;
794+#ifdef XXXX
795+ int n;
796+ n = m & 3;
797+ m >>= 2;
798+
799+ for (i = 0; i < m; i++ )
800+ {
801+ d[0] = xlatcase[ s[0] ];
802+ d[2] = xlatcase[ s[2] ];
803+ d[1] = xlatcase[ s[1] ];
804+ d[3] = xlatcase[ s[3] ];
805+ d+=4;
806+ s+=4;
807+ }
808+
809+ for (i=0; i < n; i++)
810+ {
811+ d[i] = xlatcase[ s[i] ];
812+ }
813+#else
814+ for (i=0; i < m; i++)
815+ {
816+ d[i] = xlatcase[ s[i] ];
817+ }
818+
819+#endif
820+}
821+
822+
823+/*
824+ *
825+ */
826+static void *
827+AC_MALLOC (int n)
828+{
829+ void *p;
830+ p = kmalloc (n, GFP_KERNEL);
831+ if (p)
832+ max_memory += n;
833+ return p;
834+}
835+
836+
837+/*
838+ *
839+ */
840+static void
841+AC_FREE (void *p)
842+{
843+ if (p)
844+ kfree (p);
845+}
846+
847+
848+/*
849+ * Simple QUEUE NODE
850+ */
851+typedef struct _qnode
852+{
853+ int state;
854+ struct _qnode *next;
855+}
856+ QNODE;
857+
858+/*
859+ * Simple QUEUE Structure
860+ */
861+typedef struct _queue
862+{
863+ QNODE * head, *tail;
864+ int count;
865+}
866+ QUEUE;
867+
868+/*
869+ * Initialize the queue
870+ */
871+static void
872+queue_init (QUEUE * s)
873+{
874+ s->head = s->tail = 0;
875+ s->count= 0;
876+}
877+
878+/*
879+ * Find a State in the queue
880+ */
881+static int
882+queue_find (QUEUE * s, int state)
883+{
884+ QNODE * q;
885+ q = s->head;
886+ while( q )
887+ {
888+ if( q->state == state ) return 1;
889+ q = q->next;
890+ }
891+ return 0;
892+}
893+
894+/*
895+ * Add Tail Item to queue (FiFo/LiLo)
896+ */
897+static void
898+queue_add (QUEUE * s, int state)
899+{
900+ QNODE * q;
901+
902+ if( queue_find( s, state ) ) return;
903+
904+ if (!s->head)
905+ {
906+ q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
907+ MEMASSERT (q, "queue_add");
908+ q->state = state;
909+ q->next = 0;
910+ }
911+ else
912+ {
913+ q = (QNODE *) AC_MALLOC (sizeof (QNODE));
914+ q->state = state;
915+ q->next = 0;
916+ s->tail->next = q;
917+ s->tail = q;
918+ }
919+ s->count++;
920+}
921+
922+
923+/*
924+ * Remove Head Item from queue
925+ */
926+static int
927+queue_remove (QUEUE * s)
928+{
929+ int state = 0;
930+ QNODE * q;
931+ if (s->head)
932+ {
933+ q = s->head;
934+ state = q->state;
935+ s->head = s->head->next;
936+ s->count--;
937+
938+ if( !s->head )
939+ {
940+ s->tail = 0;
941+ s->count = 0;
942+ }
943+ AC_FREE (q);
944+ }
945+ return state;
946+}
947+
948+
949+/*
950+ * Return items in the queue
951+ */
952+static int
953+queue_count (QUEUE * s)
954+{
955+ return s->count;
956+}
957+
958+
959+/*
960+ * Free the queue
961+ */
962+static void
963+queue_free (QUEUE * s)
964+{
965+ while (queue_count (s))
966+ {
967+ queue_remove (s);
968+ }
969+}
970+
971+/*
972+ * Get Next State-NFA
973+ */
974+static
975+int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
976+{
977+ trans_node_t * t = acsm->acsmTransTable[state];
978+
979+ while( t )
980+ {
981+ if( t->key == input )
982+ {
983+ return t->next_state;
984+ }
985+ t=t->next;
986+ }
987+
988+ if( state == 0 ) return 0;
989+
990+ return ACSM_FAIL_STATE2; /* Fail state ??? */
991+}
992+
993+/*
994+ * Get Next State-DFA
995+ */
996+static
997+int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
998+{
999+ trans_node_t * t = acsm->acsmTransTable[state];
1000+
1001+ while( t )
1002+ {
1003+ if( t->key == input )
1004+ {
1005+ return t->next_state;
1006+ }
1007+ t = t->next;
1008+ }
1009+
1010+ return 0; /* default state */
1011+}
1012+/*
1013+ * Put Next State - Head insertion, and transition updates
1014+ */
1015+static
1016+int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
1017+{
1018+ trans_node_t * p;
1019+ trans_node_t * tnew;
1020+
1021+ // printk(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state);
1022+
1023+
1024+ /* Check if the transition already exists, if so just update the next_state */
1025+ p = acsm->acsmTransTable[state];
1026+ while( p )
1027+ {
1028+ if( p->key == input ) /* transition already exists- reset the next state */
1029+ {
1030+ p->next_state = next_state;
1031+ return 0;
1032+ }
1033+ p=p->next;
1034+ }
1035+
1036+ /* Definitely not an existing transition - add it */
1037+ tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
1038+ if( !tnew ) return -1;
1039+
1040+ tnew->key = input;
1041+ tnew->next_state = next_state;
1042+ tnew->next = 0;
1043+
1044+ tnew->next = acsm->acsmTransTable[state];
1045+ acsm->acsmTransTable[state] = tnew;
1046+
1047+ acsm->acsmNumTrans++;
1048+
1049+ return 0;
1050+}
1051+/*
1052+ * Free the entire transition table
1053+ */
1054+static
1055+int List_FreeTransTable( ACSM_STRUCT2 * acsm )
1056+{
1057+ int i;
1058+ trans_node_t * t, *p;
1059+
1060+ if( !acsm->acsmTransTable ) return 0;
1061+
1062+ for(i=0;i< acsm->acsmMaxStates;i++)
1063+ {
1064+ t = acsm->acsmTransTable[i];
1065+
1066+ while( t )
1067+ {
1068+ p = t->next;
1069+ kfree(t);
1070+ t = p;
1071+ max_memory -= sizeof(trans_node_t);
1072+ }
1073+ }
1074+
1075+ kfree(acsm->acsmTransTable);
1076+
1077+ max_memory -= sizeof(void*) * acsm->acsmMaxStates;
1078+
1079+ acsm->acsmTransTable = 0;
1080+
1081+ return 0;
1082+}
1083+
1084+/*
1085+ *
1086+ */
1087+/*
1088+ static
1089+ int List_FreeList( trans_node_t * t )
1090+ {
1091+ int tcnt=0;
1092+
1093+ trans_node_t *p;
1094+
1095+ while( t )
1096+ {
1097+ p = t->next;
1098+ kfree(t);
1099+ t = p;
1100+ max_memory -= sizeof(trans_node_t);
1101+ tcnt++;
1102+ }
1103+
1104+ return tcnt;
1105+ }
1106+*/
1107+
1108+/*
1109+ * Converts row of states from list to a full vector format
1110+ */
1111+static
1112+int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
1113+{
1114+ int tcnt = 0;
1115+ trans_node_t * t = acsm->acsmTransTable[ state ];
1116+
1117+ memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
1118+
1119+ if( !t ) return 0;
1120+
1121+ while(t)
1122+ {
1123+ full[ t->key ] = t->next_state;
1124+ tcnt++;
1125+ t = t->next;
1126+ }
1127+ return tcnt;
1128+}
1129+
1130+/*
1131+ * Copy a Match List Entry - don't dup the pattern data
1132+ */
1133+static ACSM_PATTERN2*
1134+CopyMatchListEntry (ACSM_PATTERN2 * px)
1135+{
1136+ ACSM_PATTERN2 * p;
1137+
1138+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1139+ MEMASSERT (p, "CopyMatchListEntry");
1140+
1141+ memcpy (p, px, sizeof (ACSM_PATTERN2));
1142+
1143+ p->next = 0;
1144+
1145+ return p;
1146+}
1147+
1148+/*
1149+ * Check if a pattern is in the list already,
1150+ * validate it using the 'id' field. This must be unique
1151+ * for every pattern.
1152+ */
1153+/*
1154+ static
1155+ int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
1156+ {
1157+ ACSM_PATTERN2 * p;
1158+
1159+ p = acsm->acsmMatchList[state];
1160+ while( p )
1161+ {
1162+ if( p->id == px->id ) return 1;
1163+ p = p->next;
1164+ }
1165+
1166+ return 0;
1167+ }
1168+*/
1169+
1170+
1171+/*
1172+ * Add a pattern to the list of patterns terminated at this state.
1173+ * Insert at front of list.
1174+ */
1175+static void
1176+AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
1177+{
1178+ ACSM_PATTERN2 * p;
1179+
1180+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1181+
1182+ MEMASSERT (p, "AddMatchListEntry");
1183+
1184+ memcpy (p, px, sizeof (ACSM_PATTERN2));
1185+
1186+ p->next = acsm->acsmMatchList[state];
1187+
1188+ acsm->acsmMatchList[state] = p;
1189+}
1190+
1191+
1192+static void
1193+AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p)
1194+{
1195+ int state, next, n;
1196+ unsigned char *pattern;
1197+
1198+ n = p->n;
1199+ pattern = p->patrn;
1200+ state = 0;
1201+
1202+ /*
1203+ * Match up pattern with existing states
1204+ */
1205+ for (; n > 0; pattern++, n--)
1206+ {
1207+ next = List_GetNextState(acsm,state,*pattern);
1208+ if (next == ACSM_FAIL_STATE2 || next == 0)
1209+ {
1210+ break;
1211+ }
1212+ state = next;
1213+ }
1214+
1215+ /*
1216+ * Add new states for the rest of the pattern bytes, 1 state per byte
1217+ */
1218+ for (; n > 0; pattern++, n--)
1219+ {
1220+ acsm->acsmNumStates++;
1221+ List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
1222+ state = acsm->acsmNumStates;
1223+ }
1224+
1225+ AddMatchListEntry (acsm, state, p );
1226+}
1227+
/*
 * Build A Non-Deterministic Finite Automata
 * The keyword state table must already be built, via AddPatternStates().
 *
 * Classic Aho-Corasick failure-link construction: a breadth-first
 * walk over the keyword trie assigns every state a failure state,
 * and merges the failure target's match list into each state's own
 * list so matches are not lost when failure links are followed.
 */
static void
Build_NFA (ACSM_STRUCT2 * acsm)
{
  int r, s, i;
  QUEUE q, *queue = &q;
  acstate_t * FailState = acsm->acsmFailState;
  ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
  ACSM_PATTERN2 * mlist,* px;

  /* Init a Queue */
  queue_init (queue);


  /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */
  /* NOTE(review): this scan uses List_GetNextState2() while the loop below
     uses List_GetNextState() -- presumably the "2" variant maps missing
     transitions of state 0 to 0 instead of FAIL; confirm in its definition. */
  for (i = 0; i < acsm->acsmAlphabetSize; i++)
  {
    s = List_GetNextState2(acsm,0,i);
    if( s )
    {
      queue_add (queue, s);
      FailState[s] = 0;
    }
  }

  /* Build the fail state successive layer of transitions */
  while (queue_count (queue) > 0)
  {
    r = queue_remove (queue);

    /* Find Final States for any Failure */
    for (i = 0; i < acsm->acsmAlphabetSize; i++)
    {
      int fs, next;

      s = List_GetNextState(acsm,r,i);

      if( s != ACSM_FAIL_STATE2 )
      {
        queue_add (queue, s);

        fs = FailState[r];

        /*
         * Locate the next valid state for 'i' starting at fs
         * (terminates because state 0 never fails)
         */
        while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
        {
          fs = FailState[fs];
        }

        /*
         * Update 's' state failure state to point to the next valid state
         */
        FailState[s] = next;

        /*
         * Copy 'next' state's MatchList to 's' state's MatchList,
         * we copy them so each list can be AC_FREE'd later,
         * else we could just manipulate pointers to fake the copy.
         */
        for( mlist = MatchList[next];
             mlist;
             mlist = mlist->next)
        {
          px = CopyMatchListEntry (mlist);

          /* Insert at front of MatchList */
          px->next = MatchList[s];
          MatchList[s] = px;
        }
      }
    }
  }

  /* Clean up the queue */
  queue_free (queue);
}
1309+
/*
 * Build Deterministic Finite Automata from the NFA
 *
 * BFS over the trie: wherever a state lacks an explicit transition,
 * the transition its failure state would take is patched in, so the
 * search loop never needs to chase failure links at run time.
 */
static void
Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm)
{
  int i, r, s, cFailState;
  QUEUE q, *queue = &q;
  acstate_t * FailState = acsm->acsmFailState;

  /* Init a Queue */
  queue_init (queue);

  /* Add the state 0 transitions 1st */
  for(i=0; i<acsm->acsmAlphabetSize; i++)
  {
    s = List_GetNextState(acsm,0,i);
    if ( s != 0 )
    {
      queue_add (queue, s);
    }
  }

  /* Start building the next layer of transitions */
  while( queue_count(queue) > 0 )
  {
    r = queue_remove(queue);

    /* Process this state's layer */
    for (i = 0; i < acsm->acsmAlphabetSize; i++)
    {
      s = List_GetNextState(acsm,r,i);

      if( s != ACSM_FAIL_STATE2 && s!= 0)
      {
        queue_add (queue, s);
      }
      else
      {
        /* No real transition: borrow the move the failure state takes */
        cFailState = List_GetNextState(acsm,FailState[r],i);

        /* Writing 0 or FAIL would be a no-op, so only store real states */
        if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
        {
          List_PutNextState(acsm,r,i,cFailState);
        }
      }
    }
  }

  /* Clean up the queue */
  queue_free (queue);
}
1362+
1363+/*
1364+ *
1365+ * Convert a row lists for the state table to a full vector format
1366+ *
1367+ */
1368+static int
1369+Conv_List_To_Full(ACSM_STRUCT2 * acsm)
1370+{
1371+ int tcnt, k;
1372+ acstate_t * p;
1373+ acstate_t ** NextState = acsm->acsmNextState;
1374+
1375+ for(k=0;k<acsm->acsmMaxStates;k++)
1376+ {
1377+ p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
1378+ if(!p) return -1;
1379+
1380+ tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
1381+
1382+ p[0] = ACF_FULL;
1383+ p[1] = 0; /* no matches yet */
1384+
1385+ NextState[k] = p; /* now we have a full format row vector */
1386+ }
1387+
1388+ return 0;
1389+}
1390+
1391+/*
1392+ * Convert DFA memory usage from list based storage to a sparse-row storage.
1393+ *
1394+ * The Sparse format allows each row to be either full or sparse formatted. If the sparse row has
1395+ * too many transitions, performance or space may dictate that we use the standard full formatting
1396+ * for the row. More than 5 or 10 transitions per state ought to really whack performance. So the
1397+ * user can specify the max state transitions per state allowed in the sparse format.
1398+ *
1399+ * Standard Full Matrix Format
1400+ * ---------------------------
1401+ * acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)
1402+ *
1403+ * example:
1404+ *
1405+ * events -> a b c d e f g h i j k l m n o p
1406+ * states
1407+ * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0
1408+ *
1409+ * Sparse Format, each row : Words Value
1410+ * 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands)
1411+ * 2-2 bool match flag (indicates this state has pattern matches)
1412+ * 3-3 sparse state count ( # of input/next-state pairs )
1413+ * 4-3+2*cnt 'input,next-state' pairs... each sizof(acstate_t)
1414+ *
1415+ * above example case yields:
1416+ * Full Format: 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ...
1417+ * Sparse format: 1, 3, 'a',1,'b',7,'f',3 - uses 2+2*ntransitions (non-default transitions)
1418+ */
/*
 * Convert every row to either a full vector or a sparse
 * (input,next-state) pair list, depending on how many non-default
 * transitions it has. Row 0 is always stored full. Returns 0 on
 * success, -1 on allocation failure.
 */
static int
Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm)
{
  int cnt, m, k, i;
  acstate_t * p, state, maxstates=0;  /* maxstates: counted but never read back */
  acstate_t ** NextState = acsm->acsmNextState;
  acstate_t full[MAX_ALPHABET_SIZE];

  for(k=0;k<acsm->acsmMaxStates;k++)
  {
    cnt=0;

    /* expand the row's transition list into a full vector first */
    List_ConvToFull(acsm, (acstate_t)k, full );

    /* count non-default transitions to choose the cheaper format */
    for (i = 0; i < acsm->acsmAlphabetSize; i++)
    {
      state = full[i];
      if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
    }

    if( cnt > 0 ) maxstates++;

    /* row 0 stays full; rows denser than the threshold also stay full */
    if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes )
    {
      p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
      if(!p) return -1;

      p[0] = ACF_FULL;
      p[1] = 0;   /* match flag, set later by acsmUpdateMatchStates() */
      memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));
    }
    else
    {
      /* sparse row: fmt, match flag, count, then cnt (input,next) pairs */
      p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
      if(!p) return -1;

      m = 0;
      p[m++] = ACF_SPARSE;
      p[m++] = 0; /* no matches */
      p[m++] = cnt;

      for(i = 0; i < acsm->acsmAlphabetSize ; i++)
      {
        state = full[i];
        if( state != 0 && state != ACSM_FAIL_STATE2 )
        {
          p[m++] = i;
          p[m++] = state;
        }
      }
    }

    NextState[k] = p; /* now we are a sparse formatted state transition array */
  }

  return 0;
}
/*
  Convert Full matrix to Banded row format.

  Word values
  1  2 -> banded
  2  n number of values
  3  i index of 1st value (0-256)
  4 - 3+n next-state values at each index

*/
static int
Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm)
{
  int first = -1, last;
  acstate_t * p, state, full[MAX_ALPHABET_SIZE];
  acstate_t ** NextState = acsm->acsmNextState;
  int cnt,m,k,i;

  for(k=0;k<acsm->acsmMaxStates;k++)
  {
    cnt=0;

    List_ConvToFull(acsm, (acstate_t)k, full );

    /* locate the first and last non-default transition of the row */
    first=-1;
    last =-2;

    for (i = 0; i < acsm->acsmAlphabetSize; i++)
    {
      state = full[i];

      if( state !=0 && state != ACSM_FAIL_STATE2 )
      {
        if( first < 0 ) first = i;
        last = i;
      }
    }

    /* calc band width */
    /* NOTE(review): for a row with no transitions cnt becomes 0 and
       'first' stays -1, so p[3] stores (acstate_t)-1; the banded
       search then always falls outside the band and yields state 0,
       which looks like the intended default -- confirm. */
    cnt= last - first + 1;

    p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));

    if(!p) return -1;

    m = 0;
    p[m++] = ACF_BANDED;
    p[m++] = 0; /* no matches; filled in by acsmUpdateMatchStates() */
    p[m++] = cnt;
    p[m++] = first;

    for(i = first; i <= last; i++)
    {
      p[m++] = full[i];
    }

    NextState[k] = p; /* now we are a banded formatted state transition array */
  }

  return 0;
}
1537+
1538+/*
1539+ * Convert full matrix to Sparse Band row format.
1540+ *
1541+ * next - Full formatted row of next states
1542+ * asize - size of alphabet
1543+ * zcnt - max number of zeros in a run of zeros in any given band.
1544+ *
1545+ * Word Values
1546+ * 1 ACF_SPARSEBANDS
1547+ * 2 number of bands
1548+ * repeat 3 - 5+ ....once for each band in this row.
1549+ * 3 number of items in this band* 4 start index of this band
1550+ * 5- next-state values in this band...
1551+ */
/*
 * Partition a full-format row into bands of "interesting" entries.
 * A band absorbs runs of up to 'zmax' default transitions; a longer
 * run of defaults closes the band. begin[]/end[] receive the
 * first/last index of each band; the band count is returned.
 */
static
int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
{
  int i, nbands,zcnt,last=0;
  acstate_t state;

  nbands=0;
  for( i=0; i<asize; i++ )
  {
    state = next[i];

    /* a non-default transition opens a new band */
    if( state !=0 && state != ACSM_FAIL_STATE2 )
    {
      begin[nbands] = i;
      zcnt=0;

      /* extend the band while the zero-runs stay short enough */
      for( ; i< asize; i++ )
      {
        state = next[i];
        if( state ==0 || state == ACSM_FAIL_STATE2 )
        {
          zcnt++;
          if( zcnt > zmax ) break;  /* run too long: close this band */
        }
        else
        {
          zcnt=0;
          last = i;   /* last interesting index seen in this band */
        }
      }

      end[nbands++] = last;

    }
  }

  return nbands;
}
1590+
1591+
1592+/*
1593+ * Sparse Bands
1594+ *
1595+ * Row Format:
1596+ * Word
1597+ * 1 SPARSEBANDS format indicator
1598+ * 2 bool indicates a pattern match in this state
1599+ * 3 number of sparse bands
1600+ * 4 number of elements in this band
1601+ * 5 start index of this band
1602+ * 6- list of next states
1603+ *
1604+ * m number of elements in this band
1605+ * m+1 start index of this band
1606+ * m+2- list of next states
1607+ */
/*
 * Convert every row of the state table to sparse-band format (see
 * the row layout described above). Returns 0 on success, -1 on
 * allocation failure.
 */
static int
Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm)
{
  acstate_t * p;
  acstate_t ** NextState = acsm->acsmNextState;
  int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;

  int band_begin[MAX_ALPHABET_SIZE];
  int band_end[MAX_ALPHABET_SIZE];
  int nbands,j;
  acstate_t full[MAX_ALPHABET_SIZE];

  for(k=0;k<acsm->acsmMaxStates;k++)
  {
    cnt=0;

    List_ConvToFull(acsm, (acstate_t)k, full );

    /* partition the row into bands of non-default transitions */
    nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );

    /* calc band width space*/
    cnt = 3;   /* fmt word + match flag + band count */
    for(i=0;i<nbands;i++)
    {
      cnt += 2;   /* per band: element count + start index */
      cnt += band_end[i] - band_begin[i] + 1;

      /*printk("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */
    }

    p = AC_MALLOC(sizeof(acstate_t)*(cnt));

    if(!p) return -1;

    m = 0;
    p[m++] = ACF_SPARSEBANDS;
    p[m++] = 0; /* no matches; filled in by acsmUpdateMatchStates() */
    p[m++] = nbands;

    for( i=0;i<nbands;i++ )
    {
      p[m++] = band_end[i] - band_begin[i] + 1;  /* # states in this band */
      p[m++] = band_begin[i];   /* start index */

      for( j=band_begin[i]; j<=band_end[i]; j++ )
      {
        p[m++] = full[j];  /* some states may be state zero */
      }
    }

    NextState[k] = p; /* now we are a sparse-banded formatted state transition array */
  }

  return 0;
}
1663+
1664+/*
1665+ *
1666+ * Convert an NFA or DFA row from sparse to full format
1667+ * and store into the 'full' buffer.
1668+ *
1669+ * returns:
1670+ * 0 - failed, no state transitions
1671+ * *p - pointer to 'full' buffer
1672+ *
1673+ */
1674+/*
1675+ static
1676+ acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full )
1677+ {
1678+ int i;
1679+ acstate_t * p, n, fmt, index, nb, bmatch;
1680+ acstate_t ** NextState = acsm->acsmNextState;
1681+
1682+ p = NextState[k];
1683+
1684+ if( !p ) return 0;
1685+
1686+ fmt = *p++;
1687+
1688+ bmatch = *p++;
1689+
1690+ if( fmt ==ACF_SPARSE )
1691+ {
1692+ n = *p++;
1693+ for( ; n>0; n--, p+=2 )
1694+ {
1695+ full[ p[0] ] = p[1];
1696+ }
1697+ }
1698+ else if( fmt ==ACF_BANDED )
1699+ {
1700+
1701+ n = *p++;
1702+ index = *p++;
1703+
1704+ for( ; n>0; n--, p++ )
1705+ {
1706+ full[ index++ ] = p[0];
1707+ }
1708+ }
1709+ else if( fmt ==ACF_SPARSEBANDS )
1710+ {
1711+ nb = *p++;
1712+ for(i=0;i<nb;i++)
1713+ {
1714+ n = *p++;
1715+ index = *p++;
1716+ for( ; n>0; n--, p++ )
1717+ {
1718+ full[ index++ ] = p[0];
1719+ }
1720+ }
1721+ }
1722+ else if( fmt == ACF_FULL )
1723+ {
1724+ memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t));
1725+ }
1726+
1727+ return full;
1728+ }
1729+*/
1730+
1731+/*
1732+ * Select the desired storage mode
1733+ */
1734+int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
1735+{
1736+ switch( m )
1737+ {
1738+ case ACF_FULL:
1739+ case ACF_SPARSE:
1740+ case ACF_BANDED:
1741+ case ACF_SPARSEBANDS:
1742+ acsm->acsmFormat = m;
1743+ break;
1744+ default:
1745+ return -1;
1746+ }
1747+
1748+ return 0;
1749+}
/*
 * Set the maximum run of default (zero) transitions tolerated inside
 * one sparse band before a new band is started; consumed by
 * calcSparseBands() during ACF_SPARSEBANDS conversion.
 */
void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
{
  acsm->acsmSparseMaxZcnt = n;
}
/*
 * Set the maximum number of non-default transitions a row may have
 * and still be stored in ACF_SPARSE format; denser rows stay full
 * (see Conv_Full_DFA_To_Sparse()).
 */
void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
{
  acsm->acsmSparseMaxRowNodes = n;
}
1764+/*
1765+ *
1766+ */
1767+int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
1768+{
1769+ switch( m )
1770+ {
1771+ case FSA_TRIE:
1772+ case FSA_NFA:
1773+ case FSA_DFA:
1774+ acsm->acsmFSA = m;
1775+ default:
1776+ return -1;
1777+ }
1778+}
1779+/*
1780+ *
1781+ */
1782+int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
1783+{
1784+ if( n <= MAX_ALPHABET_SIZE )
1785+ {
1786+ acsm->acsmAlphabetSize = n;
1787+ }
1788+ else
1789+ {
1790+ return -1;
1791+ }
1792+ return 0;
1793+}
1794+/*
1795+ * Create a new AC state machine
1796+ */
1797+static ACSM_STRUCT2 * acsmNew2 (void)
1798+{
1799+ ACSM_STRUCT2 * p;
1800+
1801+ init_xlatcase ();
1802+
1803+ p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
1804+ MEMASSERT (p, "acsmNew");
1805+
1806+ if (p)
1807+ {
1808+ memset (p, 0, sizeof (ACSM_STRUCT2));
1809+
1810+ /* Some defaults */
1811+ p->acsmFSA = FSA_DFA;
1812+ p->acsmFormat = ACF_BANDED;
1813+ p->acsmAlphabetSize = 256;
1814+ p->acsmSparseMaxRowNodes = 256;
1815+ p->acsmSparseMaxZcnt = 10;
1816+ }
1817+
1818+ return p;
1819+}
1820+/*
1821+ * Add a pattern to the list of patterns for this state machine
1822+ *
1823+ */
1824+int
1825+acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
1826+ int offset, int depth, void * id, int iid)
1827+{
1828+ ACSM_PATTERN2 * plist;
1829+
1830+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1831+ MEMASSERT (plist, "acsmAddPattern");
1832+
1833+ plist->patrn = (unsigned char *) AC_MALLOC ( n );
1834+ MEMASSERT (plist->patrn, "acsmAddPattern");
1835+
1836+ ConvertCaseEx(plist->patrn, pat, n);
1837+
1838+ plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
1839+ MEMASSERT (plist->casepatrn, "acsmAddPattern");
1840+
1841+ memcpy (plist->casepatrn, pat, n);
1842+
1843+ plist->n = n;
1844+ plist->nocase = nocase;
1845+ plist->offset = offset;
1846+ plist->depth = depth;
1847+ plist->id = id;
1848+ plist->iid = iid;
1849+
1850+ plist->next = p->acsmPatterns;
1851+ p->acsmPatterns = plist;
1852+
1853+ return 0;
1854+}
1855+/*
1856+ * Add a Key to the list of key+data pairs
1857+ */
1858+int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
1859+{
1860+ ACSM_PATTERN2 * plist;
1861+
1862+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
1863+ MEMASSERT (plist, "acsmAddPattern");
1864+
1865+ plist->patrn = (unsigned char *) AC_MALLOC (klen);
1866+ memcpy (plist->patrn, key, klen);
1867+
1868+ plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
1869+ memcpy (plist->casepatrn, key, klen);
1870+
1871+ plist->n = klen;
1872+ plist->nocase = nocase;
1873+ plist->offset = 0;
1874+ plist->depth = 0;
1875+ plist->id = 0;
1876+ plist->iid = 0;
1877+
1878+ plist->next = p->acsmPatterns;
1879+ p->acsmPatterns = plist;
1880+
1881+ return 0;
1882+}
1883+
1884+/*
1885+ * Copy a boolean match flag int NextState table, for caching purposes.
1886+ */
1887+static
1888+void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
1889+{
1890+ acstate_t state;
1891+ acstate_t ** NextState = acsm->acsmNextState;
1892+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
1893+
1894+ for( state=0; state<acsm->acsmNumStates; state++ )
1895+ {
1896+ if( MatchList[state] )
1897+ {
1898+ NextState[state][1] = 1;
1899+ }
1900+ else
1901+ {
1902+ NextState[state][1] = 0;
1903+ }
1904+ }
1905+}
1906+
/*
 * Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands
 *
 * Pipeline: (1) size and allocate the per-state tables, (2) insert
 * every pattern into the keyword trie, (3) build the failure links
 * (NFA) and optionally flatten them into a DFA, (4) convert the
 * list-based rows into the selected storage format, (5) cache the
 * per-state match flags and free the build-time transition lists.
 * Returns 0 on success, -1 if a row conversion runs out of memory.
 */
int
acsmCompile2 (ACSM_STRUCT2 * acsm)
{
  int k;
  ACSM_PATTERN2 * plist;

  /* Count number of states (worst case: one state per pattern byte) */
  for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
  {
    acsm->acsmMaxStates += plist->n;
    /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
  }
  acsm->acsmMaxStates++; /* one extra for state 0 */

  /* Alloc a List based State Transition table */
  acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
  MEMASSERT (acsm->acsmTransTable, "acsmCompile");

  memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);

  /* Alloc a failure table - this has a failure state, and a match list for each state */
  acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
  MEMASSERT (acsm->acsmFailState, "acsmCompile");

  memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );

  /* Alloc a MatchList table - this has a list of pattern matches for each state, if any */
  acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
  MEMASSERT (acsm->acsmMatchList, "acsmCompile");

  memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );

  /* Alloc a separate state transition table == in state 's' due to event 'k', transition to 'next' state */
  acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
  MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");

  for (k = 0; k < acsm->acsmMaxStates; k++)
  {
    acsm->acsmNextState[k]=(acstate_t*)0;
  }

  /* Initialize state zero as a branch */
  acsm->acsmNumStates = 0;

  /* Add the 0'th state, */
  //acsm->acsmNumStates++;

  /* Add each Pattern to the State Table - This forms a keywords state table */
  for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
  {
    AddPatternStates (acsm, plist);
  }

  /* acsmNumStates held the highest allocated state id; make it a count
     (state 0 included) */
  acsm->acsmNumStates++;

  if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
  {
    /* Build the NFA */
    Build_NFA (acsm);
  }

  if( acsm->acsmFSA == FSA_DFA )
  {
    /* Convert the NFA to a DFA */
    Convert_NFA_To_DFA (acsm);
  }

  /*
   *
   *  Select Final Transition Table Storage Mode
   *
   */
  if( acsm->acsmFormat == ACF_SPARSE )
  {
    /* Convert DFA Full matrix to a Sparse matrix */
    if( Conv_Full_DFA_To_Sparse(acsm) )
      return -1;
  }

  else if( acsm->acsmFormat == ACF_BANDED )
  {
    /* Convert DFA Full matrix to a Banded matrix */
    if( Conv_Full_DFA_To_Banded(acsm) )
      return -1;
  }

  else if( acsm->acsmFormat == ACF_SPARSEBANDS )
  {
    /* Convert DFA Full matrix to a Sparse-Banded matrix */
    if( Conv_Full_DFA_To_SparseBands(acsm) )
      return -1;
  }
  else if( acsm->acsmFormat == ACF_FULL )
  {
    if( Conv_List_To_Full( acsm ) )
      return -1;
  }

  acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */

  /* Free up the Table Of Transition Lists */
  List_FreeTransTable( acsm );

  /* For now -- show this info */
  /*
   * acsmPrintInfo( acsm );
   */


  /* Accrue Summary State Stats */
  summary.num_states += acsm->acsmNumStates;
  summary.num_transitions += acsm->acsmNumTrans;

  memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));

  return 0;
}
2027+
/*
 * Get the NextState from the NFA, all NFA storage formats use this
 *
 * Returns the next state for 'input'; ACSM_FAIL_STATE2 when a
 * non-zero state has no transition (the caller then follows the
 * failure links); 0 when state 0 has no transition (state 0 never
 * fails).
 */
inline
acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
{
  acstate_t fmt;
  acstate_t n;
  int index;
  int nb;

  fmt = *ps++;

  ps++; /* skip bMatchState */

  switch( fmt )
  {
  case ACF_BANDED:
  {
    n = ps[0];
    index = ps[1];

    /* NOTE(review): Conv_Full_DFA_To_Banded() stores the next-state
       values two words after the count/index pair, yet the reads of
       ps[input-index] below are not offset by 2 and so land on the
       header words -- looks off by 2; confirm against the row layout
       before relying on NFA search over banded rows. */
    if( input < index )
    {
      if(state==0)
      {
        return 0;
      }
      else
      {
        return (acstate_t)ACSM_FAIL_STATE2;
      }
    }
    if( input >= index + n )
    {
      if(state==0)
      {
        return 0;
      }
      else
      {
        return (acstate_t)ACSM_FAIL_STATE2;
      }
    }
    if( ps[input-index] == 0 )
    {
      if( state != 0 )
      {
        return ACSM_FAIL_STATE2;
      }
    }

    return (acstate_t) ps[input-index];
  }

  case ACF_SPARSE:
  {
    n = *ps++; /* number of sparse index-value entries */

    for( ; n>0 ; n-- )
    {
      if( ps[0] > input ) /* cannot match the input, already a higher value than the input */
      {
        return (acstate_t)ACSM_FAIL_STATE2; /* default state */
      }
      else if( ps[0] == input )
      {
        return ps[1]; /* next state */
      }
      ps+=2;
    }
    if( state == 0 )
    {
      return 0;
    }
    return ACSM_FAIL_STATE2;
  }

  case ACF_SPARSEBANDS:
  {
    nb = *ps++; /* number of bands */

    while( nb > 0 ) /* for each band */
    {
      n = *ps++;     /* number of elements */
      index = *ps++; /* 1st element value */

      if( input < index )
      {
        /* bands are stored in increasing index order, so no later
           band can contain 'input' either */
        if( state != 0 )
        {
          return (acstate_t)ACSM_FAIL_STATE2;
        }
        return (acstate_t)0;
      }
      if( (input >= index) && (input < (index + n)) )
      {
        if( ps[input-index] == 0 )
        {
          if( state != 0 )
          {
            return ACSM_FAIL_STATE2;
          }
        }
        return (acstate_t) ps[input-index];
      }
      nb--;
      ps += n;   /* ps already advanced past count/index above */
    }
    if( state != 0 )
    {
      return (acstate_t)ACSM_FAIL_STATE2;
    }
    return (acstate_t)0;
  }

  case ACF_FULL:
  {
    if( ps[input] == 0 )
    {
      if( state != 0 )
      {
        return ACSM_FAIL_STATE2;
      }
    }
    return ps[input];
  }
  }

  return 0;
}
2159+
2160+
2161+
2162+/*
2163+ * Get the NextState from the DFA Next State Transition table
2164+ * Full and banded are supported separately, this is for
2165+ * sparse and sparse-bands
2166+ */
2167+inline
2168+acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
2169+{
2170+ acstate_t n, nb;
2171+ int index;
2172+
2173+ switch( ps[0] )
2174+ {
2175+ /* BANDED */
2176+ case ACF_BANDED:
2177+ {
2178+ /* n=ps[2] : number of entries in the band */
2179+ /* index=ps[3] : index of the 1st entry, sequential thereafter */
2180+
2181+ if( input < ps[3] ) return 0;
2182+ if( input >= (ps[3]+ps[2]) ) return 0;
2183+
2184+ return ps[4+input-ps[3]];
2185+ }
2186+
2187+ /* FULL */
2188+ case ACF_FULL:
2189+ {
2190+ return ps[2+input];
2191+ }
2192+
2193+ /* SPARSE */
2194+ case ACF_SPARSE:
2195+ {
2196+ n = ps[2]; /* number of entries/ key+next pairs */
2197+
2198+ ps += 3;
2199+
2200+ for( ; n>0 ; n-- )
2201+ {
2202+ if( input < ps[0] ) /* cannot match the input, already a higher value than the input */
2203+ {
2204+ return (acstate_t)0; /* default state */
2205+ }
2206+ else if( ps[0] == input )
2207+ {
2208+ return ps[1]; /* next state */
2209+ }
2210+ ps += 2;
2211+ }
2212+ return (acstate_t)0;
2213+ }
2214+
2215+
2216+ /* SPARSEBANDS */
2217+ case ACF_SPARSEBANDS:
2218+ {
2219+ nb = ps[2]; /* number of bands */
2220+
2221+ ps += 3;
2222+
2223+ while( nb > 0 ) /* for each band */
2224+ {
2225+ n = ps[0]; /* number of elements in this band */
2226+ index = ps[1]; /* start index/char of this band */
2227+ if( input < index )
2228+ {
2229+ return (acstate_t)0;
2230+ }
2231+ if( (input < (index + n)) )
2232+ {
2233+ return (acstate_t) ps[2+input-index];
2234+ }
2235+ nb--;
2236+ ps += n;
2237+ }
2238+ return (acstate_t)0;
2239+ }
2240+ }
2241+
2242+ return 0;
2243+}
2244+/*
2245+ * Search Text or Binary Data for Pattern matches
2246+ *
2247+ * Sparse & Sparse-Banded Matrix search
2248+ */
2249+static
2250+inline
2251+int
2252+acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2253+ int (*Match) (void * id, int index, void *data),
2254+ void *data)
2255+{
2256+ acstate_t state;
2257+ ACSM_PATTERN2 * mlist;
2258+ unsigned char * Tend;
2259+ int nfound = 0;
2260+ unsigned char * T, * Tc;
2261+ int index;
2262+ acstate_t ** NextState = acsm->acsmNextState;
2263+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2264+
2265+ Tc = Tx;
2266+ T = Tx;
2267+ Tend = T + n;
2268+
2269+ for( state = 0; T < Tend; T++ )
2270+ {
2271+ state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
2272+
2273+ /* test if this state has any matching patterns */
2274+ if( NextState[state][1] )
2275+ {
2276+ for( mlist = MatchList[state];
2277+ mlist!= NULL;
2278+ mlist = mlist->next )
2279+ {
2280+ index = T - mlist->n - Tc;
2281+ if( mlist->nocase )
2282+ {
2283+ nfound++;
2284+ if (Match (mlist->id, index, data))
2285+ return nfound;
2286+ }
2287+ else
2288+ {
2289+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2290+ {
2291+ nfound++;
2292+ if (Match (mlist->id, index, data))
2293+ return nfound;
2294+ }
2295+ }
2296+ }
2297+ }
2298+ }
2299+ return nfound;
2300+}
/*
 * Full format DFA search
 * Do not change anything here without testing, caching and prefetching
 * performance is very sensitive to any changes.
 *
 * Perf-Notes:
 * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
 * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
 * 3)
 *
 * Here the match check precedes the transition for *T, so matches
 * reported in an iteration ended at T-1 and start at T - n; a final
 * check after the loop handles matches ending on the last byte.
 */
static
inline
int
acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
       int (*Match) (void * id, int index, void *data),
       void *data)
{
  ACSM_PATTERN2 * mlist;
  unsigned char * Tend;
  unsigned char * T;
  int index;
  acstate_t state;
  acstate_t * ps;
  acstate_t sindex;
  acstate_t ** NextState = acsm->acsmNextState;
  ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
  int nfound = 0;

  T = Tx;
  Tend = Tx + n;

  for( state = 0; T < Tend; T++ )
  {
    ps = NextState[ state ];

    sindex = xlatcase[ T[0] ];

    /* check the current state for a pattern match */
    if( ps[1] )
    {
      for( mlist = MatchList[state];
           mlist!= NULL;
           mlist = mlist->next )
      {
        /* match ended at T-1 (check precedes the transition), so it
           starts at T - n */
        index = T - mlist->n - Tx;


        if( mlist->nocase )
        {
          nfound++;
          if (Match (mlist->id, index, data))
            return nfound;
        }
        else
        {
          if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
          {
            nfound++;
            if (Match (mlist->id, index, data))
              return nfound;
          }
        }

      }
    }

    /* full row layout: [0]=fmt, [1]=match flag, [2..]=next states */
    state = ps[ 2u + sindex ];
  }

  /* Check the last state for a pattern match */
  for( mlist = MatchList[state];
       mlist!= NULL;
       mlist = mlist->next )
  {
    index = T - mlist->n - Tx;

    if( mlist->nocase )
    {
      nfound++;
      if (Match (mlist->id, index, data))
        return nfound;
    }
    else
    {
      if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
      {
        nfound++;
        if (Match (mlist->id, index, data))
          return nfound;
      }
    }
  }

  return nfound;
}
/*
 * Banded-Row format DFA search
 * Do not change anything here, caching and prefetching
 * performance is very sensitive to any changes.
 *
 * ps[0] = storage fmt
 * ps[1] = bool match flag
 * ps[2] = # elements in band
 * ps[3] = index of 1st element
 *
 * Like the Full variant, the match check precedes the transition
 * for *T, so matches start at T - n; a final check after the loop
 * handles matches ending on the last byte.
 */
static
inline
int
acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
       int (*Match) (void * id, int index, void *data),
       void *data)
{
  acstate_t state;
  unsigned char * Tend;
  unsigned char * T;
  int sindex;
  int index;
  acstate_t ** NextState = acsm->acsmNextState;
  ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
  ACSM_PATTERN2 * mlist;
  acstate_t * ps;
  int nfound = 0;

  T = Tx;
  Tend = T + n;

  for( state = 0; T < Tend; T++ )
  {
    ps = NextState[state];

    sindex = xlatcase[ T[0] ];

    /* test if this state has any matching patterns */
    if( ps[1] )
    {
      for( mlist = MatchList[state];
           mlist!= NULL;
           mlist = mlist->next )
      {
        /* match ended at T-1 (check precedes the transition) */
        index = T - mlist->n - Tx;

        if( mlist->nocase )
        {
          nfound++;
          if (Match (mlist->id, index, data))
            return nfound;
        }
        else
        {
          if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
          {
            nfound++;
            if (Match (mlist->id, index, data))
              return nfound;
          }
        }
      }
    }

    /* inputs outside the band map to the default state 0 */
    if( sindex < ps[3] ) state = 0;
    else if( sindex >= (ps[3] + ps[2]) ) state = 0;
    else state = ps[ 4u + sindex - ps[3] ];
  }

  /* Check the last state for a pattern match */
  for( mlist = MatchList[state];
       mlist!= NULL;
       mlist = mlist->next )
  {
    index = T - mlist->n - Tx;

    if( mlist->nocase )
    {
      nfound++;
      if (Match (mlist->id, index, data))
        return nfound;
    }
    else
    {
      if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
      {
        nfound++;
        if (Match (mlist->id, index, data))
          return nfound;
      }
    }
  }

  return nfound;
}
2491+
2492+
2493+
2494+/*
2495+ * Search Text or Binary Data for Pattern matches
2496+ *
2497+ * Sparse Storage Version
2498+ */
2499+static
2500+inline
2501+int
2502+acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2503+ int (*Match) (void * id, int index, void *data),
2504+ void *data)
2505+{
2506+ acstate_t state;
2507+ ACSM_PATTERN2 * mlist;
2508+ unsigned char * Tend;
2509+ int nfound = 0;
2510+ unsigned char * T, *Tc;
2511+ int index;
2512+ acstate_t ** NextState= acsm->acsmNextState;
2513+ acstate_t * FailState= acsm->acsmFailState;
2514+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
2515+ unsigned char Tchar;
2516+
2517+ Tc = Tx;
2518+ T = Tx;
2519+ Tend = T + n;
2520+
2521+ for( state = 0; T < Tend; T++ )
2522+ {
2523+ acstate_t nstate;
2524+
2525+ Tchar = xlatcase[ *T ];
2526+
2527+ while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
2528+ state = FailState[state];
2529+
2530+ state = nstate;
2531+
2532+ for( mlist = MatchList[state];
2533+ mlist!= NULL;
2534+ mlist = mlist->next )
2535+ {
2536+ index = T - mlist->n - Tx;
2537+ if( mlist->nocase )
2538+ {
2539+ nfound++;
2540+ if (Match (mlist->id, index, data))
2541+ return nfound;
2542+ }
2543+ else
2544+ {
2545+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
2546+ {
2547+ nfound++;
2548+ if (Match (mlist->id, index, data))
2549+ return nfound;
2550+ }
2551+ }
2552+ }
2553+ }
2554+
2555+ return nfound;
2556+}
2557+
2558+/*
2559+ * Search Function
2560+ */
2561+int
2562+acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
2563+ int (*Match) (void * id, int index, void *data),
2564+ void *data)
2565+{
2566+
2567+ switch( acsm->acsmFSA )
2568+ {
2569+ case FSA_DFA:
2570+
2571+ if( acsm->acsmFormat == ACF_FULL )
2572+ {
2573+ return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
2574+ }
2575+ else if( acsm->acsmFormat == ACF_BANDED )
2576+ {
2577+ return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
2578+ }
2579+ else
2580+ {
2581+ return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
2582+ }
2583+
2584+ case FSA_NFA:
2585+
2586+ return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
2587+
2588+ case FSA_TRIE:
2589+
2590+ return 0;
2591+ }
2592+ return 0;
2593+}
2594+
2595+
2596+/*
2597+ * Free all memory
2598+ */
2599+void
2600+acsmFree2 (ACSM_STRUCT2 * acsm)
2601+{
2602+ int i;
2603+ ACSM_PATTERN2 * mlist, *ilist;
2604+ for (i = 0; i < acsm->acsmMaxStates; i++)
2605+ {
2606+ mlist = acsm->acsmMatchList[i];
2607+
2608+ while (mlist)
2609+ {
2610+ ilist = mlist;
2611+ mlist = mlist->next;
2612+ AC_FREE (ilist);
2613+ }
2614+ AC_FREE(acsm->acsmNextState[i]);
2615+ }
2616+ AC_FREE(acsm->acsmFailState);
2617+ AC_FREE(acsm->acsmMatchList);
2618+}
2619+
2620+/* ********************************** */
2621+
2622+static void ring_sock_destruct(struct sock *sk) {
2623+
2624+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
2625+ skb_queue_purge(&sk->sk_receive_queue);
2626+
2627+ if (!sock_flag(sk, SOCK_DEAD)) {
2628+#if defined(RING_DEBUG)
2629+ printk("Attempt to release alive ring socket: %p\n", sk);
2630+#endif
2631+ return;
2632+ }
2633+
2634+ BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
2635+ BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
2636+#else
2637+
2638+ BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
2639+ BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
2640+
2641+ if (!sk->dead) {
2642+#if defined(RING_DEBUG)
2643+ printk("Attempt to release alive ring socket: %p\n", sk);
2644+#endif
2645+ return;
2646+ }
2647+#endif
2648+
2649+ kfree(ring_sk(sk));
2650+
2651+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
2652+ MOD_DEC_USE_COUNT;
2653+#endif
2654+}
2655+
2656+/* ********************************** */
2657+
2658+static void ring_proc_add(struct ring_opt *pfr) {
2659+ if(ring_proc_dir != NULL) {
2660+ char name[16];
2661+
2662+ pfr->ring_pid = current->pid;
2663+
2664+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
2665+ create_proc_read_entry(name, 0, ring_proc_dir,
2666+ ring_proc_get_info, pfr);
2667+ /* printk("PF_RING: added /proc/net/pf_ring/%s\n", name); */
2668+ }
2669+}
2670+
2671+/* ********************************** */
2672+
2673+static void ring_proc_remove(struct ring_opt *pfr) {
2674+ if(ring_proc_dir != NULL) {
2675+ char name[16];
2676+
2677+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
2678+ remove_proc_entry(name, ring_proc_dir);
2679+ /* printk("PF_RING: removed /proc/net/pf_ring/%s\n", name); */
2680+ }
2681+}
2682+
2683+/* ********************************** */
2684+
2685+static int ring_proc_get_info(char *buf, char **start, off_t offset,
2686+ int len, int *unused, void *data)
2687+{
2688+ int rlen = 0;
2689+ struct ring_opt *pfr;
2690+ FlowSlotInfo *fsi;
2691+
2692+ if(data == NULL) {
2693+ /* /proc/net/pf_ring/info */
2694+ rlen = sprintf(buf,"Version : %s\n", RING_VERSION);
2695+ rlen += sprintf(buf + rlen,"Bucket length : %d bytes\n", bucket_len);
2696+ rlen += sprintf(buf + rlen,"Ring slots : %d\n", num_slots);
2697+ rlen += sprintf(buf + rlen,"Sample rate : %d [1=no sampling]\n", sample_rate);
2698+
2699+ rlen += sprintf(buf + rlen,"Capture TX : %s\n",
2700+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
2701+ rlen += sprintf(buf + rlen,"Transparent mode : %s\n",
2702+ transparent_mode ? "Yes" : "No");
2703+ rlen += sprintf(buf + rlen,"Total rings : %d\n", ring_table_size);
2704+ } else {
2705+ /* detailed statistics about a PF_RING */
2706+ pfr = (struct ring_opt*)data;
2707+
2708+ if(data) {
2709+ fsi = pfr->slots_info;
2710+
2711+ if(fsi) {
2712+ rlen = sprintf(buf, "Bound Device : %s\n",
2713+ pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
2714+ rlen += sprintf(buf + rlen,"Version : %d\n", fsi->version);
2715+ rlen += sprintf(buf + rlen,"Sampling Rate : %d\n", pfr->sample_rate);
2716+ rlen += sprintf(buf + rlen,"BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
2717+ rlen += sprintf(buf + rlen,"Bloom Filters : %s\n", pfr->bitmask_enabled ? "Enabled" : "Disabled");
2718+ rlen += sprintf(buf + rlen,"Pattern Search: %s\n", pfr->acsm ? "Enabled" : "Disabled");
2719+ rlen += sprintf(buf + rlen,"Cluster Id : %d\n", pfr->cluster_id);
2720+ rlen += sprintf(buf + rlen,"Tot Slots : %d\n", fsi->tot_slots);
2721+ rlen += sprintf(buf + rlen,"Slot Len : %d\n", fsi->slot_len);
2722+ rlen += sprintf(buf + rlen,"Data Len : %d\n", fsi->data_len);
2723+ rlen += sprintf(buf + rlen,"Tot Memory : %d\n", fsi->tot_mem);
2724+ rlen += sprintf(buf + rlen,"Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
2725+ rlen += sprintf(buf + rlen,"Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
2726+ rlen += sprintf(buf + rlen,"Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
2727+ rlen += sprintf(buf + rlen,"Tot Read : %lu\n", (unsigned long)fsi->tot_read);
2728+
2729+ } else
2730+ rlen = sprintf(buf, "WARNING fsi == NULL\n");
2731+ } else
2732+ rlen = sprintf(buf, "WARNING data == NULL\n");
2733+ }
2734+
2735+ return rlen;
2736+}
2737+
2738+/* ********************************** */
2739+
2740+static void ring_proc_init(void) {
143a4708 2741+ ring_proc_dir = proc_mkdir("pf_ring", init_net.proc_net);
40fd095b 2742+
2743+ if(ring_proc_dir) {
2744+ ring_proc_dir->owner = THIS_MODULE;
2745+ ring_proc = create_proc_read_entry("info", 0, ring_proc_dir,
2746+ ring_proc_get_info, NULL);
2747+ if(!ring_proc)
2748+ printk("PF_RING: unable to register proc file\n");
2749+ else {
2750+ ring_proc->owner = THIS_MODULE;
2751+ printk("PF_RING: registered /proc/net/pf_ring/\n");
2752+ }
2753+ } else
2754+ printk("PF_RING: unable to create /proc/net/pf_ring\n");
2755+}
2756+
2757+/* ********************************** */
2758+
2759+static void ring_proc_term(void) {
2760+ if(ring_proc != NULL) {
2761+ remove_proc_entry("info", ring_proc_dir);
143a4708 2762+ if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", init_net.proc_net);
40fd095b 2763+
2764+ printk("PF_RING: deregistered /proc/net/pf_ring\n");
2765+ }
2766+}
2767+
2768+/* ********************************** */
2769+
2770+/*
2771+ * ring_insert()
2772+ *
2773+ * store the sk in a new element and add it
2774+ * to the head of the list.
2775+ */
2776+static inline void ring_insert(struct sock *sk) {
2777+ struct ring_element *next;
2778+
2779+#if defined(RING_DEBUG)
2780+ printk("RING: ring_insert()\n");
2781+#endif
2782+
2783+ next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
2784+ if(next != NULL) {
2785+ next->sk = sk;
2786+ write_lock_irq(&ring_mgmt_lock);
2787+ list_add(&next->list, &ring_table);
2788+ write_unlock_irq(&ring_mgmt_lock);
2789+ } else {
2790+ if(net_ratelimit())
2791+ printk("RING: could not kmalloc slot!!\n");
2792+ }
2793+
2794+ ring_table_size++;
2795+ ring_proc_add(ring_sk(sk));
2796+}
2797+
2798+/* ********************************** */
2799+
2800+/*
2801+ * ring_remove()
2802+ *
2803+ * For each of the elements in the list:
2804+ * - check if this is the element we want to delete
2805+ * - if it is, remove it from the list, and free it.
2806+ *
2807+ * stop when we find the one we're looking for (break),
2808+ * or when we reach the end of the list.
2809+ */
2810+static inline void ring_remove(struct sock *sk) {
2811+ struct list_head *ptr;
2812+ struct ring_element *entry;
2813+
2814+ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
2815+ entry = list_entry(ptr, struct ring_element, list);
2816+
2817+ if(entry->sk == sk) {
2818+ list_del(ptr);
2819+ kfree(ptr);
2820+ ring_table_size--;
2821+ break;
2822+ }
2823+ }
2824+}
2825+
2826+/* ********************************** */
2827+
2828+static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
2829+
2830+ if(pfr->ring_slots != NULL) {
2831+
2832+ u_int32_t tot_insert = pfr->slots_info->insert_idx,
2833+#if defined(RING_DEBUG)
2834+ tot_read = pfr->slots_info->tot_read, tot_pkts;
2835+#else
2836+ tot_read = pfr->slots_info->tot_read;
2837+#endif
2838+
2839+ if(tot_insert >= tot_read) {
2840+#if defined(RING_DEBUG)
2841+ tot_pkts = tot_insert-tot_read;
2842+#endif
2843+ return(tot_insert-tot_read);
2844+ } else {
2845+#if defined(RING_DEBUG)
2846+ tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read;
2847+#endif
2848+ return(((u_int32_t)-1)+tot_insert-tot_read);
2849+ }
2850+
2851+#if defined(RING_DEBUG)
2852+ printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
2853+ tot_pkts, tot_insert, tot_read);
2854+#endif
2855+
2856+ } else
2857+ return(0);
2858+}
2859+
2860+/* ********************************** */
2861+
2862+static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
2863+#if defined(RING_DEBUG)
2864+ printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
2865+#endif
2866+
2867+ if(pfr->ring_slots != NULL) {
2868+ FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
2869+ *pfr->slots_info->slot_len]);
2870+ return(slot);
2871+ } else
2872+ return(NULL);
2873+}
2874+
2875+/* ********************************** */
2876+
2877+static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
2878+#if defined(RING_DEBUG)
2879+ printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
2880+#endif
2881+
2882+ if(pfr->ring_slots != NULL)
2883+ return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
2884+ pfr->slots_info->slot_len]));
2885+ else
2886+ return(NULL);
2887+}
2888+
2889+/* ******************************************************* */
2890+
2891+static int parse_pkt(struct sk_buff *skb, u_int16_t skb_displ,
2892+ u_int8_t *l3_proto, u_int16_t *eth_type,
2893+ u_int16_t *l3_offset, u_int16_t *l4_offset,
2894+ u_int16_t *vlan_id, u_int32_t *ipv4_src,
2895+ u_int32_t *ipv4_dst,
2896+ u_int16_t *l4_src_port, u_int16_t *l4_dst_port,
2897+ u_int16_t *payload_offset) {
2898+ struct iphdr *ip;
2899+ struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
2900+ u_int16_t displ;
2901+
2902+ *l3_offset = *l4_offset = *l3_proto = *payload_offset = 0;
2903+ *eth_type = ntohs(eh->h_proto);
2904+
2905+ if(*eth_type == 0x8100 /* 802.1q (VLAN) */) {
2906+ (*vlan_id) = (skb->data[14] & 15)*256 + skb->data[15];
2907+ *eth_type = (skb->data[16])*256 + skb->data[17];
2908+ displ = 4;
2909+ } else {
2910+ displ = 0;
2911+ (*vlan_id) = (u_int16_t)-1;
2912+ }
2913+
2914+ if(*eth_type == 0x0800 /* IP */) {
2915+ *l3_offset = displ+sizeof(struct ethhdr);
2916+ ip = (struct iphdr*)(skb->data-skb_displ+(*l3_offset));
2917+
2918+ *ipv4_src = ntohl(ip->saddr), *ipv4_dst = ntohl(ip->daddr), *l3_proto = ip->protocol;
2919+
2920+ if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP)) {
2921+ *l4_offset = (*l3_offset)+(ip->ihl*4);
2922+
2923+ if(ip->protocol == IPPROTO_TCP) {
2924+ struct tcphdr *tcp = (struct tcphdr*)(skb->data-skb_displ+(*l4_offset));
2925+ *l4_src_port = ntohs(tcp->source), *l4_dst_port = ntohs(tcp->dest);
2926+ *payload_offset = (*l4_offset)+(tcp->doff * 4);
2927+ } else if(ip->protocol == IPPROTO_UDP) {
2928+ struct udphdr *udp = (struct udphdr*)(skb->data-skb_displ+(*l4_offset));
2929+ *l4_src_port = ntohs(udp->source), *l4_dst_port = ntohs(udp->dest);
2930+ *payload_offset = (*l4_offset)+sizeof(struct udphdr);
2931+ } else
2932+ *payload_offset = (*l4_offset);
2933+ } else
2934+ *l4_src_port = *l4_dst_port = 0;
2935+
2936+ return(1); /* IP */
2937+ } /* TODO: handle IPv6 */
2938+
2939+ return(0); /* No IP */
2940+}
2941+
2942+/* **************************************************************** */
2943+
2944+static void reset_bitmask(bitmask_selector *selector)
2945+{
2946+ memset((char*)selector->bits_memory, 0, selector->num_bits/8);
2947+
2948+ while(selector->clashes != NULL) {
2949+ bitmask_counter_list *next = selector->clashes->next;
2950+ kfree(selector->clashes);
2951+ selector->clashes = next;
2952+ }
2953+}
2954+
2955+/* **************************************************************** */
2956+
2957+static void alloc_bitmask(u_int32_t tot_bits, bitmask_selector *selector)
2958+{
2959+ u_int tot_mem = tot_bits/8;
2960+
2961+ if(tot_mem <= PAGE_SIZE)
2962+ selector->order = 1;
2963+ else {
2964+ for(selector->order = 0; (PAGE_SIZE << selector->order) < tot_mem; selector->order++)
2965+ ;
2966+ }
2967+
2968+ printk("BITMASK: [order=%d][tot_mem=%d]\n", selector->order, tot_mem);
2969+
2970+ while((selector->bits_memory = __get_free_pages(GFP_ATOMIC, selector->order)) == 0)
2971+ if(selector->order-- == 0)
2972+ break;
2973+
2974+ if(selector->order == 0) {
2975+ printk("BITMASK: ERROR not enough memory for bitmask\n");
2976+ selector->num_bits = 0;
2977+ return;
2978+ }
2979+
2980+ tot_mem = PAGE_SIZE << selector->order;
2981+ printk("BITMASK: succesfully allocated [tot_mem=%d][order=%d]\n",
2982+ tot_mem, selector->order);
2983+
2984+ selector->num_bits = tot_mem*8;
2985+ selector->clashes = NULL;
2986+ reset_bitmask(selector);
2987+}
2988+
2989+/* ********************************** */
2990+
2991+static void free_bitmask(bitmask_selector *selector)
2992+{
2993+ if(selector->bits_memory > 0)
2994+ free_pages(selector->bits_memory, selector->order);
2995+}
2996+
2997+/* ********************************** */
2998+
2999+static void set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3000+ u_int32_t idx = the_bit % selector->num_bits;
3001+
3002+ if(BITMASK_ISSET(idx, selector)) {
3003+ bitmask_counter_list *head = selector->clashes;
3004+
3005+ printk("BITMASK: bit %u was already set\n", the_bit);
3006+
3007+ while(head != NULL) {
3008+ if(head->bit_id == the_bit) {
3009+ head->bit_counter++;
3010+ printk("BITMASK: bit %u is now set to %d\n", the_bit, head->bit_counter);
3011+ return;
3012+ }
3013+
3014+ head = head->next;
3015+ }
3016+
3017+ head = kmalloc(sizeof(bitmask_counter_list), GFP_KERNEL);
3018+ if(head) {
3019+ head->bit_id = the_bit;
3020+ head->bit_counter = 1 /* previous value */ + 1 /* the requested set */;
3021+ head->next = selector->clashes;
3022+ selector->clashes = head;
3023+ } else {
3024+ printk("BITMASK: not enough memory\n");
3025+ return;
3026+ }
3027+ } else {
3028+ BITMASK_SET(idx, selector);
3029+ printk("BITMASK: bit %u is now set\n", the_bit);
3030+ }
3031+}
3032+
3033+/* ********************************** */
3034+
3035+static u_char is_set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3036+ u_int32_t idx = the_bit % selector->num_bits;
3037+ return(BITMASK_ISSET(idx, selector));
3038+}
3039+
3040+/* ********************************** */
3041+
3042+static void clear_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
3043+ u_int32_t idx = the_bit % selector->num_bits;
3044+
3045+ if(!BITMASK_ISSET(idx, selector))
3046+ printk("BITMASK: bit %u was not set\n", the_bit);
3047+ else {
3048+ bitmask_counter_list *head = selector->clashes, *prev = NULL;
3049+
3050+ while(head != NULL) {
3051+ if(head->bit_id == the_bit) {
3052+ head->bit_counter--;
3053+
3054+ printk("BITMASK: bit %u is now set to %d\n",
3055+ the_bit, head->bit_counter);
3056+
3057+ if(head->bit_counter == 1) {
3058+ /* We can now delete this entry as '1' can be
3059+ accommodated into the bitmask */
3060+
3061+ if(prev == NULL)
3062+ selector->clashes = head->next;
3063+ else
3064+ prev->next = head->next;
3065+
3066+ kfree(head);
3067+ }
3068+ return;
3069+ }
3070+
3071+ prev = head; head = head->next;
3072+ }
3073+
3074+ BITMASK_CLR(idx, selector);
3075+ printk("BITMASK: bit %u is now reset\n", the_bit);
3076+ }
3077+}
3078+
3079+/* ********************************** */
3080+
3081+/* Hash function */
3082+static u_int32_t sdb_hash(u_int32_t value) {
3083+ u_int32_t hash = 0, i;
3084+ u_int8_t str[sizeof(value)];
3085+
3086+ memcpy(str, &value, sizeof(value));
3087+
3088+ for(i = 0; i < sizeof(value); i++) {
3089+ hash = str[i] + (hash << 6) + (hash << 16) - hash;
3090+ }
3091+
3092+ return(hash);
3093+}
3094+
3095+/* ********************************** */
3096+
3097+static void handle_bloom_filter_rule(struct ring_opt *pfr, char *buf) {
3098+ u_int count;
3099+
3100+ if(buf == NULL)
3101+ return;
3102+ else
3103+ count = strlen(buf);
3104+
3105+ printk("PF_RING: -> handle_bloom_filter_rule(%s)\n", buf);
3106+
3107+ if((buf[count-1] == '\n') || (buf[count-1] == '\r')) buf[count-1] = '\0';
3108+
3109+ if(count > 1) {
3110+ u_int32_t the_bit;
3111+
3112+ if(!strncmp(&buf[1], "vlan=", 5)) {
3113+ sscanf(&buf[6], "%d", &the_bit);
3114+
3115+ if(buf[0] == '+')
3116+ set_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_add++;
3117+ else
3118+ clear_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_remove++;
3119+ } else if(!strncmp(&buf[1], "mac=", 4)) {
3120+ int a, b, c, d, e, f;
3121+
3122+ if(sscanf(&buf[5], "%02x:%02x:%02x:%02x:%02x:%02x:",
3123+ &a, &b, &c, &d, &e, &f) == 6) {
3124+ u_int32_t mac_addr = (a & 0xff) + (b & 0xff) + ((c & 0xff) << 24) + ((d & 0xff) << 16) + ((e & 0xff) << 8) + (f & 0xff);
3125+
3126+ /* printk("PF_RING: -> [%u][%u][%u][%u][%u][%u] -> [%u]\n", a, b, c, d, e, f, mac_addr); */
3127+
3128+ if(buf[0] == '+')
3129+ set_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_add++;
3130+ else
3131+ clear_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_remove++;
3132+ } else
3133+ printk("PF_RING: -> Invalid MAC address '%s'\n", &buf[5]);
3134+ } else if(!strncmp(&buf[1], "ip=", 3)) {
3135+ int a, b, c, d;
3136+
3137+ if(sscanf(&buf[4], "%d.%d.%d.%d", &a, &b, &c, &d) == 4) {
3138+ u_int32_t ip_addr = ((a & 0xff) << 24) + ((b & 0xff) << 16) + ((c & 0xff) << 8) + (d & 0xff);
3139+
3140+ if(buf[0] == '+')
3141+ set_bit_bitmask(&pfr->ip_bitmask, ip_addr), set_bit_bitmask(&pfr->ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_add++;
3142+ else
3143+ clear_bit_bitmask(&pfr->ip_bitmask, ip_addr), clear_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_remove++;
3144+ } else
3145+ printk("PF_RING: -> Invalid IP address '%s'\n", &buf[4]);
3146+ } else if(!strncmp(&buf[1], "port=", 5)) {
3147+ sscanf(&buf[6], "%d", &the_bit);
3148+
3149+ if(buf[0] == '+')
3150+ set_bit_bitmask(&pfr->port_bitmask, the_bit), set_bit_bitmask(&pfr->port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_add++;
3151+ else
3152+ clear_bit_bitmask(&pfr->port_bitmask, the_bit), clear_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_remove++;
3153+ } else if(!strncmp(&buf[1], "proto=", 6)) {
3154+ if(!strncmp(&buf[7], "tcp", 3)) the_bit = 6;
3155+ else if(!strncmp(&buf[7], "udp", 3)) the_bit = 17;
3156+ else if(!strncmp(&buf[7], "icmp", 4)) the_bit = 1;
3157+ else sscanf(&buf[7], "%d", &the_bit);
3158+
3159+ if(buf[0] == '+')
3160+ set_bit_bitmask(&pfr->proto_bitmask, the_bit);
3161+ else
3162+ clear_bit_bitmask(&pfr->proto_bitmask, the_bit);
3163+ } else
3164+ printk("PF_RING: -> Unknown rule type '%s'\n", buf);
3165+ }
3166+}
3167+
3168+/* ********************************** */
3169+
3170+static void reset_bloom_filters(struct ring_opt *pfr) {
3171+ reset_bitmask(&pfr->mac_bitmask);
3172+ reset_bitmask(&pfr->vlan_bitmask);
3173+ reset_bitmask(&pfr->ip_bitmask); reset_bitmask(&pfr->twin_ip_bitmask);
3174+ reset_bitmask(&pfr->port_bitmask); reset_bitmask(&pfr->twin_port_bitmask);
3175+ reset_bitmask(&pfr->proto_bitmask);
3176+
3177+ pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3178+ pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3179+ pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3180+ pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3181+ pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3182+
3183+ printk("PF_RING: rules have been reset\n");
3184+}
3185+
3186+/* ********************************** */
3187+
3188+static void init_blooms(struct ring_opt *pfr) {
3189+ alloc_bitmask(4096, &pfr->mac_bitmask);
3190+ alloc_bitmask(4096, &pfr->vlan_bitmask);
3191+ alloc_bitmask(32768, &pfr->ip_bitmask); alloc_bitmask(32768, &pfr->twin_ip_bitmask);
3192+ alloc_bitmask(4096, &pfr->port_bitmask); alloc_bitmask(4096, &pfr->twin_port_bitmask);
3193+ alloc_bitmask(4096, &pfr->proto_bitmask);
3194+
3195+ pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
3196+ pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
3197+ pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
3198+ pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
3199+ pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
3200+
3201+ reset_bloom_filters(pfr);
3202+}
3203+
3204+/* ********************************** */
3205+
3206+inline int MatchFound (void* id, int index, void *data) { return(0); }
3207+
3208+/* ********************************** */
3209+
3210+static void add_skb_to_ring(struct sk_buff *skb,
3211+ struct ring_opt *pfr,
3212+ u_char recv_packet,
3213+ u_char real_skb /* 1=skb 0=faked skb */) {
3214+ FlowSlot *theSlot;
3215+ int idx, displ, fwd_pkt = 0;
3216+
3217+ if(recv_packet) {
3218+ /* Hack for identifying a packet received by the e1000 */
3219+ if(real_skb) {
3220+ displ = SKB_DISPLACEMENT;
3221+ } else
3222+ displ = 0; /* Received by the e1000 wrapper */
3223+ } else
3224+ displ = 0;
3225+
3226+ write_lock(&pfr->ring_index_lock);
3227+ pfr->slots_info->tot_pkts++;
3228+ write_unlock(&pfr->ring_index_lock);
3229+
3230+ /* BPF Filtering (from af_packet.c) */
3231+ if(pfr->bpfFilter != NULL) {
3232+ unsigned res = 1, len;
3233+
3234+ len = skb->len-skb->data_len;
3235+
3236+ write_lock(&pfr->ring_index_lock);
3237+ skb->data -= displ;
3238+ res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
3239+ skb->data += displ;
3240+ write_unlock(&pfr->ring_index_lock);
3241+
3242+ if(res == 0) {
3243+ /* Filter failed */
3244+
3245+#if defined(RING_DEBUG)
3246+ printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
3247+ "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3248+ (int)skb->len, pfr->slots_info->tot_pkts,
3249+ pfr->slots_info->insert_idx,
3250+ skb->pkt_type, skb->cloned);
3251+#endif
3252+
3253+ return;
3254+ }
3255+ }
3256+
3257+ /* ************************** */
3258+
3259+ if(pfr->sample_rate > 1) {
3260+ if(pfr->pktToSample == 0) {
3261+ write_lock(&pfr->ring_index_lock);
3262+ pfr->pktToSample = pfr->sample_rate;
3263+ write_unlock(&pfr->ring_index_lock);
3264+ } else {
3265+ write_lock(&pfr->ring_index_lock);
3266+ pfr->pktToSample--;
3267+ write_unlock(&pfr->ring_index_lock);
3268+
3269+#if defined(RING_DEBUG)
3270+ printk("add_skb_to_ring(skb): sampled packet [len=%d]"
3271+ "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
3272+ (int)skb->len, pfr->slots_info->tot_pkts,
3273+ pfr->slots_info->insert_idx,
3274+ skb->pkt_type, skb->cloned);
3275+#endif
3276+ return;
3277+ }
3278+ }
3279+
3280+ /* ************************************* */
3281+
3282+ if((pfr->reflector_dev != NULL)
3283+ && (!netif_queue_stopped(pfr->reflector_dev))) {
3284+ int cpu = smp_processor_id();
3285+
3286+ /* increase reference counter so that this skb is not freed */
3287+ atomic_inc(&skb->users);
3288+
3289+ skb->data -= displ;
3290+
3291+ /* send it */
3292+ if (pfr->reflector_dev->xmit_lock_owner != cpu) {
3293+ /* Patch below courtesy of Matthew J. Roth <mroth@imminc.com> */
3294+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3295+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3296+ pfr->reflector_dev->xmit_lock_owner = cpu;
3297+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3298+#else
3299+ netif_tx_lock_bh(pfr->reflector_dev);
3300+#endif
3301+ if (pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev) == 0) {
3302+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3303+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3304+ pfr->reflector_dev->xmit_lock_owner = -1;
3305+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3306+#else
3307+ netif_tx_unlock_bh(pfr->reflector_dev);
3308+#endif
3309+ skb->data += displ;
3310+#if defined(RING_DEBUG)
3311+ printk("++ hard_start_xmit succeeded\n");
3312+#endif
3313+ return; /* OK */
3314+ }
3315+
3316+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
3317+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
3318+ pfr->reflector_dev->xmit_lock_owner = -1;
3319+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
3320+#else
3321+ netif_tx_unlock_bh(pfr->reflector_dev);
3322+#endif
3323+ }
3324+
3325+#if defined(RING_DEBUG)
3326+ printk("++ hard_start_xmit failed\n");
3327+#endif
3328+ skb->data += displ;
3329+ return; /* -ENETDOWN */
3330+ }
3331+
3332+ /* ************************************* */
3333+
3334+#if defined(RING_DEBUG)
3335+ printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
3336+ "[pkt_type=%d][cloned=%d]\n",
3337+ (int)skb->len, pfr->slots_info->tot_pkts,
3338+ pfr->slots_info->insert_idx,
3339+ skb->pkt_type, skb->cloned);
3340+#endif
3341+
3342+ idx = pfr->slots_info->insert_idx;
3343+ theSlot = get_insert_slot(pfr);
3344+
3345+ if((theSlot != NULL) && (theSlot->slot_state == 0)) {
3346+ struct pcap_pkthdr *hdr;
3347+ char *bucket;
3348+ int is_ip_pkt, debug = 0;
3349+
3350+ /* Update Index */
3351+ idx++;
3352+
3353+ bucket = &theSlot->bucket;
3354+ hdr = (struct pcap_pkthdr*)bucket;
3355+
3356+ /* BD - API changed for time keeping */
3357+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3358+ if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
3359+
3360+ hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
3361+#else
c1c82508 3362+ if(skb->tstamp.tv64 == 0) __net_timestamp(skb);
40fd095b 3363+
e6761c74 3364+ struct timeval tv = ktime_to_timeval(skb->tstamp);
c1c82508 3365+ hdr->ts.tv_sec = tv.tv_sec, hdr->ts.tv_usec = tv.tv_usec;
40fd095b 3366+#endif
3367+ hdr->caplen = skb->len+displ;
3368+
3369+ if(hdr->caplen > pfr->slots_info->data_len)
3370+ hdr->caplen = pfr->slots_info->data_len;
3371+
3372+ hdr->len = skb->len+displ;
3373+
3374+ /* Extensions */
3375+ is_ip_pkt = parse_pkt(skb, displ,
3376+ &hdr->l3_proto,
3377+ &hdr->eth_type,
3378+ &hdr->l3_offset,
3379+ &hdr->l4_offset,
3380+ &hdr->vlan_id,
3381+ &hdr->ipv4_src,
3382+ &hdr->ipv4_dst,
3383+ &hdr->l4_src_port,
3384+ &hdr->l4_dst_port,
3385+ &hdr->payload_offset);
3386+
3387+ if(is_ip_pkt && pfr->bitmask_enabled) {
3388+ int vlan_match = 0;
3389+
3390+ fwd_pkt = 0;
3391+
3392+ if(debug) {
3393+ if(is_ip_pkt)
3394+ printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d][sport=%d][dport=%d][src=%u][dst=%u]\n",
3395+ hdr->l3_proto, hdr->vlan_id, hdr->l4_src_port, hdr->l4_dst_port, hdr->ipv4_src, hdr->ipv4_dst);
3396+ else
3397+ printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d]\n", hdr->l3_proto, hdr->vlan_id);
3398+ }
3399+
3400+ if(hdr->vlan_id != (u_int16_t)-1) {
3401+ vlan_match = is_set_bit_bitmask(&pfr->vlan_bitmask, hdr->vlan_id);
3402+ } else
3403+ vlan_match = 1;
3404+
3405+ if(vlan_match) {
3406+ struct ethhdr *eh = (struct ethhdr*)(skb->data);
3407+ u_int32_t src_mac = (eh->h_source[0] & 0xff) + (eh->h_source[1] & 0xff) + ((eh->h_source[2] & 0xff) << 24)
3408+ + ((eh->h_source[3] & 0xff) << 16) + ((eh->h_source[4] & 0xff) << 8) + (eh->h_source[5] & 0xff);
3409+
3410+ if(debug) printk(KERN_INFO "PF_RING: [src_mac=%u]\n", src_mac);
3411+
3412+ fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, src_mac);
3413+
3414+ if(!fwd_pkt) {
3415+ u_int32_t dst_mac = (eh->h_dest[0] & 0xff) + (eh->h_dest[1] & 0xff) + ((eh->h_dest[2] & 0xff) << 24)
3416+ + ((eh->h_dest[3] & 0xff) << 16) + ((eh->h_dest[4] & 0xff) << 8) + (eh->h_dest[5] & 0xff);
3417+
3418+ if(debug) printk(KERN_INFO "PF_RING: [dst_mac=%u]\n", dst_mac);
3419+
3420+ fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, dst_mac);
3421+
3422+ if(is_ip_pkt && (!fwd_pkt)) {
3423+ fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_src);
3424+
3425+ if(!fwd_pkt) {
3426+ fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_dst);
3427+
3428+ if((!fwd_pkt) && ((hdr->l3_proto == IPPROTO_TCP)
3429+ || (hdr->l3_proto == IPPROTO_UDP))) {
3430+ fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_src_port);
3431+ if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_dst_port);
3432+ }
3433+
3434+ if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->proto_bitmask, hdr->l3_proto);
3435+ }
3436+ }
3437+ }
3438+ }
3439+ } else
3440+ fwd_pkt = 1;
3441+
3442+ if(fwd_pkt && (pfr->acsm != NULL)) {
3443+ if((hdr->payload_offset > 0) && ((skb->len+skb->mac_len) > hdr->payload_offset)) {
3444+ char *payload = (skb->data-displ+hdr->payload_offset);
3445+ int payload_len = skb->len /* + skb->mac_len */ - hdr->payload_offset;
3446+
3447+ if((payload_len > 0)
3448+ && ((hdr->l4_src_port == 80) || (hdr->l4_dst_port == 80))) {
3449+ int rc;
3450+
3451+ if(0) {
3452+ char buf[1500];
3453+
3454+ memcpy(buf, payload, payload_len);
3455+ buf[payload_len] = '\0';
3456+ printk("[%s]\n", payload);
3457+ }
3458+
3459+ /* printk("Tring to match pattern [len=%d][%s]\n", payload_len, payload); */
3460+ rc = acsmSearch2(pfr->acsm, payload, payload_len, MatchFound, (void *)0) ? 1 : 0;
3461+
3462+ // printk("Match result: %d\n", fwd_pkt);
3463+ if(rc) {
3464+ printk("Pattern matched!\n");
3465+ } else {
3466+ fwd_pkt = 0;
3467+ }
3468+ } else
3469+ fwd_pkt = 0;
3470+ } else
3471+ fwd_pkt = 0;
3472+ }
3473+
3474+ if(fwd_pkt) {
3475+ memcpy(&bucket[sizeof(struct pcap_pkthdr)], skb->data-displ, hdr->caplen);
3476+
3477+#if defined(RING_DEBUG)
3478+ {
3479+ static unsigned int lastLoss = 0;
3480+
3481+ if(pfr->slots_info->tot_lost
3482+ && (lastLoss != pfr->slots_info->tot_lost)) {
3483+ printk("add_skb_to_ring(%d): [data_len=%d]"
3484+ "[hdr.caplen=%d][skb->len=%d]"
3485+ "[pcap_pkthdr=%d][removeIdx=%d]"
3486+ "[loss=%lu][page=%u][slot=%u]\n",
3487+ idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len,
3488+ sizeof(struct pcap_pkthdr),
3489+ pfr->slots_info->remove_idx,
3490+ (long unsigned int)pfr->slots_info->tot_lost,
3491+ pfr->insert_page_id, pfr->insert_slot_id);
3492+
3493+ lastLoss = pfr->slots_info->tot_lost;
3494+ }
3495+ }
3496+#endif
3497+
3498+ write_lock(&pfr->ring_index_lock);
3499+ if(idx == pfr->slots_info->tot_slots)
3500+ pfr->slots_info->insert_idx = 0;
3501+ else
3502+ pfr->slots_info->insert_idx = idx;
3503+
3504+ pfr->slots_info->tot_insert++;
3505+ theSlot->slot_state = 1;
3506+ write_unlock(&pfr->ring_index_lock);
3507+ }
3508+ } else {
3509+ write_lock(&pfr->ring_index_lock);
3510+ pfr->slots_info->tot_lost++;
3511+ write_unlock(&pfr->ring_index_lock);
3512+
3513+#if defined(RING_DEBUG)
3514+ printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
3515+ "[removeIdx=%u][insertIdx=%u]\n",
3516+ (long unsigned int)pfr->slots_info->tot_lost,
3517+ pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
3518+#endif
3519+ }
3520+
3521+ if(fwd_pkt) {
3522+
3523+ /* wakeup in case of poll() */
3524+ if(waitqueue_active(&pfr->ring_slots_waitqueue))
3525+ wake_up_interruptible(&pfr->ring_slots_waitqueue);
3526+ }
3527+}
3528+
3529+/* ********************************** */
3530+
3531+static u_int hash_skb(struct ring_cluster *cluster_ptr,
3532+ struct sk_buff *skb, u_char recv_packet) {
3533+ u_int idx;
3534+ int displ;
3535+ struct iphdr *ip;
3536+
3537+ if(cluster_ptr->hashing_mode == cluster_round_robin) {
3538+ idx = cluster_ptr->hashing_id++;
3539+ } else {
3540+ /* Per-flow clustering */
3541+ if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
3542+ if(recv_packet)
3543+ displ = 0;
3544+ else
3545+ displ = SKB_DISPLACEMENT;
3546+
3547+ /*
3548+ skb->data+displ
3549+
3550+ Always points to to the IP part of the packet
3551+ */
3552+
3553+ ip = (struct iphdr*)(skb->data+displ);
3554+
3555+ idx = ip->saddr+ip->daddr+ip->protocol;
3556+
3557+ if(ip->protocol == IPPROTO_TCP) {
3558+ struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
3559+ +sizeof(struct iphdr));
3560+ idx += tcp->source+tcp->dest;
3561+ } else if(ip->protocol == IPPROTO_UDP) {
3562+ struct udphdr *udp = (struct udphdr*)(skb->data+displ
3563+ +sizeof(struct iphdr));
3564+ idx += udp->source+udp->dest;
3565+ }
3566+ } else
3567+ idx = skb->len;
3568+ }
3569+
3570+ return(idx % cluster_ptr->num_cluster_elements);
3571+}
3572+
3573+/* ********************************** */
3574+
3575+static int skb_ring_handler(struct sk_buff *skb,
3576+ u_char recv_packet,
3577+ u_char real_skb /* 1=skb 0=faked skb */) {
3578+ struct sock *skElement;
3579+ int rc = 0;
3580+ struct list_head *ptr;
3581+ struct ring_cluster *cluster_ptr;
3582+
3583+#ifdef PROFILING
3584+ uint64_t rdt = _rdtsc(), rdt1, rdt2;
3585+#endif
3586+
3587+ if((!skb) /* Invalid skb */
3588+ || ((!enable_tx_capture) && (!recv_packet))) {
3589+ /*
3590+ An outgoing packet is about to be sent out
3591+ but we decided not to handle transmitted
3592+ packets.
3593+ */
3594+ return(0);
3595+ }
3596+
3597+#if defined(RING_DEBUG)
3598+ if(0) {
3599+ printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
3600+ skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
3601+ }
3602+#endif
3603+
3604+#ifdef PROFILING
3605+ rdt1 = _rdtsc();
3606+#endif
3607+
3608+ /* [1] Check unclustered sockets */
3609+ for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
3610+ struct ring_opt *pfr;
3611+ struct ring_element *entry;
3612+
3613+ entry = list_entry(ptr, struct ring_element, list);
3614+
3615+ read_lock(&ring_mgmt_lock);
3616+ skElement = entry->sk;
3617+ pfr = ring_sk(skElement);
3618+ read_unlock(&ring_mgmt_lock);
3619+
3620+ if((pfr != NULL)
3621+ && (pfr->cluster_id == 0 /* No cluster */)
3622+ && (pfr->ring_slots != NULL)
3623+ && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3624+ /* We've found the ring where the packet can be stored */
3625+ read_lock(&ring_mgmt_lock);
3626+ add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3627+ read_unlock(&ring_mgmt_lock);
3628+
3629+ rc = 1; /* Ring found: we've done our job */
3630+ }
3631+ }
3632+
3633+ /* [2] Check socket clusters */
3634+ cluster_ptr = ring_cluster_list;
3635+
3636+ while(cluster_ptr != NULL) {
3637+ struct ring_opt *pfr;
3638+
3639+ if(cluster_ptr->num_cluster_elements > 0) {
3640+ u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
3641+
3642+ read_lock(&ring_mgmt_lock);
3643+ skElement = cluster_ptr->sk[skb_hash];
3644+ read_unlock(&ring_mgmt_lock);
3645+
3646+ if(skElement != NULL) {
3647+ pfr = ring_sk(skElement);
3648+
3649+ if((pfr != NULL)
3650+ && (pfr->ring_slots != NULL)
3651+ && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
3652+ /* We've found the ring where the packet can be stored */
3653+ read_lock(&ring_mgmt_lock);
3654+ add_skb_to_ring(skb, pfr, recv_packet, real_skb);
3655+ read_unlock(&ring_mgmt_lock);
3656+
3657+ rc = 1; /* Ring found: we've done our job */
3658+ }
3659+ }
3660+ }
3661+
3662+ cluster_ptr = cluster_ptr->next;
3663+ }
3664+
3665+#ifdef PROFILING
3666+ rdt1 = _rdtsc()-rdt1;
3667+#endif
3668+
3669+#ifdef PROFILING
3670+ rdt2 = _rdtsc();
3671+#endif
3672+
3673+ if(transparent_mode) rc = 0;
3674+
3675+ if((rc != 0) && real_skb)
3676+ dev_kfree_skb(skb); /* Free the skb */
3677+
3678+#ifdef PROFILING
3679+ rdt2 = _rdtsc()-rdt2;
3680+ rdt = _rdtsc()-rdt;
3681+
3682+#if defined(RING_DEBUG)
3683+ printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
3684+ (int)rdt, rdt-rdt1,
3685+ (int)((float)((rdt-rdt1)*100)/(float)rdt),
3686+ rdt2,
3687+ (int)((float)(rdt2*100)/(float)rdt));
3688+#endif
3689+#endif
3690+
3691+ return(rc); /* 0 = packet not handled */
3692+}
3693+
3694+/* ********************************** */
3695+
3696+struct sk_buff skb;
3697+
3698+static int buffer_ring_handler(struct net_device *dev,
3699+ char *data, int len) {
3700+
3701+#if defined(RING_DEBUG)
3702+ printk("buffer_ring_handler: [dev=%s][len=%d]\n",
3703+ dev->name == NULL ? "<NULL>" : dev->name, len);
3704+#endif
3705+
3706+ /* BD - API changed for time keeping */
3707+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
3708+ skb.dev = dev, skb.len = len, skb.data = data,
3709+ skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */
3710+#else
3711+ skb.dev = dev, skb.len = len, skb.data = data,
c1c82508 3712+ skb.data_len = len, skb.tstamp.tv64 = 0; /* Calculate the time */
40fd095b 3713+#endif
3714+
3715+ skb_ring_handler(&skb, 1, 0 /* fake skb */);
3716+
3717+ return(0);
3718+}
3719+
3720+/* ********************************** */
3721+
3722+static int ring_create(struct socket *sock, int protocol) {
3723+ struct sock *sk;
3724+ struct ring_opt *pfr;
3725+ int err;
3726+
3727+#if defined(RING_DEBUG)
3728+ printk("RING: ring_create()\n");
3729+#endif
3730+
3731+ /* Are you root, superuser or so ? */
3732+ if(!capable(CAP_NET_ADMIN))
3733+ return -EPERM;
3734+
3735+ if(sock->type != SOCK_RAW)
3736+ return -ESOCKTNOSUPPORT;
3737+
3738+ if(protocol != htons(ETH_P_ALL))
3739+ return -EPROTONOSUPPORT;
3740+
3741+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3742+ MOD_INC_USE_COUNT;
3743+#endif
3744+
3745+ err = -ENOMEM;
3746+
3747+ // BD: -- broke this out to keep it more simple and clear as to what the
3748+ // options are.
3749+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3750+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3751+ sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
3752+#else
3753+ // BD: API changed in 2.6.12, ref:
3754+ // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
3755+ sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
3756+#endif
3757+#else
3758+ /* Kernel 2.4 */
3759+ sk = sk_alloc(PF_RING, GFP_KERNEL, 1);
3760+#endif
3761+
3762+ if (sk == NULL)
3763+ goto out;
3764+
3765+ sock->ops = &ring_ops;
3766+ sock_init_data(sock, sk);
3767+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3768+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
3769+ sk_set_owner(sk, THIS_MODULE);
3770+#endif
3771+#endif
3772+
3773+ err = -ENOMEM;
3774+ ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
3775+
3776+ if (!(pfr = ring_sk(sk))) {
3777+ sk_free(sk);
3778+ goto out;
3779+ }
3780+ memset(pfr, 0, sizeof(*pfr));
3781+ init_waitqueue_head(&pfr->ring_slots_waitqueue);
3782+ pfr->ring_index_lock = RW_LOCK_UNLOCKED;
3783+ atomic_set(&pfr->num_ring_slots_waiters, 0);
3784+ init_blooms(pfr);
3785+ pfr->acsm = NULL;
3786+
3787+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3788+ sk->sk_family = PF_RING;
3789+ sk->sk_destruct = ring_sock_destruct;
3790+#else
3791+ sk->family = PF_RING;
3792+ sk->destruct = ring_sock_destruct;
3793+ sk->num = protocol;
3794+#endif
3795+
3796+ ring_insert(sk);
3797+
3798+#if defined(RING_DEBUG)
3799+ printk("RING: ring_create() - created\n");
3800+#endif
3801+
3802+ return(0);
3803+ out:
3804+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
3805+ MOD_DEC_USE_COUNT;
3806+#endif
3807+ return err;
3808+}
3809+
3810+/* *********************************************** */
3811+
3812+static int ring_release(struct socket *sock)
3813+{
3814+ struct sock *sk = sock->sk;
3815+ struct ring_opt *pfr = ring_sk(sk);
3816+
3817+ if(!sk) return 0;
3818+
3819+#if defined(RING_DEBUG)
3820+ printk("RING: called ring_release\n");
3821+#endif
3822+
3823+#if defined(RING_DEBUG)
3824+ printk("RING: ring_release entered\n");
3825+#endif
3826+
3827+ /*
3828+ The calls below must be placed outside the
3829+ write_lock_irq...write_unlock_irq block.
3830+ */
3831+ sock_orphan(sk);
3832+ ring_proc_remove(ring_sk(sk));
3833+
3834+ write_lock_irq(&ring_mgmt_lock);
3835+ ring_remove(sk);
3836+ sock->sk = NULL;
3837+
3838+ /* Free the ring buffer */
3839+ if(pfr->ring_memory) {
3840+ struct page *page, *page_end;
3841+
3842+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3843+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3844+ ClearPageReserved(page);
3845+
3846+ free_pages(pfr->ring_memory, pfr->order);
3847+ }
3848+
3849+ free_bitmask(&pfr->mac_bitmask);
3850+ free_bitmask(&pfr->vlan_bitmask);
3851+ free_bitmask(&pfr->ip_bitmask); free_bitmask(&pfr->twin_ip_bitmask);
3852+ free_bitmask(&pfr->port_bitmask); free_bitmask(&pfr->twin_port_bitmask);
3853+ free_bitmask(&pfr->proto_bitmask);
3854+
3855+ if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
3856+
3857+ kfree(pfr);
3858+ ring_sk(sk) = NULL;
3859+
3860+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
3861+ skb_queue_purge(&sk->sk_write_queue);
3862+#endif
3863+
3864+ sock_put(sk);
3865+ write_unlock_irq(&ring_mgmt_lock);
3866+
3867+#if defined(RING_DEBUG)
3868+ printk("RING: ring_release leaving\n");
3869+#endif
3870+
3871+ return 0;
3872+}
3873+
3874+/* ********************************** */
3875+/*
3876+ * We create a ring for this socket and bind it to the specified device
3877+ */
3878+static int packet_ring_bind(struct sock *sk, struct net_device *dev)
3879+{
3880+ u_int the_slot_len;
3881+ u_int32_t tot_mem;
3882+ struct ring_opt *pfr = ring_sk(sk);
3883+ struct page *page, *page_end;
3884+
3885+ if(!dev) return(-1);
3886+
3887+#if defined(RING_DEBUG)
3888+ printk("RING: packet_ring_bind(%s) called\n", dev->name);
3889+#endif
3890+
3891+ /* **********************************************
3892+
3893+ *************************************
3894+ * *
3895+ * FlowSlotInfo *
3896+ * *
3897+ ************************************* <-+
3898+ * FlowSlot * |
3899+ ************************************* |
3900+ * FlowSlot * |
3901+ ************************************* +- num_slots
3902+ * FlowSlot * |
3903+ ************************************* |
3904+ * FlowSlot * |
3905+ ************************************* <-+
3906+
3907+ ********************************************** */
3908+
3909+ the_slot_len = sizeof(u_char) /* flowSlot.slot_state */
3910+#ifdef RING_MAGIC
3911+ + sizeof(u_char)
3912+#endif
3913+ + sizeof(struct pcap_pkthdr)
3914+ + bucket_len /* flowSlot.bucket */;
3915+
3916+ tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
3917+
3918+ /*
3919+ Calculate the value of the order parameter used later.
3920+ See http://www.linuxjournal.com/article.php?sid=1133
3921+ */
3922+ for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ;
3923+
3924+ /*
3925+ We now try to allocate the memory as required. If we fail
3926+ we try to allocate a smaller amount or memory (hence a
3927+ smaller ring).
3928+ */
3929+ while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0)
3930+ if(pfr->order-- == 0)
3931+ break;
3932+
3933+ if(pfr->order == 0) {
3934+ printk("RING: ERROR not enough memory for ring\n");
3935+ return(-1);
3936+ } else {
3937+ printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n",
3938+ PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order);
3939+ }
3940+
3941+ tot_mem = PAGE_SIZE << pfr->order;
3942+ memset((char*)pfr->ring_memory, 0, tot_mem);
3943+
3944+ /* Now we need to reserve the pages */
3945+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
3946+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
3947+ SetPageReserved(page);
3948+
3949+ pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
3950+ pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
3951+
3952+ pfr->slots_info->version = RING_FLOWSLOT_VERSION;
3953+ pfr->slots_info->slot_len = the_slot_len;
3954+ pfr->slots_info->data_len = bucket_len;
3955+ pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
3956+ pfr->slots_info->tot_mem = tot_mem;
3957+ pfr->slots_info->sample_rate = sample_rate;
3958+
3959+ printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
3960+ pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
3961+ pfr->slots_info->tot_mem);
3962+
3963+#ifdef RING_MAGIC
3964+ {
3965+ int i;
3966+
3967+ for(i=0; i<pfr->slots_info->tot_slots; i++) {
3968+ unsigned long idx = i*pfr->slots_info->slot_len;
3969+ FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
3970+ slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
3971+ }
3972+ }
3973+#endif
3974+
3975+ pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
3976+
3977+ /*
3978+ IMPORTANT
3979+ Leave this statement here as last one. In fact when
3980+ the ring_netdev != NULL the socket is ready to be used.
3981+ */
3982+ pfr->ring_netdev = dev;
3983+
3984+ return(0);
3985+}
3986+
3987+/* ************************************* */
3988+
3989+/* Bind to a device */
3990+static int ring_bind(struct socket *sock,
3991+ struct sockaddr *sa, int addr_len)
3992+{
3993+ struct sock *sk=sock->sk;
3994+ struct net_device *dev = NULL;
3995+
3996+#if defined(RING_DEBUG)
3997+ printk("RING: ring_bind() called\n");
3998+#endif
3999+
4000+ /*
4001+ * Check legality
4002+ */
4003+ if (addr_len != sizeof(struct sockaddr))
4004+ return -EINVAL;
4005+ if (sa->sa_family != PF_RING)
4006+ return -EINVAL;
4007+
4008+ /* Safety check: add trailing zero if missing */
4009+ sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
4010+
4011+#if defined(RING_DEBUG)
4012+ printk("RING: searching device %s\n", sa->sa_data);
4013+#endif
4014+
8924bddf 4015+ if((dev = __dev_get_by_name(&init_net, sa->sa_data)) == NULL) {
40fd095b 4016+#if defined(RING_DEBUG)
4017+ printk("RING: search failed\n");
4018+#endif
4019+ return(-EINVAL);
4020+ } else
4021+ return(packet_ring_bind(sk, dev));
4022+}
4023+
4024+/* ************************************* */
4025+
4026+static int ring_mmap(struct file *file,
4027+ struct socket *sock,
4028+ struct vm_area_struct *vma)
4029+{
4030+ struct sock *sk = sock->sk;
4031+ struct ring_opt *pfr = ring_sk(sk);
4032+ unsigned long size, start;
4033+ u_int pagesToMap;
4034+ char *ptr;
4035+
4036+#if defined(RING_DEBUG)
4037+ printk("RING: ring_mmap() called\n");
4038+#endif
4039+
4040+ if(pfr->ring_memory == 0) {
4041+#if defined(RING_DEBUG)
4042+ printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
4043+#endif
4044+ return -EINVAL;
4045+ }
4046+
4047+ size = (unsigned long)(vma->vm_end-vma->vm_start);
4048+
4049+ if(size % PAGE_SIZE) {
4050+#if defined(RING_DEBUG)
4051+ printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
4052+#endif
4053+ return(-EINVAL);
4054+ }
4055+
4056+ /* if userspace tries to mmap beyond end of our buffer, fail */
4057+ if(size > pfr->slots_info->tot_mem) {
4058+#if defined(RING_DEBUG)
4059+ printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem);
4060+#endif
4061+ return(-EINVAL);
4062+ }
4063+
4064+ pagesToMap = size/PAGE_SIZE;
4065+
4066+#if defined(RING_DEBUG)
4067+ printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
4068+#endif
4069+
4070+#if defined(RING_DEBUG)
4071+ printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
4072+ pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
4073+ pfr->ring_netdev->name);
4074+#endif
4075+
4076+ /* we do not want to have this area swapped out, lock it */
4077+ vma->vm_flags |= VM_LOCKED;
4078+ start = vma->vm_start;
4079+
4080+ /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */
4081+ ptr = (char*)(start+PAGE_SIZE);
4082+
4083+ if(remap_page_range(
4084+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4085+ vma,
4086+#endif
4087+ start,
4088+ __pa(pfr->ring_memory),
4089+ PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
4090+#if defined(RING_DEBUG)
4091+ printk("remap_page_range() failed\n");
4092+#endif
4093+ return(-EAGAIN);
4094+ }
4095+
4096+#if defined(RING_DEBUG)
4097+ printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap);
4098+#endif
4099+
4100+ return 0;
4101+}
4102+
4103+/* ************************************* */
4104+
4105+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4106+static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
4107+ struct msghdr *msg, size_t len, int flags)
4108+#else
4109+ static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
4110+ int flags, struct scm_cookie *scm)
4111+#endif
4112+{
4113+ FlowSlot* slot;
4114+ struct ring_opt *pfr = ring_sk(sock->sk);
4115+ u_int32_t queued_pkts, num_loops = 0;
4116+
4117+#if defined(RING_DEBUG)
4118+ printk("ring_recvmsg called\n");
4119+#endif
4120+
4121+ slot = get_remove_slot(pfr);
4122+
4123+ while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
4124+ wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
4125+
4126+#if defined(RING_DEBUG)
4127+ printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
4128+ slot->slot_state, queued_pkts, num_loops);
4129+#endif
4130+
4131+ if(queued_pkts > 0) {
4132+ if(num_loops++ > MAX_QUEUE_LOOPS)
4133+ break;
4134+ }
4135+ }
4136+
4137+#if defined(RING_DEBUG)
4138+ if(slot != NULL)
4139+ printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
4140+ queued_pkts, num_loops);
4141+#endif
4142+
4143+ return(queued_pkts);
4144+}
4145+
4146+/* ************************************* */
4147+
4148+unsigned int ring_poll(struct file * file,
4149+ struct socket *sock, poll_table *wait)
4150+{
4151+ FlowSlot* slot;
4152+ struct ring_opt *pfr = ring_sk(sock->sk);
4153+
4154+#if defined(RING_DEBUG)
4155+ printk("poll called\n");
4156+#endif
4157+
4158+ slot = get_remove_slot(pfr);
4159+
4160+ if((slot != NULL) && (slot->slot_state == 0))
4161+ poll_wait(file, &pfr->ring_slots_waitqueue, wait);
4162+
4163+#if defined(RING_DEBUG)
4164+ printk("poll returning %d\n", slot->slot_state);
4165+#endif
4166+
4167+ if((slot != NULL) && (slot->slot_state == 1))
4168+ return(POLLIN | POLLRDNORM);
4169+ else
4170+ return(0);
4171+}
4172+
4173+/* ************************************* */
4174+
4175+int add_to_cluster_list(struct ring_cluster *el,
4176+ struct sock *sock) {
4177+
4178+ if(el->num_cluster_elements == CLUSTER_LEN)
4179+ return(-1); /* Cluster full */
4180+
4181+ ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
4182+ el->sk[el->num_cluster_elements] = sock;
4183+ el->num_cluster_elements++;
4184+ return(0);
4185+}
4186+
4187+/* ************************************* */
4188+
4189+int remove_from_cluster_list(struct ring_cluster *el,
4190+ struct sock *sock) {
4191+ int i, j;
4192+
4193+ for(i=0; i<CLUSTER_LEN; i++)
4194+ if(el->sk[i] == sock) {
4195+ el->num_cluster_elements--;
4196+
4197+ if(el->num_cluster_elements > 0) {
4198+ /* The cluster contains other elements */
4199+ for(j=i; j<CLUSTER_LEN-1; j++)
4200+ el->sk[j] = el->sk[j+1];
4201+
4202+ el->sk[CLUSTER_LEN-1] = NULL;
4203+ } else {
4204+ /* Empty cluster */
4205+ memset(el->sk, 0, sizeof(el->sk));
4206+ }
4207+
4208+ return(0);
4209+ }
4210+
4211+ return(-1); /* Not found */
4212+}
4213+
4214+/* ************************************* */
4215+
4216+static int remove_from_cluster(struct sock *sock,
4217+ struct ring_opt *pfr)
4218+{
4219+ struct ring_cluster *el;
4220+
4221+#if defined(RING_DEBUG)
4222+ printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
4223+#endif
4224+
4225+ if(pfr->cluster_id == 0 /* 0 = No Cluster */)
4226+ return(0); /* Noting to do */
4227+
4228+ el = ring_cluster_list;
4229+
4230+ while(el != NULL) {
4231+ if(el->cluster_id == pfr->cluster_id) {
4232+ return(remove_from_cluster_list(el, sock));
4233+ } else
4234+ el = el->next;
4235+ }
4236+
4237+ return(-EINVAL); /* Not found */
4238+}
4239+
4240+/* ************************************* */
4241+
4242+static int add_to_cluster(struct sock *sock,
4243+ struct ring_opt *pfr,
4244+ u_short cluster_id)
4245+{
4246+ struct ring_cluster *el;
4247+
4248+#ifndef RING_DEBUG
4249+ printk("--> add_to_cluster(%d)\n", cluster_id);
4250+#endif
4251+
4252+ if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
4253+
4254+ if(pfr->cluster_id != 0)
4255+ remove_from_cluster(sock, pfr);
4256+
4257+ el = ring_cluster_list;
4258+
4259+ while(el != NULL) {
4260+ if(el->cluster_id == cluster_id) {
4261+ return(add_to_cluster_list(el, sock));
4262+ } else
4263+ el = el->next;
4264+ }
4265+
4266+ /* There's no existing cluster. We need to create one */
4267+ if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL)
4268+ return(-ENOMEM);
4269+
4270+ el->cluster_id = cluster_id;
4271+ el->num_cluster_elements = 1;
4272+ el->hashing_mode = cluster_per_flow; /* Default */
4273+ el->hashing_id = 0;
4274+
4275+ memset(el->sk, 0, sizeof(el->sk));
4276+ el->sk[0] = sock;
4277+ el->next = ring_cluster_list;
4278+ ring_cluster_list = el;
4279+ pfr->cluster_id = cluster_id;
4280+
4281+ return(0); /* 0 = OK */
4282+}
4283+
4284+/* ************************************* */
4285+
4286+/* Code taken/inspired from core/sock.c */
4287+static int ring_setsockopt(struct socket *sock,
4288+ int level, int optname,
4289+ char *optval, int optlen)
4290+{
4291+ struct ring_opt *pfr = ring_sk(sock->sk);
4292+ int val, found, ret = 0;
4293+ u_int cluster_id, do_enable;
4294+ char devName[8], bloom_filter[256], aho_pattern[256];
4295+
4296+ if(pfr == NULL) return(-EINVAL);
4297+
4298+ if (get_user(val, (int *)optval))
4299+ return -EFAULT;
4300+
4301+ found = 1;
4302+
4303+ switch(optname)
4304+ {
4305+ case SO_ATTACH_FILTER:
4306+ ret = -EINVAL;
4307+ if (optlen == sizeof(struct sock_fprog)) {
4308+ unsigned int fsize;
4309+ struct sock_fprog fprog;
4310+ struct sk_filter *filter;
4311+
4312+ ret = -EFAULT;
4313+
4314+ /*
4315+ NOTE
4316+
4317+ Do not call copy_from_user within a held
4318+ splinlock (e.g. ring_mgmt_lock) as this caused
4319+ problems when certain debugging was enabled under
4320+ 2.6.5 -- including hard lockups of the machine.
4321+ */
4322+ if(copy_from_user(&fprog, optval, sizeof(fprog)))
4323+ break;
4324+
4325+ fsize = sizeof(struct sock_filter) * fprog.len;
4326+ filter = kmalloc(fsize, GFP_KERNEL);
4327+
4328+ if(filter == NULL) {
4329+ ret = -ENOMEM;
4330+ break;
4331+ }
4332+
4333+ if(copy_from_user(filter->insns, fprog.filter, fsize))
4334+ break;
4335+
4336+ filter->len = fprog.len;
4337+
4338+ if(sk_chk_filter(filter->insns, filter->len) != 0) {
4339+ /* Bad filter specified */
4340+ kfree(filter);
4341+ pfr->bpfFilter = NULL;
4342+ break;
4343+ }
4344+
4345+ /* get the lock, set the filter, release the lock */
4346+ write_lock(&ring_mgmt_lock);
4347+ pfr->bpfFilter = filter;
4348+ write_unlock(&ring_mgmt_lock);
4349+ ret = 0;
4350+ }
4351+ break;
4352+
4353+ case SO_DETACH_FILTER:
4354+ write_lock(&ring_mgmt_lock);
4355+ found = 1;
4356+ if(pfr->bpfFilter != NULL) {
4357+ kfree(pfr->bpfFilter);
4358+ pfr->bpfFilter = NULL;
4359+ write_unlock(&ring_mgmt_lock);
4360+ break;
4361+ }
4362+ ret = -ENONET;
4363+ break;
4364+
4365+ case SO_ADD_TO_CLUSTER:
4366+ if (optlen!=sizeof(val))
4367+ return -EINVAL;
4368+
4369+ if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
4370+ return -EFAULT;
4371+
4372+ write_lock(&ring_mgmt_lock);
4373+ ret = add_to_cluster(sock->sk, pfr, cluster_id);
4374+ write_unlock(&ring_mgmt_lock);
4375+ break;
4376+
4377+ case SO_REMOVE_FROM_CLUSTER:
4378+ write_lock(&ring_mgmt_lock);
4379+ ret = remove_from_cluster(sock->sk, pfr);
4380+ write_unlock(&ring_mgmt_lock);
4381+ break;
4382+
4383+ case SO_SET_REFLECTOR:
4384+ if(optlen >= (sizeof(devName)-1))
4385+ return -EINVAL;
4386+
4387+ if(optlen > 0) {
4388+ if(copy_from_user(devName, optval, optlen))
4389+ return -EFAULT;
4390+ }
4391+
4392+ devName[optlen] = '\0';
4393+
4394+#if defined(RING_DEBUG)
4395+ printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
4396+#endif
4397+
4398+ write_lock(&ring_mgmt_lock);
c444bcac 4399+ pfr->reflector_dev = dev_get_by_name(&init_net, devName);
40fd095b 4400+ write_unlock(&ring_mgmt_lock);
4401+
4402+#if defined(RING_DEBUG)
4403+ if(pfr->reflector_dev != NULL)
4404+ printk("SO_SET_REFLECTOR(%s): succeded\n", devName);
4405+ else
4406+ printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
4407+#endif
4408+ break;
4409+
4410+ case SO_SET_BLOOM:
4411+ if(optlen >= (sizeof(bloom_filter)-1))
4412+ return -EINVAL;
4413+
4414+ if(optlen > 0) {
4415+ if(copy_from_user(bloom_filter, optval, optlen))
4416+ return -EFAULT;
4417+ }
4418+
4419+ bloom_filter[optlen] = '\0';
4420+
4421+ write_lock(&ring_mgmt_lock);
4422+ handle_bloom_filter_rule(pfr, bloom_filter);
4423+ write_unlock(&ring_mgmt_lock);
4424+ break;
4425+
4426+ case SO_SET_STRING:
4427+ if(optlen >= (sizeof(aho_pattern)-1))
4428+ return -EINVAL;
4429+
4430+ if(optlen > 0) {
4431+ if(copy_from_user(aho_pattern, optval, optlen))
4432+ return -EFAULT;
4433+ }
4434+
4435+ aho_pattern[optlen] = '\0';
4436+
4437+ write_lock(&ring_mgmt_lock);
4438+ if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
4439+ if(optlen > 0) {
4440+#if 1
4441+ if((pfr->acsm = acsmNew2()) != NULL) {
4442+ int nc=1 /* case sensitive */, i = 0;
4443+
4444+ pfr->acsm->acsmFormat = ACF_BANDED;
4445+ acsmAddPattern2(pfr->acsm, (unsigned char*)aho_pattern,
4446+ (int)strlen(aho_pattern), nc, 0, 0,(void*)aho_pattern, i);
4447+ acsmCompile2(pfr->acsm);
4448+ }
4449+#else
4450+ pfr->acsm = kmalloc (10, GFP_KERNEL); /* TEST */
4451+#endif
4452+ }
4453+ write_unlock(&ring_mgmt_lock);
4454+ break;
4455+
4456+ case SO_TOGGLE_BLOOM_STATE:
4457+ if(optlen >= (sizeof(bloom_filter)-1))
4458+ return -EINVAL;
4459+
4460+ if(optlen > 0) {
4461+ if(copy_from_user(&do_enable, optval, optlen))
4462+ return -EFAULT;
4463+ }
4464+
4465+ write_lock(&ring_mgmt_lock);
4466+ if(do_enable)
4467+ pfr->bitmask_enabled = 1;
4468+ else
4469+ pfr->bitmask_enabled = 0;
4470+ write_unlock(&ring_mgmt_lock);
4471+ printk("SO_TOGGLE_BLOOM_STATE: bloom bitmask %s\n",
4472+ pfr->bitmask_enabled ? "enabled" : "disabled");
4473+ break;
4474+
4475+ case SO_RESET_BLOOM_FILTERS:
4476+ if(optlen >= (sizeof(bloom_filter)-1))
4477+ return -EINVAL;
4478+
4479+ if(optlen > 0) {
4480+ if(copy_from_user(&do_enable, optval, optlen))
4481+ return -EFAULT;
4482+ }
4483+
4484+ write_lock(&ring_mgmt_lock);
4485+ reset_bloom_filters(pfr);
4486+ write_unlock(&ring_mgmt_lock);
4487+ break;
4488+
4489+ default:
4490+ found = 0;
4491+ break;
4492+ }
4493+
4494+ if(found)
4495+ return(ret);
4496+ else
4497+ return(sock_setsockopt(sock, level, optname, optval, optlen));
4498+}
4499+
4500+/* ************************************* */
4501+
4502+static int ring_ioctl(struct socket *sock,
4503+ unsigned int cmd, unsigned long arg)
4504+{
4505+ switch(cmd)
4506+ {
4507+#ifdef CONFIG_INET
4508+ case SIOCGIFFLAGS:
4509+ case SIOCSIFFLAGS:
4510+ case SIOCGIFCONF:
4511+ case SIOCGIFMETRIC:
4512+ case SIOCSIFMETRIC:
4513+ case SIOCGIFMEM:
4514+ case SIOCSIFMEM:
4515+ case SIOCGIFMTU:
4516+ case SIOCSIFMTU:
4517+ case SIOCSIFLINK:
4518+ case SIOCGIFHWADDR:
4519+ case SIOCSIFHWADDR:
4520+ case SIOCSIFMAP:
4521+ case SIOCGIFMAP:
4522+ case SIOCSIFSLAVE:
4523+ case SIOCGIFSLAVE:
4524+ case SIOCGIFINDEX:
4525+ case SIOCGIFNAME:
4526+ case SIOCGIFCOUNT:
4527+ case SIOCSIFHWBROADCAST:
4528+ return(inet_dgram_ops.ioctl(sock, cmd, arg));
4529+#endif
4530+
4531+ default:
4532+ return -ENOIOCTLCMD;
4533+ }
4534+
4535+ return 0;
4536+}
4537+
4538+/* ************************************* */
4539+
4540+static struct proto_ops ring_ops = {
4541+ .family = PF_RING,
4542+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4543+ .owner = THIS_MODULE,
4544+#endif
4545+
4546+ /* Operations that make no sense on ring sockets. */
4547+ .connect = sock_no_connect,
4548+ .socketpair = sock_no_socketpair,
4549+ .accept = sock_no_accept,
4550+ .getname = sock_no_getname,
4551+ .listen = sock_no_listen,
4552+ .shutdown = sock_no_shutdown,
4553+ .sendpage = sock_no_sendpage,
4554+ .sendmsg = sock_no_sendmsg,
4555+ .getsockopt = sock_no_getsockopt,
4556+
4557+ /* Now the operations that really occur. */
4558+ .release = ring_release,
4559+ .bind = ring_bind,
4560+ .mmap = ring_mmap,
4561+ .poll = ring_poll,
4562+ .setsockopt = ring_setsockopt,
4563+ .ioctl = ring_ioctl,
4564+ .recvmsg = ring_recvmsg,
4565+};
4566+
4567+/* ************************************ */
4568+
4569+static struct net_proto_family ring_family_ops = {
4570+ .family = PF_RING,
4571+ .create = ring_create,
4572+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4573+ .owner = THIS_MODULE,
4574+#endif
4575+};
4576+
4577+// BD: API changed in 2.6.12, ref:
4578+// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
4579+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
4580+static struct proto ring_proto = {
4581+ .name = "PF_RING",
4582+ .owner = THIS_MODULE,
4583+ .obj_size = sizeof(struct sock),
4584+};
4585+#endif
4586+
4587+/* ************************************ */
4588+
4589+static void __exit ring_exit(void)
4590+{
4591+ struct list_head *ptr;
4592+ struct ring_element *entry;
4593+
4594+ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
4595+ entry = list_entry(ptr, struct ring_element, list);
4596+ kfree(entry);
4597+ }
4598+
4599+ while(ring_cluster_list != NULL) {
4600+ struct ring_cluster *next = ring_cluster_list->next;
4601+ kfree(ring_cluster_list);
4602+ ring_cluster_list = next;
4603+ }
4604+
4605+ set_skb_ring_handler(NULL);
4606+ set_buffer_ring_handler(NULL);
4607+ sock_unregister(PF_RING);
4608+ ring_proc_term();
4609+ printk("PF_RING shut down.\n");
4610+}
4611+
4612+/* ************************************ */
4613+
4614+static int __init ring_init(void)
4615+{
4616+ printk("Welcome to PF_RING %s\n(C) 2004-07 L.Deri <deri@ntop.org>\n",
4617+ RING_VERSION);
4618+
4619+ INIT_LIST_HEAD(&ring_table);
4620+ ring_cluster_list = NULL;
4621+
4622+ sock_register(&ring_family_ops);
4623+
4624+ set_skb_ring_handler(skb_ring_handler);
4625+ set_buffer_ring_handler(buffer_ring_handler);
4626+
4627+ if(get_buffer_ring_handler() != buffer_ring_handler) {
4628+ printk("PF_RING: set_buffer_ring_handler FAILED\n");
4629+
4630+ set_skb_ring_handler(NULL);
4631+ set_buffer_ring_handler(NULL);
4632+ sock_unregister(PF_RING);
4633+ return -1;
4634+ } else {
4635+ printk("PF_RING: bucket length %d bytes\n", bucket_len);
4636+ printk("PF_RING: ring slots %d\n", num_slots);
4637+ printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate);
4638+ printk("PF_RING: capture TX %s\n",
4639+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
4640+ printk("PF_RING: transparent mode %s\n",
4641+ transparent_mode ? "Yes" : "No");
4642+
4643+ printk("PF_RING initialized correctly.\n");
4644+
4645+ ring_proc_init();
4646+ return 0;
4647+ }
4648+}
4649+
4650+module_init(ring_init);
4651+module_exit(ring_exit);
4652+MODULE_LICENSE("GPL");
4653+
4654+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4655+MODULE_ALIAS_NETPROTO(PF_RING);
4656+#endif
This page took 0.538615 seconds and 4 git commands to generate.