-diff --unified --recursive --new-file linux-2.6.21.4/include/linux/ring.h linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h
---- linux-2.6.21.4/include/linux/ring.h 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h 2007-06-10 16:43:04.346421348 +0000
-@@ -0,0 +1,240 @@
+diff --unified --recursive --new-file linux-2.6.30/include/linux/ring.h linux-2.6.30-1-686-smp-PF_RING/include/linux/ring.h
+--- linux-2.6.30/include/linux/ring.h 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30-1-686-smp-PF_RING/include/linux/ring.h 2009-07-21 04:40:31.308485480 +0200
+@@ -0,0 +1,590 @@
+/*
+ * Definitions for packet ring
+ *
-+ * 2004-07 Luca Deri <deri@ntop.org>
++ * 2004-09 Luca Deri <deri@ntop.org>
+ */
++
+#ifndef __RING_H
+#define __RING_H
+
+#define RING_MAGIC
-+#define RING_MAGIC_VALUE 0x88
-+#define RING_FLOWSLOT_VERSION 6
-+#define RING_VERSION "3.4.1"
-+
-+#define SO_ADD_TO_CLUSTER 99
-+#define SO_REMOVE_FROM_CLUSTER 100
-+#define SO_SET_REFLECTOR 101
-+#define SO_SET_BLOOM 102
-+#define SO_SET_STRING 103
-+#define SO_TOGGLE_BLOOM_STATE 104
-+#define SO_RESET_BLOOM_FILTERS 105
-+
-+#define BITMASK_SET(n, p) (((char*)p->bits_memory)[n/8] |= (1<<(n % 8)))
-+#define BITMASK_CLR(n, p) (((char*)p->bits_memory)[n/8] &= ~(1<<(n % 8)))
-+#define BITMASK_ISSET(n, p) (((char*)p->bits_memory)[n/8] & (1<<(n % 8)))
-+
-+/* *********************************** */
++#define RING_MAGIC_VALUE 0x88
++#define RING_FLOWSLOT_VERSION 9
++
++#define DEFAULT_BUCKET_LEN 128
++#define MAX_NUM_DEVICES 256
++
++/* Versioning */
++#define RING_VERSION "3.9.5"
++#define RING_VERSION_NUM 0x030904
++
++/* Set */
++#define SO_ADD_TO_CLUSTER 99
++#define SO_REMOVE_FROM_CLUSTER 100
++#define SO_SET_REFLECTOR 101
++#define SO_SET_STRING 102
++#define SO_ADD_FILTERING_RULE 103
++#define SO_REMOVE_FILTERING_RULE 104
++#define SO_TOGGLE_FILTER_POLICY 105
++#define SO_SET_SAMPLING_RATE 106
++#define SO_ACTIVATE_RING 107
++#define SO_RING_BUCKET_LEN 108
++#define SO_SET_CHANNEL_ID 109
++#define SO_PURGE_IDLE_HASH_RULES 110 /* inactivity (sec) */
++#define SO_SET_APPL_NAME 111
++
++/* Get */
++#define SO_GET_RING_VERSION 120
++#define SO_GET_FILTERING_RULE_STATS 121
++#define SO_GET_HASH_FILTERING_RULE_STATS 122
++#define SO_GET_MAPPED_DNA_DEVICE 123
++
++/* Map */
++#define SO_MAP_DNA_DEVICE 130
++
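/*
 * A sketch (not part of the patch) of how a userland application might
 * drive the Set commands above. The SOCK_RAW socket type and the use of
 * level 0 in setsockopt() follow PF_RING userland convention; treat the
 * exact calls and values as illustrative.
 */
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>      /* htons() */
#include <linux/if_ether.h> /* ETH_P_ALL */

static int open_and_activate_ring(void) {
  u_int32_t bucket_len = DEFAULT_BUCKET_LEN;
  u_int16_t sample_rate = 1; /* 1 = capture every packet */
  char dummy = 0;
  int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));

  if(fd < 0) return(-1);

  /* Each command is a plain setsockopt() call on the ring socket */
  setsockopt(fd, 0, SO_RING_BUCKET_LEN, &bucket_len, sizeof(bucket_len));
  setsockopt(fd, 0, SO_SET_SAMPLING_RATE, &sample_rate, sizeof(sample_rate));
  setsockopt(fd, 0, SO_ACTIVATE_RING, &dummy, sizeof(dummy)); /* Start capture */
  return(fd);
}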
++/* **************** regexp.h ******************* */
+
+/*
-+ Aho-Corasick code taken from Snort
-+ under GPL license
-+*/
-+/*
-+ * DEFINES and Typedef's
-+ */
-+#define MAX_ALPHABET_SIZE 256
++http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h,
++which contains a version of this library, says:
+
-+/*
-+ FAIL STATE for 1,2,or 4 bytes for state transitions
++ *
++ * NSUBEXP must be at least 10, and no greater than 117 or the parser
++ * will not work properly.
++ *
+
-+ Uncomment this define to use 32 bit state values
-+ #define AC32
++However, it looks rather like this library is limited to 10. If you think
++otherwise, let us know.
+*/
+
-+typedef unsigned short acstate_t;
-+#define ACSM_FAIL_STATE2 0xffff
-+
-+/*
-+ *
-+ */
-+typedef
-+struct _acsm_pattern2
-+{
-+ struct _acsm_pattern2 *next;
-+
-+ unsigned char *patrn;
-+ unsigned char *casepatrn;
-+ int n;
-+ int nocase;
-+ int offset;
-+ int depth;
-+ void * id;
-+ int iid;
-+
-+} ACSM_PATTERN2;
++#define NSUBEXP 10
++typedef struct regexp {
++ char *startp[NSUBEXP];
++ char *endp[NSUBEXP];
++ char regstart; /* Internal use only. */
++ char reganch; /* Internal use only. */
++ char *regmust; /* Internal use only. */
++ int regmlen; /* Internal use only. */
++ char program[1]; /* Unwarranted chumminess with compiler. */
++} regexp;
++
++regexp * regcomp(char *exp, int *patternsize);
++int regexec(regexp *prog, char *string);
++void regsub(regexp *prog, char *source, char *dest);
++void regerror(char *s);
+
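/*
 * A sketch of how the embedded Spencer-style regexp API above is used,
 * e.g. against the payload_pattern of a filtering rule. Kernel context is
 * assumed: regcomp() is taken to allocate 'patternsize' bytes that the
 * caller releases with kfree(). Function name is illustrative.
 */
static int payload_matches(char *pattern, char *payload) {
  int patternsize, rc = 0;
  regexp *re = regcomp(pattern, &patternsize);

  if(re != NULL) {
    rc = regexec(re, payload); /* 1 = match, 0 = no match */
    kfree(re);                 /* Assumed: compiled program is kmalloc'ed */
  }
  return(rc);
}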
+/*
-+ * transition nodes - either 8 or 12 bytes
++ * The first byte of the regexp internal "program" is actually this magic
++ * number; the start node begins in the second byte.
+ */
-+typedef
-+struct trans_node_s {
-+
-+ acstate_t key; /* The character that got us here - sized to keep structure aligned on 4 bytes */
-+ /* to better the caching opportunities. A value that crosses the cache line */
-+ /* forces an expensive reconstruction, typing this as acstate_t stops that. */
-+ acstate_t next_state; /* */
-+ struct trans_node_s * next; /* next transition for this state */
-+
-+} trans_node_t;
++#define MAGIC 0234
+
++/* *********************************** */
+
-+/*
-+ * User specified final storage type for the state transitions
-+ */
-+enum {
-+ ACF_FULL,
-+ ACF_SPARSE,
-+ ACF_BANDED,
-+ ACF_SPARSEBANDS,
++struct pkt_aggregation_info {
++ u_int32_t num_pkts, num_bytes;
++ struct timeval first_seen, last_seen;
+};
+
+/*
-+ * User specified machine types
-+ *
-+ * TRIE : Keyword trie
-+ * NFA :
-+ * DFA :
-+ */
-+enum {
-+ FSA_TRIE,
-+ FSA_NFA,
-+ FSA_DFA,
++ Note that as offsets *can* be negative,
++ please do not change them to unsigned
++*/
++struct pkt_offset {
++ int16_t eth_offset; /* This offset *must* be added to all offsets below */
++ int16_t vlan_offset;
++ int16_t l3_offset;
++ int16_t l4_offset;
++ int16_t payload_offset;
+};
+
-+/*
-+ * Aho-Corasick State Machine Struct - one per group of pattterns
-+ */
-+typedef struct {
-+ int acsmMaxStates;
-+ int acsmNumStates;
-+
-+ ACSM_PATTERN2 * acsmPatterns;
-+ acstate_t * acsmFailState;
-+ ACSM_PATTERN2 ** acsmMatchList;
-+
-+ /* list of transitions in each state, this is used to build the nfa & dfa */
-+ /* after construction we convert to sparse or full format matrix and free */
-+ /* the transition lists */
-+ trans_node_t ** acsmTransTable;
-+
-+ acstate_t ** acsmNextState;
-+ int acsmFormat;
-+ int acsmSparseMaxRowNodes;
-+ int acsmSparseMaxZcnt;
-+
-+ int acsmNumTrans;
-+ int acsmAlphabetSize;
-+ int acsmFSA;
-+
-+} ACSM_STRUCT2;
++struct pkt_parsing_info {
++ /* Core fields (also used by NetFlow) */
++ u_int16_t eth_type; /* Ethernet type */
++ u_int16_t vlan_id; /* VLAN Id or NO_VLAN */
++ u_int8_t l3_proto, ipv4_tos; /* Layer 3 protocol/TOS */
++ u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
++ u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
++ u_int8_t tcp_flags; /* TCP flags (0 if not available) */
+
-+/* *********************************** */
++  u_int16_t last_matched_plugin_id; /* If > 0, identifies the plugin that matched the packet */
++ union {
++ struct pkt_offset offset; /* Offsets of L3/L4/payload elements */
++ struct pkt_aggregation_info aggregation; /* Future or plugin use */
++ } pkt_detail;
++};
+
-+#ifndef HAVE_PCAP
-+struct pcap_pkthdr {
++struct pfring_pkthdr {
+ struct timeval ts; /* time stamp */
+ u_int32_t caplen; /* length of portion present */
+ u_int32_t len; /* length this packet (off wire) */
-+ /* packet parsing info */
-+ u_int16_t eth_type; /* Ethernet type */
-+ u_int16_t vlan_id; /* VLAN Id or -1 for no vlan */
-+ u_int8_t l3_proto; /* Layer 3 protocol */
-+ u_int16_t l3_offset, l4_offset, payload_offset; /* Offsets of L3/L4/payload elements */
-+ u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
-+ u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
++ struct pkt_parsing_info parsed_pkt; /* packet parsing info */
++ u_int16_t parsed_header_len; /* Extra parsing data before packet */
+};
-+#endif
+
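/*
 * A sketch showing how the parsed offsets above are meant to be combined:
 * as noted in struct pkt_offset, eth_offset must be added to every other
 * offset, and any of them may be negative. Function name is illustrative.
 */
static char* pkt_l4_ptr(char *pkt, struct pfring_pkthdr *hdr) {
  struct pkt_offset *off = &hdr->parsed_pkt.pkt_detail.offset;
  return(&pkt[off->eth_offset + off->l4_offset]);
}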
+/* *********************************** */
+
-+typedef struct _counter_list {
-+ u_int32_t bit_id;
-+ u_int32_t bit_counter;
-+ struct _counter_list *next;
-+} bitmask_counter_list;
++#define MAX_PLUGIN_ID 64
++#define MAX_PLUGIN_FIELDS 32
++
++/* ************************************************* */
++
++typedef struct {
++ u_int8_t proto; /* Use 0 for 'any' protocol */
++ u_int16_t vlan_id; /* Use '0' for any vlan */
++  u_int32_t host_low, host_high;     /* Use '0' for any host. This is applied to both source
++                                        and destination. */
++  u_int16_t port_low, port_high;     /* All ports between port_low...port_high.
++                                        0 means 'any' port. This is applied to both source
++                                        and destination. This means that
++                                        (proto, sip, sport, dip, dport) matches the rule if
++                                        one of "sip & sport", "sip & dport", "dip & sport"
++                                        matches. */
++} filtering_rule_core_fields;
++
++/* ************************************************* */
++
++#define FILTER_PLUGIN_DATA_LEN 256
++
++typedef struct {
++ char payload_pattern[32]; /* If strlen(payload_pattern) > 0, the packet payload
++ must match the specified pattern */
++ u_int16_t filter_plugin_id; /* If > 0 identifies a plugin to which the datastructure
++ below will be passed for matching */
++ char filter_plugin_data[FILTER_PLUGIN_DATA_LEN];
++                                     /* Opaque data structure that is interpreted by the
++                                        specified plugin and that specifies a filtering
++                                        criterion to be checked for a match. Usually this
++                                        data is re-cast to a more meaningful data structure
++                                     */
++} filtering_rule_extended_fields;
++
++/* ************************************************* */
++
++typedef struct {
++ /* Plugin Action */
++ u_int16_t plugin_id; /* ('0'=no plugin) id of the plugin associated with this rule */
++} filtering_rule_plugin_action;
++
++typedef enum {
++ forward_packet_and_stop_rule_evaluation = 0,
++ dont_forward_packet_and_stop_rule_evaluation,
++ execute_action_and_continue_rule_evaluation,
++ forward_packet_add_rule_and_stop_rule_evaluation
++} rule_action_behaviour;
++
++typedef enum {
++ forward_packet = 100,
++ dont_forward_packet,
++ use_rule_forward_policy
++} packet_action_behaviour;
+
+typedef struct {
-+ u_int32_t num_bits, order, num_pages;
-+ unsigned long bits_memory;
-+ bitmask_counter_list *clashes;
-+} bitmask_selector;
++  u_int16_t rule_id;                 /* Rules are processed in order from lowest to highest id */
++ rule_action_behaviour rule_action; /* What to do in case of match */
++  u_int8_t balance_id, balance_pool; /* If balance_pool > 0, then deliver the packet only if
++                                        (hash(proto, sip, sport, dip, dport) % balance_pool)
++                                        == balance_id */
++ filtering_rule_core_fields core_fields;
++ filtering_rule_extended_fields extended_fields;
++ filtering_rule_plugin_action plugin_action;
++ unsigned long jiffies_last_match; /* Jiffies of the last rule match (updated by pf_ring) */
++} filtering_rule;
+
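/*
 * A sketch of a wildcard rule that drops TCP port 80 traffic in both
 * directions, added with SO_ADD_FILTERING_RULE. Field usage follows the
 * comments above; the setsockopt() level 0 is assumed, as in the earlier
 * sketch.
 */
#include <string.h>

static int drop_http(int fd) {
  filtering_rule rule;

  memset(&rule, 0, sizeof(rule));
  rule.rule_id     = 5; /* Evaluated lowest id first */
  rule.rule_action = dont_forward_packet_and_stop_rule_evaluation;
  rule.core_fields.proto     = 6;  /* TCP */
  rule.core_fields.port_low  = 80; /* Applied to both src and dst port */
  rule.core_fields.port_high = 80;
  return(setsockopt(fd, 0, SO_ADD_FILTERING_RULE, &rule, sizeof(rule)));
}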
+/* *********************************** */
+
-+enum cluster_type {
-+ cluster_per_flow = 0,
-+ cluster_round_robin
-+};
++/* Hash size used for precise packet matching */
++#define DEFAULT_RING_HASH_SIZE 4096
++
++/*
++ * The hash table contains only perfect matches: no
++ * wildcards are accepted.
++ */
++typedef struct {
++ u_int16_t vlan_id;
++ u_int8_t proto;
++ u_int32_t host_peer_a, host_peer_b;
++ u_int16_t port_peer_a, port_peer_b;
++
++ rule_action_behaviour rule_action; /* What to do in case of match */
++ filtering_rule_plugin_action plugin_action;
++ unsigned long jiffies_last_match; /* Jiffies of the last rule match (updated by pf_ring) */
++} hash_filtering_rule;
++
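/*
 * A sketch of an exact-match rule for a single TCP session. Peers can be
 * given in either order, since a hash rule matches the flow in both
 * directions; the kernel side is assumed to tell wildcard rules and hash
 * rules apart by the option length passed to SO_ADD_FILTERING_RULE.
 */
static int forward_session(int fd, u_int32_t ip_a, u_int16_t port_a,
                           u_int32_t ip_b, u_int16_t port_b) {
  hash_filtering_rule rule;

  memset(&rule, 0, sizeof(rule));
  rule.proto       = 6; /* TCP */
  rule.host_peer_a = ip_a, rule.port_peer_a = port_a;
  rule.host_peer_b = ip_b, rule.port_peer_b = port_b;
  rule.rule_action = forward_packet_and_stop_rule_evaluation;
  return(setsockopt(fd, 0, SO_ADD_FILTERING_RULE, &rule, sizeof(rule)));
}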
++/* ************************************************* */
++
++typedef struct _filtering_hash_bucket {
++ hash_filtering_rule rule;
++ void *plugin_data_ptr; /* ptr to a *continuous* memory area
++ allocated by the plugin */
++ u_int16_t plugin_data_ptr_len;
++ struct _filtering_hash_bucket *next;
++} filtering_hash_bucket;
+
+/* *********************************** */
+
-+#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr))
-+#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr))
++#define RING_MIN_SLOT_SIZE (60+sizeof(struct pfring_pkthdr))
++#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pfring_pkthdr))
++
++#ifndef min
++#define min(a,b) ((a < b) ? a : b)
++#endif
+
+/* *********************************** */
++/* False sharing reference: http://en.wikipedia.org/wiki/False_sharing */
+
+typedef struct flowSlotInfo {
+ u_int16_t version, sample_rate;
+ u_int32_t tot_slots, slot_len, data_len, tot_mem;
-+
-+ u_int64_t tot_pkts, tot_lost;
-+ u_int64_t tot_insert, tot_read;
-+ u_int32_t insert_idx, remove_idx;
++ u_int64_t tot_pkts, tot_lost, tot_insert, tot_read;
++ u_int32_t insert_idx;
++ u_int8_t padding[72]; /* Used to avoid false sharing */
++ u_int32_t remove_idx;
++ u_int32_t padding2[31]; /* Used to avoid false sharing */
+} FlowSlotInfo;
+
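/*
 * A sketch of the userland consumer side implied by FlowSlotInfo: the
 * kernel producer advances insert_idx/tot_insert, the consumer advances
 * remove_idx/tot_read, and the padding keeps the two index groups on
 * separate cache lines so the two sides do not keep invalidating each
 * other's cache (false sharing). Slot layout details are illustrative;
 * the real code stores a pfring_pkthdr at the start of each slot.
 */
static char* ring_next_packet(FlowSlotInfo *info, char *ring_slots) {
  if(info->tot_read >= info->tot_insert)
    return(NULL); /* Ring empty: caller should wait, e.g. via poll() */
  return(&ring_slots[info->remove_idx * info->slot_len]);
}

static void ring_consume_packet(FlowSlotInfo *info) {
  info->remove_idx = (info->remove_idx + 1) % info->tot_slots;
  info->tot_read++;
}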
+/* *********************************** */
+
-+#ifdef __KERNEL__
++#ifdef __KERNEL__
+
+FlowSlotInfo* getRingPtr(void);
+int allocateRing(char *deviceName, u_int numSlots, u_int bucketLen, u_int sampleRate);
+
+/* ************************* */
+
-+typedef int (*handle_ring_skb)(struct sk_buff *skb,
-+ u_char recv_packet, u_char real_skb);
++#endif /* __KERNEL__ */
++
++/* *********************************** */
++
++#define PF_RING 27 /* Packet Ring */
++#define SOCK_RING PF_RING
++
++/* ioctl() */
++#define SIORINGPOLL 0x8888
++
++/* ************************************************* */
++
++typedef int (*dna_wait_packet)(void *adapter, int mode);
++
++typedef enum {
++ add_device_mapping = 0, remove_device_mapping
++} dna_device_operation;
++
++typedef enum {
++ intel_e1000 = 0, intel_igb, intel_ixgbe
++} dna_device_model;
++
++typedef struct {
++ unsigned long packet_memory; /* Invalid in userland */
++ u_int packet_memory_num_slots;
++ u_int packet_memory_slot_len;
++ u_int packet_memory_tot_len;
++ void *descr_packet_memory; /* Invalid in userland */
++ u_int descr_packet_memory_num_slots;
++ u_int descr_packet_memory_slot_len;
++ u_int descr_packet_memory_tot_len;
++ u_int channel_id;
++ char *phys_card_memory; /* Invalid in userland */
++ u_int phys_card_memory_len;
++ struct net_device *netdev; /* Invalid in userland */
++ dna_device_model device_model;
++#ifdef __KERNEL__
++ wait_queue_head_t *packet_waitqueue;
++#else
++ void *packet_waitqueue;
++#endif
++ u_int8_t *interrupt_received, in_use;
++ void *adapter_ptr;
++ dna_wait_packet wait_packet_function_ptr;
++} dna_device;
++
++typedef struct {
++ dna_device_operation operation;
++ char device_name[8];
++ int32_t channel_id;
++} dna_device_mapping;
++
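/*
 * A sketch of how a userland process might bind to a DNA device using the
 * mapping structure above; SO_MAP_DNA_DEVICE and SO_GET_MAPPED_DNA_DEVICE
 * are the commands defined earlier. Error handling is minimal and the
 * function name is illustrative.
 */
#include <stdio.h>
#include <string.h>

static int map_dna(int fd, char *ifname, int32_t channel) {
  dna_device_mapping mapping;
  dna_device dev;
  socklen_t len = sizeof(dev);

  memset(&mapping, 0, sizeof(mapping));
  mapping.operation  = add_device_mapping;
  mapping.channel_id = channel;
  snprintf(mapping.device_name, sizeof(mapping.device_name), "%s", ifname);

  if(setsockopt(fd, 0, SO_MAP_DNA_DEVICE, &mapping, sizeof(mapping)) < 0)
    return(-1);

  /* Read back the userland-visible device geometry */
  return(getsockopt(fd, 0, SO_GET_MAPPED_DNA_DEVICE, &dev, &len));
}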
++/* ************************************************* */
++
++#ifdef __KERNEL__
++
++enum cluster_type {
++ cluster_per_flow = 0,
++ cluster_round_robin
++};
++
++#define CLUSTER_LEN 8
++
++/*
++ * A ring cluster is used to group together rings used by various
++ * applications so that, from the PF_RING point of view, they look
++ * like a single ring. This means that developers can use clusters
++ * for sharing packets across applications using various policies,
++ * as specified in the hashing_mode parameter.
++ */
++struct ring_cluster {
++ u_short cluster_id; /* 0 = no cluster */
++ u_short num_cluster_elements;
++ enum cluster_type hashing_mode;
++ u_short hashing_id;
++ struct sock *sk[CLUSTER_LEN];
++};
++
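/*
 * A sketch of how a cluster might pick the destination ring under the two
 * policies in enum cluster_type. The per-flow hash shown is illustrative
 * (the real kernel code hashes the parsed 5-tuple); num_cluster_elements
 * is assumed > 0.
 */
static struct sock* cluster_pick(struct ring_cluster *cl,
                                 struct pfring_pkthdr *hdr) {
  u_short idx;

  if(cl->hashing_mode == cluster_round_robin)
    idx = (cl->hashing_id++) % cl->num_cluster_elements;
  else /* cluster_per_flow: a given flow always lands on the same ring */
    idx = (hdr->parsed_pkt.ipv4_src + hdr->parsed_pkt.ipv4_dst +
           hdr->parsed_pkt.l4_src_port + hdr->parsed_pkt.l4_dst_port +
           hdr->parsed_pkt.l3_proto) % cl->num_cluster_elements;

  return(cl->sk[idx]);
}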
++/*
++ * Linked-list of ring clusters.
++ */
++typedef struct {
++ struct ring_cluster cluster;
++ struct list_head list;
++} ring_cluster_element;
++
++typedef struct {
++ dna_device dev;
++ struct list_head list;
++} dna_device_list;
++
++/* ************************************************* */
++
++/*
++ * Linked-list of ring sockets.
++ */
++struct ring_element {
++ struct list_head list;
++ struct sock *sk;
++};
++
++/* ************************************************* */
++
++struct ring_opt; /* Forward declaration */
++
++typedef int (*do_handle_filtering_hash_bucket)(struct ring_opt *pfr,
++ filtering_hash_bucket* rule,
++ u_char add_rule);
++
++/* ************************************************* */
++
++#define RING_ANY_CHANNEL -1
++
++/*
++ * Ring options
++ */
++struct ring_opt {
++ u_int8_t ring_active;
++ struct net_device *ring_netdev;
++ u_short ring_pid;
++ u_int32_t ring_id;
++ char *appl_name; /* String that identifies the application bound to the socket */
++
++ /* Direct NIC Access */
++ u_int8_t mmap_count;
++ dna_device *dna_device;
++
++ /* Cluster */
++ u_short cluster_id; /* 0 = no cluster */
++
++ /* Channel */
++ int32_t channel_id; /* -1 = any channel */
++
++ /* Reflector */
++ struct net_device *reflector_dev; /* Reflector device */
++
++ /* Packet buffers */
++ unsigned long order;
++
++ /* Ring Slots */
++ void * ring_memory;
++ u_int32_t bucket_len;
++ FlowSlotInfo *slots_info; /* Points to ring_memory */
++ char *ring_slots; /* Points to ring_memory+sizeof(FlowSlotInfo) */
++
++ /* Packet Sampling */
++ u_int32_t pktToSample, sample_rate;
++
++ /* BPF Filter */
++ struct sk_filter *bpfFilter;
++
++ /* Filtering Rules */
++ filtering_hash_bucket **filtering_hash;
++ u_int16_t num_filtering_rules;
++  u_int8_t rules_default_accept_policy; /* 1=default policy is accept, 0=drop */
++ struct list_head rules;
++
++ /* Locks */
++ atomic_t num_ring_users;
++ wait_queue_head_t ring_slots_waitqueue;
++ rwlock_t ring_index_lock, ring_rules_lock;
++
++ /* Indexes (Internal) */
++ u_int insert_page_id, insert_slot_id;
++
++ /* Function pointer */
++ do_handle_filtering_hash_bucket handle_hash_rule;
++};
++
++/* **************************************** */
++
++/*
++ * Linked-list of device rings
++ */
++typedef struct {
++ struct ring_opt *the_ring;
++ struct list_head list;
++} device_ring_list_element;
++
++/* **************************************** */
++
++typedef struct {
++ filtering_rule rule;
++ regexp *pattern;
++ struct list_head list;
++
++ /* Plugin action */
++  void *plugin_data_ptr; /* ptr to a *contiguous* memory area allocated by the plugin */
++} filtering_rule_element;
++
++struct parse_buffer {
++ void *mem;
++ u_int16_t mem_len;
++};
++
++/* **************************************** */
++
++/* Plugins */
++/* Execute an action (e.g. update rule stats) */
++typedef int (*plugin_handle_skb)(struct ring_opt *the_ring,
++ filtering_rule_element *rule, /* In case the match is on the list */
++ filtering_hash_bucket *hash_bucket, /* In case the match is on the hash */
++ struct pfring_pkthdr *hdr,
++ struct sk_buff *skb,
++ u_int16_t filter_plugin_id,
++ struct parse_buffer **filter_rule_memory_storage,
++ packet_action_behaviour *behaviour);
++/* Return 1/0 in case of match/no match for the given skb */
++typedef int (*plugin_filter_skb)(struct ring_opt *the_ring,
++ filtering_rule_element *rule,
++ struct pfring_pkthdr *hdr,
++ struct sk_buff *skb,
++ struct parse_buffer **filter_rule_memory_storage);
++/* Get stats about the rule */
++typedef int (*plugin_get_stats)(struct ring_opt *pfr,
++ filtering_rule_element *rule,
++ filtering_hash_bucket *hash_bucket,
++ u_char* stats_buffer, u_int stats_buffer_len);
++
++/* Called when a ring is disposed */
++typedef void (*plugin_free_ring_mem)(filtering_rule_element *rule);
++
++struct pfring_plugin_registration {
++ u_int16_t plugin_id;
++ char name[16]; /* Unique plugin name (e.g. sip, udp) */
++ char description[64]; /* Short plugin description */
++ plugin_filter_skb pfring_plugin_filter_skb; /* Filter skb: 1=match, 0=no match */
++ plugin_handle_skb pfring_plugin_handle_skb;
++ plugin_get_stats pfring_plugin_get_stats;
++ plugin_free_ring_mem pfring_plugin_free_ring_mem;
++};
++
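/*
 * A sketch of how a plugin module might register itself through the
 * do_register_pfring_plugin() entry point exported by the patched dev.c
 * (kernel module context assumed). The filter callback is a stub that
 * matches everything; a real plugin would inspect hdr/skb. Names and the
 * plugin id are illustrative.
 */
static int demo_filter(struct ring_opt *the_ring, filtering_rule_element *rule,
                       struct pfring_pkthdr *hdr, struct sk_buff *skb,
                       struct parse_buffer **mem) {
  return(1); /* 1 = match */
}

static int __init demo_plugin_init(void) {
  static struct pfring_plugin_registration reg;

  memset(&reg, 0, sizeof(reg));
  reg.plugin_id = 7; /* Must be unique and < MAX_PLUGIN_ID */
  reg.pfring_plugin_filter_skb = demo_filter;
  snprintf(reg.name, sizeof(reg.name), "demo");
  snprintf(reg.description, sizeof(reg.description), "Demo filter plugin");
  return(do_register_pfring_plugin(&reg) ? 0 : -EINVAL);
}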
++typedef int (*register_pfring_plugin)(struct pfring_plugin_registration
++ *reg);
++typedef int (*unregister_pfring_plugin)(u_int16_t pfring_plugin_id);
++typedef u_int (*read_device_pfring_free_slots)(int ifindex);
++typedef void (*handle_ring_dna_device)(dna_device_operation operation,
++ unsigned long packet_memory,
++ u_int packet_memory_num_slots,
++ u_int packet_memory_slot_len,
++ u_int packet_memory_tot_len,
++ void *descr_packet_memory,
++ u_int descr_packet_memory_num_slots,
++ u_int descr_packet_memory_slot_len,
++ u_int descr_packet_memory_tot_len,
++ u_int channel_id,
++ void *phys_card_memory,
++ u_int phys_card_memory_len,
++ struct net_device *netdev,
++ dna_device_model device_model,
++ wait_queue_head_t *packet_waitqueue,
++ u_int8_t *interrupt_received,
++ void *adapter_ptr,
++ dna_wait_packet wait_packet_function_ptr);
++
++extern register_pfring_plugin get_register_pfring_plugin(void);
++extern unregister_pfring_plugin get_unregister_pfring_plugin(void);
++extern read_device_pfring_free_slots get_read_device_pfring_free_slots(void);
++
++extern void set_register_pfring_plugin(register_pfring_plugin the_handler);
++extern void set_unregister_pfring_plugin(unregister_pfring_plugin the_handler);
++extern void set_read_device_pfring_free_slots(read_device_pfring_free_slots the_handler);
++
++extern int do_register_pfring_plugin(struct pfring_plugin_registration *reg);
++extern int do_unregister_pfring_plugin(u_int16_t pfring_plugin_id);
++extern int do_read_device_pfring_free_slots(int deviceidx);
++
++extern handle_ring_dna_device get_ring_dna_device_handler(void);
++extern void set_ring_dna_device_handler(handle_ring_dna_device
++ the_dna_device_handler);
++extern void do_ring_dna_device_handler(dna_device_operation operation,
++ unsigned long packet_memory,
++ u_int packet_memory_num_slots,
++ u_int packet_memory_slot_len,
++ u_int packet_memory_tot_len,
++ void *descr_packet_memory,
++ u_int descr_packet_memory_num_slots,
++ u_int descr_packet_memory_slot_len,
++ u_int descr_packet_memory_tot_len,
++ u_int channel_id,
++ void *phys_card_memory,
++ u_int phys_card_memory_len,
++ struct net_device *netdev,
++ dna_device_model device_model,
++ wait_queue_head_t *packet_waitqueue,
++ u_int8_t *interrupt_received,
++ void *adapter_ptr,
++ dna_wait_packet wait_packet_function_ptr);
++
++typedef int (*handle_ring_skb)(struct sk_buff *skb, u_char recv_packet,
++ u_char real_skb, short channel_id);
+extern handle_ring_skb get_skb_ring_handler(void);
+extern void set_skb_ring_handler(handle_ring_skb the_handler);
+extern void do_skb_ring_handler(struct sk_buff *skb,
+ u_char recv_packet, u_char real_skb);
-+
-+typedef int (*handle_ring_buffer)(struct net_device *dev,
++
++typedef int (*handle_ring_buffer)(struct net_device *dev,
+ char *data, int len);
+extern handle_ring_buffer get_buffer_ring_handler(void);
+extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
+extern int do_buffer_ring_handler(struct net_device *dev,
+ char *data, int len);
-+#endif /* __KERNEL__ */
+
-+/* *********************************** */
++typedef int (*handle_add_hdr_to_ring)(struct ring_opt *pfr,
++ struct pfring_pkthdr *hdr);
++extern handle_add_hdr_to_ring get_add_hdr_to_ring(void);
++extern void set_add_hdr_to_ring(handle_add_hdr_to_ring the_handler);
++extern int do_add_hdr_to_ring(struct ring_opt *pfr, struct pfring_pkthdr *hdr);
+
-+#define PF_RING 27 /* Packet Ring */
-+#define SOCK_RING PF_RING
++#endif /* __KERNEL__ */
+
-+/* ioctl() */
-+#define SIORINGPOLL 0x8888
+
+/* *********************************** */
+
+#endif /* __RING_H */
-diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/Kconfig
---- linux-2.6.21.4/net/Kconfig 2007-06-07 21:27:31.000000000 +0000
-+++ linux-2.6.21.4-1-686-smp-ring3/net/Kconfig 2007-06-10 16:43:04.402423771 +0000
-@@ -39,6 +39,7 @@
- source "net/xfrm/Kconfig"
- source "net/iucv/Kconfig"
-
-+source "net/ring/Kconfig"
- config INET
- bool "TCP/IP networking"
- ---help---
-diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile
---- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000
-+++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000
-@@ -45,6 +45,7 @@
- ifneq ($(CONFIG_VLAN_8021Q),)
- obj-y += 8021q/
- endif
-+obj-$(CONFIG_RING) += ring/
- obj-$(CONFIG_IP_DCCP) += dccp/
- obj-$(CONFIG_IP_SCTP) += sctp/
- obj-y += wireless/
-diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c
---- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000
-+++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000
-@@ -133,6 +133,56 @@
+diff --unified --recursive --new-file linux-2.6.30/net/core/dev.c linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c
+--- linux-2.6.30/net/core/dev.c 2009-06-10 05:05:27.000000000 +0200
++++ linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c 2009-07-21 04:40:31.365770966 +0200
+@@ -129,6 +129,196 @@
#include "net-sysfs.h"
+#include <linux/ring.h>
+#include <linux/version.h>
+
++/* ************************************************ */
++
+static handle_ring_skb ring_handler = NULL;
+
+handle_ring_skb get_skb_ring_handler() { return(ring_handler); }
+
+void set_skb_ring_handler(handle_ring_skb the_handler) {
+  ring_handler = the_handler;
+}
+
+void do_skb_ring_handler(struct sk_buff *skb,
+			 u_char recv_packet, u_char real_skb) {
+  if(ring_handler)
-+    ring_handler(skb, recv_packet, real_skb);
++    ring_handler(skb, recv_packet, real_skb, -1 /* Unknown channel */);
+}
+
-+/* ******************* */
++/* ************************************************ */
+
+static handle_ring_buffer buffer_ring_handler = NULL;
+
+handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); }
+
+void set_buffer_ring_handler(handle_ring_buffer the_handler) {
+  buffer_ring_handler = the_handler;
+}
+
+int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
+  if(buffer_ring_handler) {
+    buffer_ring_handler(dev, data, len);
+    return(1);
+  } else
+    return(0);
+}
+
++/* ******************* */
++
++static handle_add_hdr_to_ring buffer_add_hdr_to_ring = NULL;
++
++handle_add_hdr_to_ring get_add_hdr_to_ring() { return(buffer_add_hdr_to_ring); }
++
++void set_add_hdr_to_ring(handle_add_hdr_to_ring the_handler) {
++ buffer_add_hdr_to_ring = the_handler;
++}
++
++int do_add_hdr_to_ring(struct ring_opt *pfr, struct pfring_pkthdr *hdr) {
++ if(buffer_add_hdr_to_ring) {
++ buffer_add_hdr_to_ring(pfr, hdr);
++ return(1);
++ } else
++ return(0);
++}
++
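/*
 * A sketch of how the PF_RING module is expected to install its handlers
 * through the set_*() hooks at load time, so that the static kernel only
 * carries the thin do_*() trampolines above. skb_ring_handler() and
 * buffer_ring_handler() are assumed module-local functions matching the
 * typedefs in linux/ring.h.
 */
static int __init ring_module_init(void) {
  set_skb_ring_handler(skb_ring_handler);       /* Assumed module-local */
  set_buffer_ring_handler(buffer_ring_handler); /* Assumed module-local */
  return(0);
}

static void __exit ring_module_exit(void) {
  /* Unhook: the trampolines fall back to their no-op branch */
  set_skb_ring_handler(NULL);
  set_buffer_ring_handler(NULL);
}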
++/* ************************************************ */
++
++static register_pfring_plugin pfring_registration = NULL;
++
++register_pfring_plugin get_register_pfring_plugin() { return(pfring_registration); }
++
++void set_register_pfring_plugin(register_pfring_plugin the_handler) {
++ pfring_registration = the_handler;
++}
++
++int do_register_pfring_plugin(struct pfring_plugin_registration *reg) {
++ if(pfring_registration) {
++ pfring_registration(reg);
++ return(1);
++ } else
++ return(0);
++}
++
++/* ************************************************ */
++
++static unregister_pfring_plugin pfring_unregistration = NULL;
++
++unregister_pfring_plugin get_unregister_pfring_plugin() { return(pfring_unregistration); }
++
++void set_unregister_pfring_plugin(unregister_pfring_plugin the_handler) {
++ pfring_unregistration = the_handler;
++}
++
++int do_unregister_pfring_plugin(u_int16_t pfring_plugin_id) {
++ if(pfring_unregistration) {
++ pfring_unregistration(pfring_plugin_id);
++ return(1);
++ } else
++ return(0);
++}
++
++/* ************************************************ */
++
++static handle_ring_dna_device ring_dna_device_handler = NULL;
++
++handle_ring_dna_device get_ring_dna_device_handler() { return(ring_dna_device_handler); }
++
++void set_ring_dna_device_handler(handle_ring_dna_device the_dna_device_handler) {
++ ring_dna_device_handler = the_dna_device_handler;
++}
++
++void do_ring_dna_device_handler(dna_device_operation operation,
++ unsigned long packet_memory,
++ u_int packet_memory_num_slots,
++ u_int packet_memory_slot_len,
++ u_int packet_memory_tot_len,
++ void *descr_packet_memory,
++ u_int descr_packet_memory_num_slots,
++ u_int descr_packet_memory_slot_len,
++ u_int descr_packet_memory_tot_len,
++ u_int channel_id,
++ void *phys_card_memory,
++ u_int phys_card_memory_len,
++ struct net_device *netdev,
++ dna_device_model device_model,
++ wait_queue_head_t *packet_waitqueue,
++ u_int8_t *interrupt_received,
++ void *adapter_ptr,
++ dna_wait_packet wait_packet_function_ptr) {
++ if(ring_dna_device_handler)
++ ring_dna_device_handler(operation,
++ packet_memory,
++ packet_memory_num_slots,
++ packet_memory_slot_len,
++ packet_memory_tot_len,
++ descr_packet_memory,
++ descr_packet_memory_num_slots,
++ descr_packet_memory_slot_len,
++ descr_packet_memory_tot_len, channel_id,
++ phys_card_memory, phys_card_memory_len,
++ netdev, device_model, packet_waitqueue,
++ interrupt_received, adapter_ptr,
++ wait_packet_function_ptr);
++}
++
++/* ************************************************ */
++
++static read_device_pfring_free_slots pfring_free_device_slots = NULL;
++
++read_device_pfring_free_slots get_read_device_pfring_free_slots() { return(pfring_free_device_slots); }
++
++void set_read_device_pfring_free_slots(read_device_pfring_free_slots the_handler) {
++ pfring_free_device_slots = the_handler;
++}
++
++int do_read_device_pfring_free_slots(int deviceidx) {
++ if(pfring_free_device_slots) {
++ return(pfring_free_device_slots(deviceidx));
++ } else
++ return(0);
++}
++
++/* ************************************************ */
++
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+EXPORT_SYMBOL(get_skb_ring_handler);
+EXPORT_SYMBOL(set_skb_ring_handler);
+EXPORT_SYMBOL(get_buffer_ring_handler);
+EXPORT_SYMBOL(set_buffer_ring_handler);
+EXPORT_SYMBOL(do_buffer_ring_handler);
++
++EXPORT_SYMBOL(get_add_hdr_to_ring);
++EXPORT_SYMBOL(set_add_hdr_to_ring);
++EXPORT_SYMBOL(do_add_hdr_to_ring);
++
++EXPORT_SYMBOL(get_register_pfring_plugin);
++EXPORT_SYMBOL(set_register_pfring_plugin);
++EXPORT_SYMBOL(do_register_pfring_plugin);
++
++EXPORT_SYMBOL(get_unregister_pfring_plugin);
++EXPORT_SYMBOL(set_unregister_pfring_plugin);
++EXPORT_SYMBOL(do_unregister_pfring_plugin);
++
++EXPORT_SYMBOL(get_ring_dna_device_handler);
++EXPORT_SYMBOL(set_ring_dna_device_handler);
++EXPORT_SYMBOL(do_ring_dna_device_handler);
++
++EXPORT_SYMBOL(get_read_device_pfring_free_slots);
++EXPORT_SYMBOL(set_read_device_pfring_free_slots);
++EXPORT_SYMBOL(do_read_device_pfring_free_slots);
++
+#endif
+
+#endif
- /*
- * The list of packet types we will receive (as opposed to discard)
- * and the routines to invoke.
-@@ -1809,6 +1859,9 @@
- skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
- #endif
+ /* Instead of increasing this, you should create a hash table. */
+ #define MAX_GRO_SKBS 8
+
+@@ -1839,6 +2029,12 @@
if (q->enqueue) {
-+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
-+ if(ring_handler) ring_handler(skb, 0, 1);
-+#endif /* CONFIG_RING */
spinlock_t *root_lock = qdisc_lock(q);
++ /* This TX patch applies to all drivers */
++ #if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
++ if(ring_handler) ring_handler(skb, 0, 1, -1 /* Unknown channel */);
++ #endif /* CONFIG_RING */
++
spin_lock(root_lock);
-@@ -1908,6 +1961,13 @@
+
+ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+@@ -1936,6 +2132,16 @@
unsigned long flags;
/* if netpoll wants it, pretend we never saw it */
++/* This RX patch applies only to non-NAPI drivers */
++
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
-+ if(ring_handler && ring_handler(skb, 1, 1)) {
-+ /* The packet has been copied into a ring */
-+ return(NET_RX_SUCCESS);
-+ }
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++if(ring_handler && ring_handler(skb, 1, 1, -1 /* Unknown channel */)) {
++ /* The packet has been copied into a ring */
++ return(NET_RX_SUCCESS);
++}
++#endif
+#endif /* CONFIG_RING */
-+
if (netpoll_rx(skb))
return NET_RX_DROP;
-@@ -2193,6 +2253,13 @@
+@@ -2220,6 +2426,16 @@
+ struct net_device *orig_dev;
struct net_device *null_or_orig;
int ret = NET_RX_DROP;
- __be16 type;
++/*
++  This RX patch applies to both non-NAPI drivers (as netif_receive_skb
++  is called by netif_rx) and NAPI drivers.
++*/
+#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
-+ if(ring_handler && ring_handler(skb, 1, 1)) {
-+ /* The packet has been copied into a ring */
-+ return(NET_RX_SUCCESS);
-+ }
++if(ring_handler && ring_handler(skb, 1, 1, -1 /* Unknown channel */)) {
++ /* The packet has been copied into a ring */
++ return(NET_RX_SUCCESS);
++}
+#endif /* CONFIG_RING */
-+
+ __be16 type;
- /* if we've gotten here through NAPI, check netpoll */
- if (netpoll_receive_skb(skb))
-diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig
---- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000
-@@ -0,0 +1,14 @@
-+config RING
-+ tristate "PF_RING sockets (EXPERIMENTAL)"
-+ depends on EXPERIMENTAL
-+ ---help---
-+ PF_RING socket family, optimized for packet capture.
-+ If a PF_RING socket is bound to an adapter (via the bind() system
-+ call), such adapter will be used in read-only mode until the socket
-+ is destroyed. Whenever an incoming packet is received from the adapter
-+ it will not passed to upper layers, but instead it is copied to a ring
-+ buffer, which in turn is exported to user space applications via mmap.
-+ Please refer to http://luca.ntop.org/Ring.pdf for more.
-+
-+ Say N unless you know what you are doing.
-+
-diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile
---- linux-2.6.21.4/net/ring/Makefile 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile 2007-06-10 16:43:04.350421521 +0000
-@@ -0,0 +1,7 @@
-+#
-+# Makefile for the ring driver.
-+#
-+
-+obj-m += ring.o
-+
-+ring-objs := ring_packet.o
-diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c
---- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000
-+++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000
-@@ -0,0 +1,4258 @@
-+/* ***************************************************************
-+ *
-+ * (C) 2004-07 - Luca Deri <deri@ntop.org>
-+ *
-+ * This code includes contributions courtesy of
-+ * - Jeff Randall <jrandall@nexvu.com>
-+ * - Helmut Manck <helmut.manck@secunet.com>
-+ * - Brad Doctor <brad@stillsecure.com>
-+ * - Amit D. Chaudhary <amit_ml@rajgad.com>
-+ * - Francesco Fusco <fusco@ntop.org>
-+ * - Michael Stiller <ms@2scale.net>
-+ *
+ if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
+diff --unified --recursive --new-file linux-2.6.30/net/core/dev.c.ORG linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c.ORG
+--- linux-2.6.30/net/core/dev.c.ORG 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c.ORG 2009-07-21 04:40:31.319103951 +0200
+@@ -0,0 +1,5336 @@
++/*
++ * NET3 Protocol independent device support routines.
+ *
-+ * This program is free software; you can redistribute it and/or modify
-+ * it under the terms of the GNU General Public License as published by
-+ * the Free Software Foundation; either version 2 of the License, or
-+ * (at your option) any later version.
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
+ *
-+ * This program is distributed in the hope that it will be useful,
-+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * GNU General Public License for more details.
++ * Derived from the non IP parts of dev.c 1.0.19
++ * Authors: Ross Biro
++ * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
++ * Mark Evans, <evansmp@uhura.aston.ac.uk>
+ *
-+ * You should have received a copy of the GNU General Public License
-+ * along with this program; if not, write to the Free Software Foundation,
-+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ * Additional Authors:
++ * Florian la Roche <rzsfl@rz.uni-sb.de>
++ * Alan Cox <gw4pts@gw4pts.ampr.org>
++ * David Hinds <dahinds@users.sourceforge.net>
++ * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
++ * Adam Sulmicki <adam@cfar.umd.edu>
++ * Pekka Riikonen <priikone@poesidon.pspt.fi>
+ *
++ * Changes:
++ * D.J. Barrow : Fixed bug where dev->refcnt gets set
++ * to 2 if register_netdev gets called
++ * before net_dev_init & also removed a
++ * few lines of code in the process.
++ * Alan Cox : device private ioctl copies fields back.
++ * Alan Cox : Transmit queue code does relevant
++ * stunts to keep the queue safe.
++ * Alan Cox : Fixed double lock.
++ * Alan Cox : Fixed promisc NULL pointer trap
++ * ???????? : Support the full private ioctl range
++ * Alan Cox : Moved ioctl permission check into
++ * drivers
++ * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
++ * Alan Cox : 100 backlog just doesn't cut it when
++ * you start doing multicast video 8)
++ * Alan Cox : Rewrote net_bh and list manager.
++ * Alan Cox : Fix ETH_P_ALL echoback lengths.
++ * Alan Cox : Took out transmit every packet pass
++ * Saved a few bytes in the ioctl handler
++ * Alan Cox : Network driver sets packet type before
++ * calling netif_rx. Saves a function
++ * call a packet.
++ * Alan Cox : Hashed net_bh()
++ * Richard Kooijman: Timestamp fixes.
++ * Alan Cox : Wrong field in SIOCGIFDSTADDR
++ * Alan Cox : Device lock protection.
++ * Alan Cox : Fixed nasty side effect of device close
++ * changes.
++ * Rudi Cilibrasi : Pass the right thing to
++ * set_mac_address()
++ * Dave Miller : 32bit quantity for the device lock to
++ * make it work out on a Sparc.
++ * Bjorn Ekwall : Added KERNELD hack.
++ * Alan Cox : Cleaned up the backlog initialise.
++ * Craig Metz : SIOCGIFCONF fix if space for under
++ * 1 device.
++ * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
++ * is no device open function.
++ * Andi Kleen : Fix error reporting for SIOCGIFCONF
++ * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
++ * Cyrus Durgin : Cleaned for KMOD
++ * Adam Sulmicki : Bug Fix : Network Device Unload
++ * A network device unload needs to purge
++ * the backlog queue.
++ * Paul Rusty Russell : SIOCSIFNAME
++ * Pekka Riikonen : Netdev boot-time settings code
++ * Andrew Morton : Make unregister_netdevice wait
++ * indefinitely on dev->refcnt
++ * J Hadi Salim : - Backlog queue sampling
++ * - netif_rx() feedback
+ */
+
-+#include <linux/version.h>
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
-+#include <linux/autoconf.h>
-+#else
-+#include <linux/config.h>
-+#endif
-+#include <linux/module.h>
++#include <asm/uaccess.h>
++#include <asm/system.h>
++#include <linux/bitops.h>
++#include <linux/capability.h>
++#include <linux/cpu.h>
++#include <linux/types.h>
+#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/mutex.h>
++#include <linux/string.h>
++#include <linux/mm.h>
+#include <linux/socket.h>
++#include <linux/sockios.h>
++#include <linux/errno.h>
++#include <linux/interrupt.h>
++#include <linux/if_ether.h>
++#include <linux/netdevice.h>
++#include <linux/etherdevice.h>
++#include <linux/ethtool.h>
++#include <linux/notifier.h>
+#include <linux/skbuff.h>
++#include <net/net_namespace.h>
++#include <net/sock.h>
+#include <linux/rtnetlink.h>
-+#include <linux/in.h>
-+#include <linux/inet.h>
-+#include <linux/in6.h>
++#include <linux/proc_fs.h>
++#include <linux/seq_file.h>
++#include <linux/stat.h>
++#include <linux/if_bridge.h>
++#include <linux/if_macvlan.h>
++#include <net/dst.h>
++#include <net/pkt_sched.h>
++#include <net/checksum.h>
++#include <linux/highmem.h>
+#include <linux/init.h>
-+#include <linux/filter.h>
-+#include <linux/ring.h>
++#include <linux/kmod.h>
++#include <linux/module.h>
++#include <linux/netpoll.h>
++#include <linux/rcupdate.h>
++#include <linux/delay.h>
++#include <net/wext.h>
++#include <net/iw_handler.h>
++#include <asm/current.h>
++#include <linux/audit.h>
++#include <linux/dmaengine.h>
++#include <linux/err.h>
++#include <linux/ctype.h>
++#include <linux/if_arp.h>
++#include <linux/if_vlan.h>
+#include <linux/ip.h>
-+#include <linux/tcp.h>
-+#include <linux/udp.h>
-+#include <linux/list.h>
-+#include <linux/proc_fs.h>
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+#include <net/xfrm.h>
-+#else
-+#include <linux/poll.h>
-+#endif
-+#include <net/sock.h>
-+#include <asm/io.h> /* needed for virt_to_phys() */
-+#ifdef CONFIG_INET
-+#include <net/inet_common.h>
-+#endif
-+
-+/* #define RING_DEBUG */
-+
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
-+static inline int remap_page_range(struct vm_area_struct *vma,
-+ unsigned long uvaddr,
-+ unsigned long paddr,
-+ unsigned long size,
-+ pgprot_t prot) {
-+ return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT,
-+ size, prot));
-+}
-+#endif
-+
-+/* ************************************************* */
-+
-+#define CLUSTER_LEN 8
++#include <net/ip.h>
++#include <linux/ipv6.h>
++#include <linux/in.h>
++#include <linux/jhash.h>
++#include <linux/random.h>
+
-+struct ring_cluster {
-+ u_short cluster_id; /* 0 = no cluster */
-+ u_short num_cluster_elements;
-+ enum cluster_type hashing_mode;
-+ u_short hashing_id;
-+ struct sock *sk[CLUSTER_LEN];
-+ struct ring_cluster *next; /* NULL = last element of the cluster */
-+};
++#include "net-sysfs.h"
+
-+/* ************************************************* */
++/* Instead of increasing this, you should create a hash table. */
++#define MAX_GRO_SKBS 8
+
-+struct ring_element {
-+ struct list_head list;
-+ struct sock *sk;
-+};
++/* This should be increased if a protocol with a bigger head is added. */
++#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
-+/* ************************************************* */
++/*
++ * The list of packet types we will receive (as opposed to discard)
++ * and the routines to invoke.
++ *
++ * Why 16. Because with 16 the only overlap we get on a hash of the
++ * low nibble of the protocol value is RARP/SNAP/X.25.
++ *
++ * NOTE: That is no longer true with the addition of VLAN tags. Not
++ * sure which should go first, but I bet it won't make much
++ * difference if we are running VLANs. The good news is that
++ * this protocol won't be in the list unless compiled in, so
++ * the average user (w/out VLANs) will not be adversely affected.
++ * --BLG
++ *
++ * 0800 IP
++ * 8100 802.1Q VLAN
++ * 0001 802.3
++ * 0002 AX.25
++ * 0004 802.2
++ * 8035 RARP
++ * 0005 SNAP
++ * 0805 X.25
++ * 0806 ARP
++ * 8137 IPX
++ * 0009 Localtalk
++ * 86DD IPv6
++ */
+
-+struct ring_opt {
-+ struct net_device *ring_netdev;
++#define PTYPE_HASH_SIZE (16)
++#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
+
-+ u_short ring_pid;
++static DEFINE_SPINLOCK(ptype_lock);
++static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
++static struct list_head ptype_all __read_mostly; /* Taps */
+
-+ /* Cluster */
-+ u_short cluster_id; /* 0 = no cluster */
++/*
++ * The @dev_base_head list is protected by @dev_base_lock and the rtnl
++ * semaphore.
++ *
++ * Pure readers hold dev_base_lock for reading.
++ *
++ * Writers must hold the rtnl semaphore while they loop through the
++ * dev_base_head list, and hold dev_base_lock for writing when they do the
++ * actual updates. This allows pure readers to access the list even
++ * while a writer is preparing to update it.
++ *
++ * To put it another way, dev_base_lock is held for writing only to
++ * protect against pure readers; the rtnl semaphore provides the
++ * protection against other writers.
++ *
++ * See, for example usages, register_netdevice() and
++ * unregister_netdevice(), which must be called with the rtnl
++ * semaphore held.
++ */
++DEFINE_RWLOCK(dev_base_lock);
+
-+ /* Reflector */
-+ struct net_device *reflector_dev;
++EXPORT_SYMBOL(dev_base_lock);
+
-+ /* Packet buffers */
-+ unsigned long order;
++#define NETDEV_HASHBITS 8
++#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
+
-+ /* Ring Slots */
-+ unsigned long ring_memory;
-+ FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
-+ char *ring_slots; /* Basically it points to ring_memory
-+ +sizeof(FlowSlotInfo) */
++static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
++{
++ unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
++ return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
++}
+
-+ /* Packet Sampling */
-+ u_int pktToSample, sample_rate;
++static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
++{
++ return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
++}
+
-+ /* BPF Filter */
-+ struct sk_filter *bpfFilter;
++/* Device list insertion */
++static int list_netdevice(struct net_device *dev)
++{
++ struct net *net = dev_net(dev);
+
-+ /* Aho-Corasick */
-+ ACSM_STRUCT2 * acsm;
++ ASSERT_RTNL();
+
-+ /* Locks */
-+ atomic_t num_ring_slots_waiters;
-+ wait_queue_head_t ring_slots_waitqueue;
-+ rwlock_t ring_index_lock;
-+
-+ /* Bloom Filters */
-+ u_char bitmask_enabled;
-+ bitmask_selector mac_bitmask, vlan_bitmask, ip_bitmask, twin_ip_bitmask,
-+ port_bitmask, twin_port_bitmask, proto_bitmask;
-+ u_int32_t num_mac_bitmask_add, num_mac_bitmask_remove;
-+ u_int32_t num_vlan_bitmask_add, num_vlan_bitmask_remove;
-+ u_int32_t num_ip_bitmask_add, num_ip_bitmask_remove;
-+ u_int32_t num_port_bitmask_add, num_port_bitmask_remove;
-+ u_int32_t num_proto_bitmask_add, num_proto_bitmask_remove;
++ write_lock_bh(&dev_base_lock);
++ list_add_tail(&dev->dev_list, &net->dev_base_head);
++ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
++ hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
++ write_unlock_bh(&dev_base_lock);
++ return 0;
++}
+
-+ /* Indexes (Internal) */
-+ u_int insert_page_id, insert_slot_id;
-+};
++/* Device list removal */
++static void unlist_netdevice(struct net_device *dev)
++{
++ ASSERT_RTNL();
++
++ /* Unlink dev from the device chain */
++ write_lock_bh(&dev_base_lock);
++ list_del(&dev->dev_list);
++ hlist_del(&dev->name_hlist);
++ hlist_del(&dev->index_hlist);
++ write_unlock_bh(&dev_base_lock);
++}
+
-+/* ************************************************* */
++/*
++ * Our notifier list
++ */
+
-+/* List of all ring sockets. */
-+static struct list_head ring_table;
-+static u_int ring_table_size;
++static RAW_NOTIFIER_HEAD(netdev_chain);
+
-+/* List of all clusters */
-+static struct ring_cluster *ring_cluster_list;
++/*
++ * Device drivers call our routines to queue packets here. We empty the
++ * queue in the local softnet handler.
++ */
+
-+static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
++DEFINE_PER_CPU(struct softnet_data, softnet_data);
+
-+/* ********************************** */
++#ifdef CONFIG_LOCKDEP
++/*
++ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
++ * according to dev->type
++ */
++static const unsigned short netdev_lock_type[] =
++ {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
++ ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
++ ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
++ ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
++ ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
++ ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
++ ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
++ ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
++ ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
++ ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
++ ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
++ ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
++ ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
++ ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
++ ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
++
++static const char *netdev_lock_name[] =
++ {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
++ "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
++ "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
++ "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
++ "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
++ "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
++ "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
++ "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
++ "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
++ "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
++ "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
++ "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
++ "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
++ "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
++ "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
++
++static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
++static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
++
++static inline unsigned short netdev_lock_pos(unsigned short dev_type)
++{
++ int i;
+
-+/* /proc entry for ring module */
-+struct proc_dir_entry *ring_proc_dir = NULL;
-+struct proc_dir_entry *ring_proc = NULL;
++ for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
++ if (netdev_lock_type[i] == dev_type)
++ return i;
++ /* the last key is used by default */
++ return ARRAY_SIZE(netdev_lock_type) - 1;
++}
+
-+static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
-+static void ring_proc_add(struct ring_opt *pfr);
-+static void ring_proc_remove(struct ring_opt *pfr);
-+static void ring_proc_init(void);
-+static void ring_proc_term(void);
++static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
++ unsigned short dev_type)
++{
++ int i;
+
-+/* ********************************** */
++ i = netdev_lock_pos(dev_type);
++ lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
++ netdev_lock_name[i]);
++}
+
-+/* Forward */
-+static struct proto_ops ring_ops;
++static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
++{
++ int i;
+
-+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
-+static struct proto ring_proto;
++ i = netdev_lock_pos(dev->type);
++ lockdep_set_class_and_name(&dev->addr_list_lock,
++ &netdev_addr_lock_key[i],
++ netdev_lock_name[i]);
++}
++#else
++static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
++ unsigned short dev_type)
++{
++}
++static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
++{
++}
+#endif
+
-+static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
-+ u_char real_skb);
-+static int buffer_ring_handler(struct net_device *dev, char *data, int len);
-+static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
++/*******************************************************************************
+
-+/* Extern */
++ Protocol management and registration routines
+
-+/* ********************************** */
++*******************************************************************************/
+
-+/* Defaults */
-+static unsigned int bucket_len = 128, num_slots = 4096, sample_rate = 1,
-+ transparent_mode = 1, enable_tx_capture = 1;
++/*
++ * Add a protocol ID to the list. Now that the input handler is
++ * smarter we can dispense with all the messy stuff that used to be
++ * here.
++ *
++ * BEWARE!!! Protocol handlers, mangling input packets,
++ * MUST BE last in hash buckets and checking protocol handlers
++ * MUST start from promiscuous ptype_all chain in net_bh.
++ * It is true now, do not change it.
++ * Explanation follows: if protocol handler, mangling packet, will
++ * be the first on list, it is not able to sense, that packet
++ * is cloned and should be copied-on-write, so that it will
++ * change it and subsequent readers will get broken packet.
++ * --ANK (980803)
++ */
+
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
-+module_param(bucket_len, uint, 0644);
-+module_param(num_slots, uint, 0644);
-+module_param(sample_rate, uint, 0644);
-+module_param(transparent_mode, uint, 0644);
-+module_param(enable_tx_capture, uint, 0644);
-+#else
-+MODULE_PARM(bucket_len, "i");
-+MODULE_PARM(num_slots, "i");
-+MODULE_PARM(sample_rate, "i");
-+MODULE_PARM(transparent_mode, "i");
-+MODULE_PARM(enable_tx_capture, "i");
-+#endif
++/**
++ * dev_add_pack - add packet handler
++ * @pt: packet type declaration
++ *
++ * Add a protocol handler to the networking stack. The passed &packet_type
++ * is linked into kernel lists and may not be freed until it has been
++ * removed from the kernel lists.
++ *
++ * This call does not sleep therefore it can not
++ * guarantee all CPU's that are in middle of receiving packets
++ * will see the new packet type (until the next received packet).
++ */
+
-+MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
-+MODULE_PARM_DESC(num_slots, "Number of ring slots");
-+MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
-+MODULE_PARM_DESC(transparent_mode,
-+ "Set to 1 to set transparent mode "
-+ "(slower but backwards compatible)");
++void dev_add_pack(struct packet_type *pt)
++{
++ int hash;
++
++ spin_lock_bh(&ptype_lock);
++ if (pt->type == htons(ETH_P_ALL))
++ list_add_rcu(&pt->list, &ptype_all);
++ else {
++ hash = ntohs(pt->type) & PTYPE_HASH_MASK;
++ list_add_rcu(&pt->list, &ptype_base[hash]);
++ }
++ spin_unlock_bh(&ptype_lock);
++}
+
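/*
 * A sketch of the classic dev_add_pack() usage described above: a module
 * registering an ETH_P_ALL tap, which is essentially how AF_PACKET (and,
 * conceptually, PF_RING-style taps) hook into the stack. Names are
 * illustrative.
 */
static int demo_rcv(struct sk_buff *skb, struct net_device *dev,
                    struct packet_type *pt, struct net_device *orig_dev) {
  kfree_skb(skb); /* Consume the clone we were handed */
  return(0);
}

static struct packet_type demo_pt = {
  .type = __constant_htons(ETH_P_ALL), /* Tap: goes on ptype_all above */
  .func = demo_rcv,
};

/* dev_add_pack(&demo_pt); ... dev_remove_pack(&demo_pt); */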
-+MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
++/**
++ * __dev_remove_pack - remove packet handler
++ * @pt: packet type declaration
++ *
++ * Remove a protocol handler that was previously added to the kernel
++ * protocol handlers by dev_add_pack(). The passed &packet_type is removed
++ * from the kernel lists and can be freed or reused once this function
++ * returns.
++ *
++ * The packet type might still be in use by receivers
++ * and must not be freed until after all the CPU's have gone
++ * through a quiescent state.
++ */
++void __dev_remove_pack(struct packet_type *pt)
++{
++ struct list_head *head;
++ struct packet_type *pt1;
+
-+/* ********************************** */
++ spin_lock_bh(&ptype_lock);
+
-+#define MIN_QUEUED_PKTS 64
-+#define MAX_QUEUE_LOOPS 64
++ if (pt->type == htons(ETH_P_ALL))
++ head = &ptype_all;
++ else
++ head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
+
++ list_for_each_entry(pt1, head, list) {
++ if (pt == pt1) {
++ list_del_rcu(&pt->list);
++ goto out;
++ }
++ }
+
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
-+#define ring_sk(__sk) ((__sk)->sk_protinfo)
-+#else
-+#define ring_sk_datatype(a) (a)
-+#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
-+#endif
++ printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
++out:
++ spin_unlock_bh(&ptype_lock);
++}
++/**
++ * dev_remove_pack - remove packet handler
++ * @pt: packet type declaration
++ *
++ * Remove a protocol handler that was previously added to the kernel
++ * protocol handlers by dev_add_pack(). The passed &packet_type is removed
++ * from the kernel lists and can be freed or reused once this function
++ * returns.
++ *
++ * This call sleeps to guarantee that no CPU is looking at the packet
++ * type after return.
++ */
++void dev_remove_pack(struct packet_type *pt)
++{
++ __dev_remove_pack(pt);
+
-+#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
++ synchronize_net();
++}
+
-+/*
-+ int dev_queue_xmit(struct sk_buff *skb)
-+ skb->dev;
-+ struct net_device *dev_get_by_name(const char *name)
-+*/
++/******************************************************************************
+
-+/* ********************************** */
++ Device Boot-time Settings Routines
+
-+/*
-+** $Id$
-+**
-+** acsmx2.c
-+**
-+** Multi-Pattern Search Engine
-+**
-+** Aho-Corasick State Machine - version 2.0
-+**
-+** Supports both Non-Deterministic and Deterministic Finite Automata
-+**
-+**
-+** Reference - Efficient String matching: An Aid to Bibliographic Search
-+** Alfred V Aho and Margaret J Corasick
-+** Bell Labratories
-+** Copyright(C) 1975 Association for Computing Machinery,Inc
-+**
-+** +++
-+** +++ Version 1.0 notes - Marc Norton:
-+** +++
-+**
-+** Original implementation based on the 4 algorithms in the paper by Aho & Corasick,
-+** some implementation ideas from 'Practical Algorithms in C', and some
-+** of my own.
-+**
-+** 1) Finds all occurrences of all patterns within a text.
-+**
-+** +++
-+** +++ Version 2.0 Notes - Marc Norton/Dan Roelker:
-+** +++
-+**
-+** New implementation modifies the state table storage and access model to use
-+** compacted sparse vector storage. Dan Roelker and I hammered this strategy out
-+** amongst many others in order to reduce memory usage and improve caching performance.
-+** The memory usage is greatly reduced, we only use 1/4 of what we use to. The caching
-+** performance is better in pure benchmarking tests, but does not show overall improvement
-+** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing
-+** many other things before we get back to a patteren match test, so the cache is voided.
-+**
-+** This versions has better caching performance characteristics, reduced memory,
-+** more state table storage options, and requires no a priori case conversions.
-+** It does maintain the same public interface. (Snort only used banded storage).
-+**
-+** 1) Supports NFA and DFA state machines, and basic keyword state machines
-+** 2) Initial transition table uses Linked Lists
-+** 3) Improved state table memory options. NFA and DFA state
-+** transition tables are converted to one of 4 formats during compilation.
-+** a) Full matrix
-+** b) Sparse matrix
-+** c) Banded matrix (Default-this is the only one used in snort)
-+** d) Sparse-Banded matrix
-+** 4) Added support for acstate_t in .h file so we can compile states as
-+** 16, or 32 bit state values for another reduction in memory consumption,
-+** smaller states allows more of the state table to be cached, and improves
-+** performance on x86-P4. Your mileage may vary, especially on risc systems.
-+** 5) Added a bool to each state transition list to indicate if there is a matching
-+** pattern in the state. This prevents us from accessing another data array
-+** and can improve caching/performance.
-+** 6) The search functions are very sensitive, don't change them without extensive testing,
-+** or you'll just spoil the caching and prefetching opportunities.
-+**
-+** Extras for fellow pattern matchers:
-+** The table below explains the storage format used at each step.
-+** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.
-+** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.
-+** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster
-+** than the previous version, due to caching performance. This cannot be fully realized in Snort yet,
-+** but other applications may have better caching opportunities.
-+** Snort only needs to use the banded or full storage.
-+**
-+** Transition table format at each processing stage.
-+** -------------------------------------------------
-+** Patterns -> Keyword State Table (List)
-+** Keyword State Table -> NFA (List)
-+** NFA -> DFA (List)
-+** DFA (List)-> Sparse Rows O(m-avg # transitions per state)
-+** -> Banded Rows O(1)
-+** -> Sparse-Banded Rows O(nb-# bands)
-+** -> Full Matrix O(1)
-+**
-+** Copyright(C) 2002,2003,2004 Marc Norton
-+** Copyright(C) 2003,2004 Daniel Roelker
-+** Copyright(C) 2002,2003,2004 Sourcefire,Inc.
-+**
-+** This program is free software; you can redistribute it and/or modify
-+** it under the terms of the GNU General Public License as published by
-+** the Free Software Foundation; either version 2 of the License, or
-+** (at your option) any later version.
-+**
-+** This program is distributed in the hope that it will be useful,
-+** but WITHOUT ANY WARRANTY; without even the implied warranty of
-+** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+** GNU General Public License for more details.
-+**
-+** You should have received a copy of the GNU General Public License
-+** along with this program; if not, write to the Free Software
-+** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-+*
-+*/
++*******************************************************************************/
+
-+/*
-+ *
-+ */
-+#define MEMASSERT(p,s) if(!p){printk("ACSM-No Memory: %s!\n",s);}
++/* Boot time configuration table */
++static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
+
-+/*
++/**
++ * netdev_boot_setup_add - add new setup entry
++ * @name: name of the device
++ * @map: configured settings for the device
+ *
++ * Adds a new setup entry to the dev_boot_setup list. The function
++ * returns 0 on error and 1 on success. This is a generic routine
++ * common to all netdevices.
+ */
-+static int max_memory = 0;
++static int netdev_boot_setup_add(char *name, struct ifmap *map)
++{
++ struct netdev_boot_setup *s;
++ int i;
++
++ s = dev_boot_setup;
++ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
++ if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
++ memset(s[i].name, 0, sizeof(s[i].name));
++ strlcpy(s[i].name, name, IFNAMSIZ);
++ memcpy(&s[i].map, map, sizeof(s[i].map));
++ break;
++ }
++ }
+
-+/*
++ return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
++}
++
++/**
++ * netdev_boot_setup_check - check boot time settings
++ * @dev: the netdevice
+ *
++ * Check boot time settings for the device.
++ * The found settings are set for the device to be used
++ * later in the device probing.
++ * Returns 0 if no settings are found, 1 if they are.
+ */
-+typedef struct acsm_summary_s
++int netdev_boot_setup_check(struct net_device *dev)
+{
-+ unsigned num_states;
-+ unsigned num_transitions;
-+ ACSM_STRUCT2 acsm;
++ struct netdev_boot_setup *s = dev_boot_setup;
++ int i;
++
++ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
++ if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
++ !strcmp(dev->name, s[i].name)) {
++ dev->irq = s[i].map.irq;
++ dev->base_addr = s[i].map.base_addr;
++ dev->mem_start = s[i].map.mem_start;
++ dev->mem_end = s[i].map.mem_end;
++ return 1;
++ }
++ }
++ return 0;
++}
+
-+}acsm_summary_t;
+
-+/*
++/**
++ * netdev_boot_base - get address from boot time settings
++ * @prefix: prefix for network device
++ * @unit: id for network device
+ *
++ * Check boot time settings for the base address of device.
++ * The found settings are set for the device to be used
++ * later in the device probing.
++ * Returns 0 if no settings found.
+ */
-+static acsm_summary_t summary={0,0};
++unsigned long netdev_boot_base(const char *prefix, int unit)
++{
++ const struct netdev_boot_setup *s = dev_boot_setup;
++ char name[IFNAMSIZ];
++ int i;
+
-+/*
-+** Case Translation Table
-+*/
-+static unsigned char xlatcase[256];
-+/*
-+ *
-+ */
++ sprintf(name, "%s%d", prefix, unit);
+
-+inline int toupper(int ch) {
-+ if ( (unsigned int)(ch - 'a') < 26u )
-+ ch += 'A' - 'a';
-+ return ch;
++ /*
++ * If device already registered then return base of 1
++ * to indicate not to probe for this interface
++ */
++ if (__dev_get_by_name(&init_net, name))
++ return 1;
++
++ for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
++ if (!strcmp(name, s[i].name))
++ return s[i].map.base_addr;
++ return 0;
+}
+
-+static void init_xlatcase(void)
++/*
++ * Saves at boot time configured settings for any netdevice.
++ */
++int __init netdev_boot_setup(char *str)
+{
-+ int i;
-+ for (i = 0; i < 256; i++)
-+ {
-+ xlatcase[i] = toupper(i);
-+ }
-+}
++ int ints[5];
++ struct ifmap map;
+
-+/*
-+ * Case Conversion
-+ */
-+static
-+inline
-+void
-+ConvertCaseEx (unsigned char *d, unsigned char *s, int m)
-+{
-+ int i;
-+#ifdef XXXX
-+ int n;
-+ n = m & 3;
-+ m >>= 2;
++ str = get_options(str, ARRAY_SIZE(ints), ints);
++ if (!str || !*str)
++ return 0;
+
-+ for (i = 0; i < m; i++ )
-+ {
-+ d[0] = xlatcase[ s[0] ];
-+ d[2] = xlatcase[ s[2] ];
-+ d[1] = xlatcase[ s[1] ];
-+ d[3] = xlatcase[ s[3] ];
-+ d+=4;
-+ s+=4;
-+ }
++ /* Save settings */
++ memset(&map, 0, sizeof(map));
++ if (ints[0] > 0)
++ map.irq = ints[1];
++ if (ints[0] > 1)
++ map.base_addr = ints[2];
++ if (ints[0] > 2)
++ map.mem_start = ints[3];
++ if (ints[0] > 3)
++ map.mem_end = ints[4];
++
++ /* Add new entry to the list */
++ return netdev_boot_setup_add(str, &map);
++}
+
-+ for (i=0; i < n; i++)
-+ {
-+ d[i] = xlatcase[ s[i] ];
-+ }
-+#else
-+ for (i=0; i < m; i++)
-+ {
-+ d[i] = xlatcase[ s[i] ];
-+ }
++__setup("netdev=", netdev_boot_setup);
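+
++/*
++ * Editorial note: the "netdev=" option registered above takes up to four
++ * integers followed by the device name, so a kernel command line such as
++ *
++ *	netdev=9,0x300,0,0,eth0
++ *
++ * (an illustrative example) requests IRQ 9 and I/O base 0x300 for eth0;
++ * fields that are not given simply stay zero in the saved ifmap.
++ */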
+
-+#endif
-+}
++/*******************************************************************************
+
++ Device Interface Subroutines
+
-+/*
++*******************************************************************************/
++
++/**
++ * __dev_get_by_name - find a device by its name
++ * @net: the applicable net namespace
++ * @name: name to find
+ *
++ * Find an interface by name. Must be called under RTNL semaphore
++ * or @dev_base_lock. If the name is found a pointer to the device
++ * is returned. If the name is not found then %NULL is returned. The
++ * reference counters are not incremented so the caller must be
++ * careful with locks.
+ */
-+static void *
-+AC_MALLOC (int n)
++
++struct net_device *__dev_get_by_name(struct net *net, const char *name)
+{
-+ void *p;
-+ p = kmalloc (n, GFP_KERNEL);
-+ if (p)
-+ max_memory += n;
-+ return p;
-+}
++ struct hlist_node *p;
+
++ hlist_for_each(p, dev_name_hash(net, name)) {
++ struct net_device *dev
++ = hlist_entry(p, struct net_device, name_hlist);
++ if (!strncmp(dev->name, name, IFNAMSIZ))
++ return dev;
++ }
++ return NULL;
++}
+
-+/*
++/**
++ * dev_get_by_name - find a device by its name
++ * @net: the applicable net namespace
++ * @name: name to find
+ *
++ * Find an interface by name. This can be called from any
++ * context and does its own locking. The returned handle has
++ * the usage count incremented and the caller must use dev_put() to
++ * release it when it is no longer needed. %NULL is returned if no
++ * matching device is found.
+ */
-+static void
-+AC_FREE (void *p)
-+{
-+ if (p)
-+ kfree (p);
-+}
+
-+
-+/*
-+ * Simple QUEUE NODE
-+ */
-+typedef struct _qnode
++struct net_device *dev_get_by_name(struct net *net, const char *name)
+{
-+ int state;
-+ struct _qnode *next;
++ struct net_device *dev;
++
++ read_lock(&dev_base_lock);
++ dev = __dev_get_by_name(net, name);
++ if (dev)
++ dev_hold(dev);
++ read_unlock(&dev_base_lock);
++ return dev;
+}
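+
++/*
++ * Editorial note: a minimal usage sketch for the lookup above ("eth0" is
++ * only an example name). The dev_put() balances the dev_hold() taken
++ * inside dev_get_by_name():
++ *
++ *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
++ *
++ *	if (dev) {
++ *		// ... use dev ...
++ *		dev_put(dev);	// drop the reference taken for us
++ *	}
++ */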
-+ QNODE;
+
-+/*
-+ * Simple QUEUE Structure
++/**
++ * __dev_get_by_index - find a device by its ifindex
++ * @net: the applicable net namespace
++ * @ifindex: index of device
++ *
++ * Search for an interface by index. Returns a pointer to the device,
++ * or %NULL if the device is not found. The device has not
++ * had its reference counter increased so the caller must be careful
++ * about locking. The caller must hold either the RTNL semaphore
++ * or @dev_base_lock.
+ */
-+typedef struct _queue
++
++struct net_device *__dev_get_by_index(struct net *net, int ifindex)
+{
-+ QNODE * head, *tail;
-+ int count;
++ struct hlist_node *p;
++
++ hlist_for_each(p, dev_index_hash(net, ifindex)) {
++ struct net_device *dev
++ = hlist_entry(p, struct net_device, index_hlist);
++ if (dev->ifindex == ifindex)
++ return dev;
++ }
++ return NULL;
+}
-+ QUEUE;
+
-+/*
-+ * Initialize the queue
++
++/**
++ * dev_get_by_index - find a device by its ifindex
++ * @net: the applicable net namespace
++ * @ifindex: index of device
++ *
++ * Search for an interface by index. Returns a pointer to the device,
++ * or NULL if the device is not found. The device returned has
++ * had a reference added and the pointer is safe until the user calls
++ * dev_put to indicate they have finished with it.
+ */
-+static void
-+queue_init (QUEUE * s)
++
++struct net_device *dev_get_by_index(struct net *net, int ifindex)
+{
-+ s->head = s->tail = 0;
-+ s->count= 0;
++ struct net_device *dev;
++
++ read_lock(&dev_base_lock);
++ dev = __dev_get_by_index(net, ifindex);
++ if (dev)
++ dev_hold(dev);
++ read_unlock(&dev_base_lock);
++ return dev;
+}
+
-+/*
-+ * Find a State in the queue
++/**
++ * dev_getbyhwaddr - find a device by its hardware address
++ * @net: the applicable net namespace
++ * @type: media type of device
++ * @ha: hardware address
++ *
++ * Search for an interface by MAC address. Returns a pointer to the
++ * device, or NULL if the device is not found. The caller must hold the
++ * rtnl semaphore. The returned device has not had its ref count increased
++ * and the caller must therefore be careful about locking
++ *
++ * BUGS:
++ * If the API was consistent this would be __dev_get_by_hwaddr
+ */
-+static int
-+queue_find (QUEUE * s, int state)
++
++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+{
-+ QNODE * q;
-+ q = s->head;
-+ while( q )
-+ {
-+ if( q->state == state ) return 1;
-+ q = q->next;
-+ }
-+ return 0;
++ struct net_device *dev;
++
++ ASSERT_RTNL();
++
++ for_each_netdev(net, dev)
++ if (dev->type == type &&
++ !memcmp(dev->dev_addr, ha, dev->addr_len))
++ return dev;
++
++ return NULL;
+}
+
-+/*
-+ * Add Tail Item to queue (FiFo/LiLo)
-+ */
-+static void
-+queue_add (QUEUE * s, int state)
++EXPORT_SYMBOL(dev_getbyhwaddr);
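+
++/*
++ * Editorial note: since dev_getbyhwaddr() neither locks nor takes a
++ * reference, a hypothetical caller must do both itself, e.g.:
++ *
++ *	char ha[ETH_ALEN] = { 0x00, 0x11, 0x22, 0x33, 0x44, 0x55 };
++ *	struct net_device *dev;
++ *
++ *	rtnl_lock();
++ *	dev = dev_getbyhwaddr(&init_net, ARPHRD_ETHER, ha);
++ *	if (dev)
++ *		dev_hold(dev);	// pin it before dropping the lock
++ *	rtnl_unlock();
++ */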
++
++struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
+{
-+ QNODE * q;
++ struct net_device *dev;
+
-+ if( queue_find( s, state ) ) return;
++ ASSERT_RTNL();
++ for_each_netdev(net, dev)
++ if (dev->type == type)
++ return dev;
+
-+ if (!s->head)
-+ {
-+ q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
-+ MEMASSERT (q, "queue_add");
-+ q->state = state;
-+ q->next = 0;
-+ }
-+ else
-+ {
-+ q = (QNODE *) AC_MALLOC (sizeof (QNODE));
-+ q->state = state;
-+ q->next = 0;
-+ s->tail->next = q;
-+ s->tail = q;
-+ }
-+ s->count++;
++ return NULL;
+}
+
++EXPORT_SYMBOL(__dev_getfirstbyhwtype);
+
-+/*
-+ * Remove Head Item from queue
-+ */
-+static int
-+queue_remove (QUEUE * s)
++struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
+{
-+ int state = 0;
-+ QNODE * q;
-+ if (s->head)
-+ {
-+ q = s->head;
-+ state = q->state;
-+ s->head = s->head->next;
-+ s->count--;
-+
-+ if( !s->head )
-+ {
-+ s->tail = 0;
-+ s->count = 0;
-+ }
-+ AC_FREE (q);
-+ }
-+ return state;
++ struct net_device *dev;
++
++ rtnl_lock();
++ dev = __dev_getfirstbyhwtype(net, type);
++ if (dev)
++ dev_hold(dev);
++ rtnl_unlock();
++ return dev;
+}
+
++EXPORT_SYMBOL(dev_getfirstbyhwtype);
+
-+/*
-+ * Return items in the queue
++/**
++ * dev_get_by_flags - find any device with given flags
++ * @net: the applicable net namespace
++ * @if_flags: IFF_* values
++ * @mask: bitmask of bits in if_flags to check
++ *
++ * Search for any interface with the given flags. Returns a pointer to
++ * the device, or NULL if no device is found. The device returned has
++ * had a reference added and the pointer is safe until the user calls
++ * dev_put to indicate they have finished with it.
+ */
-+static int
-+queue_count (QUEUE * s)
++
++struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
+{
-+ return s->count;
++ struct net_device *dev, *ret;
++
++ ret = NULL;
++ read_lock(&dev_base_lock);
++ for_each_netdev(net, dev) {
++ if (((dev->flags ^ if_flags) & mask) == 0) {
++ dev_hold(dev);
++ ret = dev;
++ break;
++ }
++ }
++ read_unlock(&dev_base_lock);
++ return ret;
+}
+
-+
-+/*
-+ * Free the queue
++/**
++ * dev_valid_name - check if name is okay for network device
++ * @name: name string
++ *
++ * Network device names need to be valid file names to
++ * allow sysfs to work. We also disallow any kind of
++ * whitespace.
+ */
-+static void
-+queue_free (QUEUE * s)
++int dev_valid_name(const char *name)
+{
-+ while (queue_count (s))
-+ {
-+ queue_remove (s);
-+ }
++ if (*name == '\0')
++ return 0;
++ if (strlen(name) >= IFNAMSIZ)
++ return 0;
++ if (!strcmp(name, ".") || !strcmp(name, ".."))
++ return 0;
++
++ while (*name) {
++ if (*name == '/' || isspace(*name))
++ return 0;
++ name++;
++ }
++ return 1;
+}
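+
++/*
++ * Editorial note: examples of the rules above. "eth0", "bond_0" and
++ * "vlan.100" are accepted; "", ".", "..", "a/b" and "my if" are rejected
++ * (embedded slash or whitespace), as is any name of IFNAMSIZ (16) or
++ * more characters.
++ */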
+
-+/*
-+ * Get Next State-NFA
++/**
++ * __dev_alloc_name - allocate a name for a device
++ * @net: network namespace to allocate the device name in
++ * @name: name format string
++ * @buf: scratch buffer and result name string
++ *
++ * Passed a format string - eg "lt%d" it will try and find a suitable
++ * id. It scans list of devices to build up a free map, then chooses
++ * the first empty slot. The caller must hold the dev_base or rtnl lock
++ * while allocating the name and adding the device in order to avoid
++ * duplicates.
++ * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
++ * Returns the number of the unit assigned or a negative errno code.
+ */
-+static
-+int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
++
++static int __dev_alloc_name(struct net *net, const char *name, char *buf)
+{
-+ trans_node_t * t = acsm->acsmTransTable[state];
++ int i = 0;
++ const char *p;
++ const int max_netdevices = 8*PAGE_SIZE;
++ unsigned long *inuse;
++ struct net_device *d;
++
++ p = strnchr(name, IFNAMSIZ-1, '%');
++ if (p) {
++ /*
++ * Verify the string as this thing may have come from
++ * the user. There must be either one "%d" and no other "%"
++ * characters.
++ */
++ if (p[1] != 'd' || strchr(p + 2, '%'))
++ return -EINVAL;
++
++ /* Use one page as a bit array of possible slots */
++ inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
++ if (!inuse)
++ return -ENOMEM;
++
++ for_each_netdev(net, d) {
++ if (!sscanf(d->name, name, &i))
++ continue;
++ if (i < 0 || i >= max_netdevices)
++ continue;
++
++ /* avoid cases where sscanf is not exact inverse of printf */
++ snprintf(buf, IFNAMSIZ, name, i);
++ if (!strncmp(buf, d->name, IFNAMSIZ))
++ set_bit(i, inuse);
++ }
+
-+ while( t )
-+ {
-+ if( t->key == input )
-+ {
-+ return t->next_state;
++ i = find_first_zero_bit(inuse, max_netdevices);
++ free_page((unsigned long) inuse);
+ }
-+ t=t->next;
-+ }
+
-+ if( state == 0 ) return 0;
++ snprintf(buf, IFNAMSIZ, name, i);
++ if (!__dev_get_by_name(net, buf))
++ return i;
+
-+ return ACSM_FAIL_STATE2; /* Fail state ??? */
++ /* It is possible to run out of possible slots
++ * when the name is long and there isn't enough space left
++ * for the digits, or if all bits are used.
++ */
++ return -ENFILE;
+}
+
-+/*
-+ * Get Next State-DFA
++/**
++ * dev_alloc_name - allocate a name for a device
++ * @dev: device
++ * @name: name format string
++ *
++ * Passed a format string - eg "lt%d" it will try and find a suitable
++ * id. It scans list of devices to build up a free map, then chooses
++ * the first empty slot. The caller must hold the dev_base or rtnl lock
++ * while allocating the name and adding the device in order to avoid
++ * duplicates.
++ * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
++ * Returns the number of the unit assigned or a negative errno code.
+ */
-+static
-+int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
++
++int dev_alloc_name(struct net_device *dev, const char *name)
+{
-+ trans_node_t * t = acsm->acsmTransTable[state];
++ char buf[IFNAMSIZ];
++ struct net *net;
++ int ret;
++
++ BUG_ON(!dev_net(dev));
++ net = dev_net(dev);
++ ret = __dev_alloc_name(net, name, buf);
++ if (ret >= 0)
++ strlcpy(dev->name, buf, IFNAMSIZ);
++ return ret;
++}
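+
++/*
++ * Editorial note: a sketch of the wildcard naming described above.
++ * Assuming eth0 and eth1 are already registered:
++ *
++ *	err = dev_alloc_name(dev, "eth%d");	// picks first free slot
++ *	// err == 2 and dev->name is now "eth2" (or a negative errno)
++ */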
+
-+ while( t )
-+ {
-+ if( t->key == input )
-+ {
-+ return t->next_state;
-+ }
-+ t = t->next;
-+ }
+
-+ return 0; /* default state */
-+}
-+/*
-+ * Put Next State - Head insertion, and transition updates
++/**
++ * dev_change_name - change name of a device
++ * @dev: device
++ * @newname: name (or format string) must be at least IFNAMSIZ
++ *
++ * Change the name of a device. Format strings such as "eth%d"
++ * can be passed for wildcarding.
+ */
-+static
-+int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
++int dev_change_name(struct net_device *dev, const char *newname)
+{
-+ trans_node_t * p;
-+ trans_node_t * tnew;
++ char oldname[IFNAMSIZ];
++ int err = 0;
++ int ret;
++ struct net *net;
+
-+ // printk(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state);
++ ASSERT_RTNL();
++ BUG_ON(!dev_net(dev));
+
++ net = dev_net(dev);
++ if (dev->flags & IFF_UP)
++ return -EBUSY;
+
-+ /* Check if the transition already exists, if so just update the next_state */
-+ p = acsm->acsmTransTable[state];
-+ while( p )
-+ {
-+ if( p->key == input ) /* transition already exists- reset the next state */
-+ {
-+ p->next_state = next_state;
-+ return 0;
-+ }
-+ p=p->next;
-+ }
++ if (!dev_valid_name(newname))
++ return -EINVAL;
+
-+ /* Definitely not an existing transition - add it */
-+ tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
-+ if( !tnew ) return -1;
++ if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
++ return 0;
+
-+ tnew->key = input;
-+ tnew->next_state = next_state;
-+ tnew->next = 0;
++ memcpy(oldname, dev->name, IFNAMSIZ);
+
-+ tnew->next = acsm->acsmTransTable[state];
-+ acsm->acsmTransTable[state] = tnew;
++ if (strchr(newname, '%')) {
++ err = dev_alloc_name(dev, newname);
++ if (err < 0)
++ return err;
++ }
++ else if (__dev_get_by_name(net, newname))
++ return -EEXIST;
++ else
++ strlcpy(dev->name, newname, IFNAMSIZ);
++
++rollback:
++ /* For now only devices in the initial network namespace
++ * are in sysfs.
++ */
++ if (net == &init_net) {
++ ret = device_rename(&dev->dev, dev->name);
++ if (ret) {
++ memcpy(dev->name, oldname, IFNAMSIZ);
++ return ret;
++ }
++ }
+
-+ acsm->acsmNumTrans++;
++ write_lock_bh(&dev_base_lock);
++ hlist_del(&dev->name_hlist);
++ hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
++ write_unlock_bh(&dev_base_lock);
++
++ ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
++ ret = notifier_to_errno(ret);
++
++ if (ret) {
++ if (err) {
++ printk(KERN_ERR
++ "%s: name change rollback failed: %d.\n",
++ dev->name, ret);
++ } else {
++ err = ret;
++ memcpy(dev->name, oldname, IFNAMSIZ);
++ goto rollback;
++ }
++ }
+
-+ return 0;
++ return err;
+}
-+/*
-+ * Free the entire transition table
++
++/**
++ * dev_set_alias - change ifalias of a device
++ * @dev: device
++ * @alias: name up to IFALIASZ
++ * @len: limit of bytes to copy from info
++ *
++ * Set the ifalias for a device.
+ */
-+static
-+int List_FreeTransTable( ACSM_STRUCT2 * acsm )
++int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
+{
-+ int i;
-+ trans_node_t * t, *p;
-+
-+ if( !acsm->acsmTransTable ) return 0;
++ ASSERT_RTNL();
+
-+ for(i=0;i< acsm->acsmMaxStates;i++)
-+ {
-+ t = acsm->acsmTransTable[i];
++ if (len >= IFALIASZ)
++ return -EINVAL;
+
-+ while( t )
-+ {
-+ p = t->next;
-+ kfree(t);
-+ t = p;
-+ max_memory -= sizeof(trans_node_t);
++ if (!len) {
++ if (dev->ifalias) {
++ kfree(dev->ifalias);
++ dev->ifalias = NULL;
++ }
++ return 0;
+ }
-+ }
-+
-+ kfree(acsm->acsmTransTable);
+
-+ max_memory -= sizeof(void*) * acsm->acsmMaxStates;
++ dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
++ if (!dev->ifalias)
++ return -ENOMEM;
+
-+ acsm->acsmTransTable = 0;
-+
-+ return 0;
++ strlcpy(dev->ifalias, alias, len+1);
++ return len;
+}
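+
++/*
++ * Editorial note: a hypothetical caller, under RTNL as asserted above;
++ * a zero length clears the alias:
++ *
++ *	dev_set_alias(dev, "uplink-to-core", 14);	// set, returns 14
++ *	dev_set_alias(dev, NULL, 0);			// clear, returns 0
++ */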
+
-+/*
-+ *
-+ */
-+/*
-+ static
-+ int List_FreeList( trans_node_t * t )
-+ {
-+ int tcnt=0;
-+
-+ trans_node_t *p;
+
-+ while( t )
-+ {
-+ p = t->next;
-+ kfree(t);
-+ t = p;
-+ max_memory -= sizeof(trans_node_t);
-+ tcnt++;
-+ }
-+
-+ return tcnt;
-+ }
-+*/
-+
-+/*
-+ * Converts row of states from list to a full vector format
++/**
++ * netdev_features_change - device changes features
++ * @dev: device to cause notification
++ *
++ * Called to indicate a device has changed features.
+ */
-+static
-+int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
++void netdev_features_change(struct net_device *dev)
+{
-+ int tcnt = 0;
-+ trans_node_t * t = acsm->acsmTransTable[ state ];
-+
-+ memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
-+
-+ if( !t ) return 0;
-+
-+ while(t)
-+ {
-+ full[ t->key ] = t->next_state;
-+ tcnt++;
-+ t = t->next;
-+ }
-+ return tcnt;
++ call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
+}
++EXPORT_SYMBOL(netdev_features_change);
+
-+/*
-+ * Copy a Match List Entry - don't dup the pattern data
++/**
++ * netdev_state_change - device changes state
++ * @dev: device to cause notification
++ *
++ * Called to indicate a device has changed state. This function calls
++ * the notifier chains for netdev_chain and sends a NEWLINK message
++ * to the routing socket.
+ */
-+static ACSM_PATTERN2*
-+CopyMatchListEntry (ACSM_PATTERN2 * px)
++void netdev_state_change(struct net_device *dev)
+{
-+ ACSM_PATTERN2 * p;
-+
-+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
-+ MEMASSERT (p, "CopyMatchListEntry");
-+
-+ memcpy (p, px, sizeof (ACSM_PATTERN2));
-+
-+ p->next = 0;
++ if (dev->flags & IFF_UP) {
++ call_netdevice_notifiers(NETDEV_CHANGE, dev);
++ rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
++ }
++}
+
-+ return p;
++void netdev_bonding_change(struct net_device *dev)
++{
++ call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
+}
++EXPORT_SYMBOL(netdev_bonding_change);
+
-+/*
-+ * Check if a pattern is in the list already,
-+ * validate it using the 'id' field. This must be unique
-+ * for every pattern.
++/**
++ * dev_load - load a network module
++ * @net: the applicable net namespace
++ * @name: name of interface
++ *
++ * If a network interface is not present and the process has suitable
++ * privileges this function loads the module. If module loading is not
++ * available in this kernel then it becomes a nop.
+ */
-+/*
-+ static
-+ int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
-+ {
-+ ACSM_PATTERN2 * p;
+
-+ p = acsm->acsmMatchList[state];
-+ while( p )
-+ {
-+ if( p->id == px->id ) return 1;
-+ p = p->next;
-+ }
++void dev_load(struct net *net, const char *name)
++{
++ struct net_device *dev;
+
-+ return 0;
-+ }
-+*/
++ read_lock(&dev_base_lock);
++ dev = __dev_get_by_name(net, name);
++ read_unlock(&dev_base_lock);
+
++ if (!dev && capable(CAP_SYS_MODULE))
++ request_module("%s", name);
++}
+
-+/*
-+ * Add a pattern to the list of patterns terminated at this state.
-+ * Insert at front of list.
++/**
++ * dev_open - prepare an interface for use.
++ * @dev: device to open
++ *
++ * Takes a device from down to up state. The device's private open
++ * function is invoked and then the multicast lists are loaded. Finally
++ * the device is moved into the up state and a %NETDEV_UP message is
++ * sent to the netdev notifier chain.
++ *
++ * Calling this function on an active interface is a nop. On a failure
++ * a negative errno code is returned.
+ */
-+static void
-+AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
++int dev_open(struct net_device *dev)
+{
-+ ACSM_PATTERN2 * p;
++ const struct net_device_ops *ops = dev->netdev_ops;
++ int ret = 0;
+
-+ p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
++ ASSERT_RTNL();
+
-+ MEMASSERT (p, "AddMatchListEntry");
-+
-+ memcpy (p, px, sizeof (ACSM_PATTERN2));
++ /*
++ * Is it already up?
++ */
+
-+ p->next = acsm->acsmMatchList[state];
++ if (dev->flags & IFF_UP)
++ return 0;
+
-+ acsm->acsmMatchList[state] = p;
-+}
++ /*
++ * Is it even present?
++ */
++ if (!netif_device_present(dev))
++ return -ENODEV;
+
++ /*
++ * Call device private open method
++ */
++ set_bit(__LINK_STATE_START, &dev->state);
+
-+static void
-+AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p)
-+{
-+ int state, next, n;
-+ unsigned char *pattern;
++ if (ops->ndo_validate_addr)
++ ret = ops->ndo_validate_addr(dev);
+
-+ n = p->n;
-+ pattern = p->patrn;
-+ state = 0;
++ if (!ret && ops->ndo_open)
++ ret = ops->ndo_open(dev);
+
-+ /*
-+ * Match up pattern with existing states
-+ */
-+ for (; n > 0; pattern++, n--)
-+ {
-+ next = List_GetNextState(acsm,state,*pattern);
-+ if (next == ACSM_FAIL_STATE2 || next == 0)
-+ {
-+ break;
++ /*
++ * If it went open OK then:
++ */
++
++ if (ret)
++ clear_bit(__LINK_STATE_START, &dev->state);
++ else {
++ /*
++ * Set the flags.
++ */
++ dev->flags |= IFF_UP;
++
++ /*
++ * Enable NET_DMA
++ */
++ net_dmaengine_get();
++
++ /*
++ * Initialize multicasting status
++ */
++ dev_set_rx_mode(dev);
++
++ /*
++ * Wakeup transmit queue engine
++ */
++ dev_activate(dev);
++
++ /*
++ * ... and announce new interface.
++ */
++ call_netdevice_notifiers(NETDEV_UP, dev);
+ }
-+ state = next;
-+ }
-+
-+ /*
-+ * Add new states for the rest of the pattern bytes, 1 state per byte
-+ */
-+ for (; n > 0; pattern++, n--)
-+ {
-+ acsm->acsmNumStates++;
-+ List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
-+ state = acsm->acsmNumStates;
-+ }
+
-+ AddMatchListEntry (acsm, state, p );
++ return ret;
+}
+
-+/*
-+ * Build A Non-Deterministic Finite Automata
-+ * The keyword state table must already be built, via AddPatternStates().
++/**
++ * dev_close - shutdown an interface.
++ * @dev: device to shutdown
++ *
++ * This function moves an active device into down state. A
++ * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
++ * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
++ * chain.
+ */
-+static void
-+Build_NFA (ACSM_STRUCT2 * acsm)
++int dev_close(struct net_device *dev)
+{
-+ int r, s, i;
-+ QUEUE q, *queue = &q;
-+ acstate_t * FailState = acsm->acsmFailState;
-+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
-+ ACSM_PATTERN2 * mlist,* px;
++ const struct net_device_ops *ops = dev->netdev_ops;
++ ASSERT_RTNL();
+
-+ /* Init a Queue */
-+ queue_init (queue);
++ might_sleep();
+
++ if (!(dev->flags & IFF_UP))
++ return 0;
+
-+ /* Add the state 0 transitions 1st, the states at depth 1, fail to state 0 */
-+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
-+ {
-+ s = List_GetNextState2(acsm,0,i);
-+ if( s )
-+ {
-+ queue_add (queue, s);
-+ FailState[s] = 0;
-+ }
-+ }
++ /*
++ * Tell people we are going down, so that they can
++ * prepare for death while the device is still operating.
++ */
++ call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+
-+ /* Build the fail state successive layer of transitions */
-+ while (queue_count (queue) > 0)
-+ {
-+ r = queue_remove (queue);
++ clear_bit(__LINK_STATE_START, &dev->state);
+
-+ /* Find Final States for any Failure */
-+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
-+ {
-+ int fs, next;
++ /* Synchronize to scheduled poll. We cannot touch poll list,
++ * it can be even on different cpu. So just clear netif_running().
++ *
++ * dev->stop() will invoke napi_disable() on all of its
++ * napi_struct instances on this device.
++ */
++ smp_mb__after_clear_bit(); /* Commit netif_running(). */
+
-+ s = List_GetNextState(acsm,r,i);
++ dev_deactivate(dev);
+
-+ if( s != ACSM_FAIL_STATE2 )
-+ {
-+ queue_add (queue, s);
++ /*
++ * Call the device specific close. This cannot fail,
++ * and is only done while the device is UP.
++ *
++ * We allow it to be called even after a DETACH hot-plug
++ * event.
++ */
++ if (ops->ndo_stop)
++ ops->ndo_stop(dev);
+
-+ fs = FailState[r];
++ /*
++ * Device is now down.
++ */
+
-+ /*
-+ * Locate the next valid state for 'i' starting at fs
-+ */
-+ while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
-+ {
-+ fs = FailState[fs];
-+ }
++ dev->flags &= ~IFF_UP;
+
-+ /*
-+ * Update 's' state failure state to point to the next valid state
-+ */
-+ FailState[s] = next;
-+
-+ /*
-+ * Copy 'next'states MatchList to 's' states MatchList,
-+ * we copy them so each list can be AC_FREE'd later,
-+ * else we could just manipulate pointers to fake the copy.
-+ */
-+ for( mlist = MatchList[next];
-+ mlist;
-+ mlist = mlist->next)
-+ {
-+ px = CopyMatchListEntry (mlist);
++ /*
++ * Tell people we are down
++ */
++ call_netdevice_notifiers(NETDEV_DOWN, dev);
+
-+ /* Insert at front of MatchList */
-+ px->next = MatchList[s];
-+ MatchList[s] = px;
-+ }
-+ }
-+ }
-+ }
++ /*
++ * Shutdown NET_DMA
++ */
++ net_dmaengine_put();
+
-+ /* Clean up the queue */
-+ queue_free (queue);
++ return 0;
+}
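+
++/*
++ * Editorial note: dev_open() and dev_close() both assert RTNL, so a
++ * hypothetical in-kernel caller bounces an interface like this:
++ *
++ *	rtnl_lock();
++ *	dev_close(dev);			// no-op if already down
++ *	err = dev_open(dev);		// NETDEV_UP sent on success
++ *	rtnl_unlock();
++ */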
+
-+/*
-+ * Build Deterministic Finite Automata from the NFA
++
++/**
++ * dev_disable_lro - disable Large Receive Offload on a device
++ * @dev: device
++ *
++ * Disable Large Receive Offload (LRO) on a net device. Must be
++ * called under RTNL. This is needed if received packets may be
++ * forwarded to another interface.
+ */
-+static void
-+Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm)
++void dev_disable_lro(struct net_device *dev)
+{
-+ int i, r, s, cFailState;
-+ QUEUE q, *queue = &q;
-+ acstate_t * FailState = acsm->acsmFailState;
++ if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
++ dev->ethtool_ops->set_flags) {
++ u32 flags = dev->ethtool_ops->get_flags(dev);
++ if (flags & ETH_FLAG_LRO) {
++ flags &= ~ETH_FLAG_LRO;
++ dev->ethtool_ops->set_flags(dev, flags);
++ }
++ }
++ WARN_ON(dev->features & NETIF_F_LRO);
++}
++EXPORT_SYMBOL(dev_disable_lro);
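+
++/*
++ * Editorial note: a forwarding setup path would typically call this as
++ * sketched below (hypothetical caller, RTNL required as stated above):
++ *
++ *	rtnl_lock();
++ *	dev_disable_lro(dev);	// merged super-frames must not be forwarded
++ *	rtnl_unlock();
++ */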
+
-+ /* Init a Queue */
-+ queue_init (queue);
+
-+ /* Add the state 0 transitions 1st */
-+ for(i=0; i<acsm->acsmAlphabetSize; i++)
-+ {
-+ s = List_GetNextState(acsm,0,i);
-+ if ( s != 0 )
-+ {
-+ queue_add (queue, s);
-+ }
-+ }
++static int dev_boot_phase = 1;
+
-+ /* Start building the next layer of transitions */
-+ while( queue_count(queue) > 0 )
-+ {
-+ r = queue_remove(queue);
++/*
++ * Device change register/unregister. These are not inline or static
++ * as we export them to the world.
++ */
+
-+ /* Process this states layer */
-+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
-+ {
-+ s = List_GetNextState(acsm,r,i);
++/**
++ * register_netdevice_notifier - register a network notifier block
++ * @nb: notifier
++ *
++ * Register a notifier to be called when network device events occur.
++ * The notifier passed is linked into the kernel structures and must
++ * not be reused until it has been unregistered. A negative errno code
++ * is returned on a failure.
++ *
++ * When registered all registration and up events are replayed
++ * to the new notifier to allow the device to have a race-free
++ * view of the network device list.
++ */
+
-+ if( s != ACSM_FAIL_STATE2 && s!= 0)
-+ {
-+ queue_add (queue, s);
-+ }
-+ else
-+ {
-+ cFailState = List_GetNextState(acsm,FailState[r],i);
++int register_netdevice_notifier(struct notifier_block *nb)
++{
++ struct net_device *dev;
++ struct net_device *last;
++ struct net *net;
++ int err;
++
++ rtnl_lock();
++ err = raw_notifier_chain_register(&netdev_chain, nb);
++ if (err)
++ goto unlock;
++ if (dev_boot_phase)
++ goto unlock;
++ for_each_net(net) {
++ for_each_netdev(net, dev) {
++ err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
++ err = notifier_to_errno(err);
++ if (err)
++ goto rollback;
++
++ if (!(dev->flags & IFF_UP))
++ continue;
++
++ nb->notifier_call(nb, NETDEV_UP, dev);
++ }
++ }
+
-+ if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
-+ {
-+ List_PutNextState(acsm,r,i,cFailState);
++unlock:
++ rtnl_unlock();
++ return err;
++
++rollback:
++ last = dev;
++ for_each_net(net) {
++ for_each_netdev(net, dev) {
++ if (dev == last)
++ break;
++
++ if (dev->flags & IFF_UP) {
++ nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
++ nb->notifier_call(nb, NETDEV_DOWN, dev);
++ }
++ nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ }
-+ }
+ }
-+ }
+
-+ /* Clean up the queue */
-+ queue_free (queue);
++ raw_notifier_chain_unregister(&netdev_chain, nb);
++ goto unlock;
+}
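+
++/*
++ * Editorial note: a minimal notifier sketch for the registration above;
++ * the names are illustrative. Thanks to the replay described above, the
++ * callback also sees NETDEV_REGISTER/NETDEV_UP for pre-existing devices:
++ *
++ *	static int my_netdev_event(struct notifier_block *nb,
++ *				   unsigned long event, void *ptr)
++ *	{
++ *		struct net_device *dev = ptr;
++ *
++ *		if (event == NETDEV_UP)
++ *			printk(KERN_INFO "%s is up\n", dev->name);
++ *		return NOTIFY_DONE;
++ *	}
++ *
++ *	static struct notifier_block my_nb = {
++ *		.notifier_call = my_netdev_event,
++ *	};
++ *
++ *	register_netdevice_notifier(&my_nb);
++ */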
+
-+/*
-+ *
-+ * Convert a row lists for the state table to a full vector format
++/**
++ * unregister_netdevice_notifier - unregister a network notifier block
++ * @nb: notifier
+ *
++ * Unregister a notifier previously registered by
++ * register_netdevice_notifier(). The notifier is unlinked from the
++ * kernel structures and may then be reused. A negative errno code
++ * is returned on a failure.
+ */
-+static int
-+Conv_List_To_Full(ACSM_STRUCT2 * acsm)
++
++int unregister_netdevice_notifier(struct notifier_block *nb)
+{
-+ int tcnt, k;
-+ acstate_t * p;
-+ acstate_t ** NextState = acsm->acsmNextState;
++ int err;
+
-+ for(k=0;k<acsm->acsmMaxStates;k++)
-+ {
-+ p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
-+ if(!p) return -1;
++ rtnl_lock();
++ err = raw_notifier_chain_unregister(&netdev_chain, nb);
++ rtnl_unlock();
++ return err;
++}
+
-+ tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
++/**
++ * call_netdevice_notifiers - call all network notifier blocks
++ * @val: value passed unmodified to notifier function
++ * @dev: net_device pointer passed unmodified to notifier function
++ *
++ * Call all network notifier blocks. Parameters and return value
++ * are as for raw_notifier_call_chain().
++ */
+
-+ p[0] = ACF_FULL;
-+ p[1] = 0; /* no matches yet */
++int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
++{
++ return raw_notifier_call_chain(&netdev_chain, val, dev);
++}
+
-+ NextState[k] = p; /* now we have a full format row vector */
-+ }
++/* When > 0 there are consumers of rx skb time stamps */
++static atomic_t netstamp_needed = ATOMIC_INIT(0);
+
-+ return 0;
++void net_enable_timestamp(void)
++{
++ atomic_inc(&netstamp_needed);
+}
+
-+/*
-+ * Convert DFA memory usage from list based storage to a sparse-row storage.
-+ *
-+ * The Sparse format allows each row to be either full or sparse formatted. If the sparse row has
-+ * too many transitions, performance or space may dictate that we use the standard full formatting
-+ * for the row. More than 5 or 10 transitions per state ought to really whack performance. So the
-+ * user can specify the max state transitions per state allowed in the sparse format.
-+ *
-+ * Standard Full Matrix Format
-+ * ---------------------------
-+ * acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)
-+ *
-+ * example:
-+ *
-+ * events -> a b c d e f g h i j k l m n o p
-+ * states
-+ * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0
-+ *
-+ * Sparse Format, each row : Words Value
-+ * 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands)
-+ * 2-2 bool match flag (indicates this state has pattern matches)
-+ * 3-3 sparse state count ( # of input/next-state pairs )
-+ * 4-3+2*cnt 'input,next-state' pairs... each sizof(acstate_t)
-+ *
-+ * above example case yields:
-+ * Full Format: 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ...
-+ * Sparse format: 1, 3, 'a',1,'b',7,'f',3 - uses 2+2*ntransitions (non-default transitions)
-+ */
-+static int
-+Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm)
++void net_disable_timestamp(void)
+{
-+ int cnt, m, k, i;
-+ acstate_t * p, state, maxstates=0;
-+ acstate_t ** NextState = acsm->acsmNextState;
-+ acstate_t full[MAX_ALPHABET_SIZE];
-+
-+ for(k=0;k<acsm->acsmMaxStates;k++)
-+ {
-+ cnt=0;
-+
-+ List_ConvToFull(acsm, (acstate_t)k, full );
-+
-+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
-+ {
-+ state = full[i];
-+ if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
-+ }
++ atomic_dec(&netstamp_needed);
++}
+
-+ if( cnt > 0 ) maxstates++;
++static inline void net_timestamp(struct sk_buff *skb)
++{
++ if (atomic_read(&netstamp_needed))
++ __net_timestamp(skb);
++ else
++ skb->tstamp.tv64 = 0;
++}
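+
++/*
++ * Editorial note: consumers of rx time stamps (packet taps, for example)
++ * are expected to bracket their lifetime with the pair above, so that
++ * net_timestamp() only stamps while somebody actually cares:
++ *
++ *	net_enable_timestamp();		// e.g. when the tap socket is created
++ *	...
++ *	net_disable_timestamp();	// e.g. when it is destroyed
++ */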
+
-+ if( k== 0 || cnt > acsm->acsmSparseMaxRowNodes )
-+ {
-+ p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
-+ if(!p) return -1;
++/*
++ * Support routine. Sends outgoing frames to any network
++ * taps currently in use.
++ */
+
-+ p[0] = ACF_FULL;
-+ p[1] = 0;
-+ memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));
-+ }
-+ else
-+ {
-+ p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
-+ if(!p) return -1;
++static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
++{
++ struct packet_type *ptype;
+
-+ m = 0;
-+ p[m++] = ACF_SPARSE;
-+ p[m++] = 0; /* no matches */
-+ p[m++] = cnt;
++#ifdef CONFIG_NET_CLS_ACT
++ if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
++ net_timestamp(skb);
++#else
++ net_timestamp(skb);
++#endif
+
-+ for(i = 0; i < acsm->acsmAlphabetSize ; i++)
-+ {
-+ state = full[i];
-+ if( state != 0 && state != ACSM_FAIL_STATE2 )
-+ {
-+ p[m++] = i;
-+ p[m++] = state;
++ rcu_read_lock();
++ list_for_each_entry_rcu(ptype, &ptype_all, list) {
++ /* Never send packets back to the socket
++ * they originated from - MvS (miquels@drinkel.ow.org)
++ */
++ if ((ptype->dev == dev || !ptype->dev) &&
++ (ptype->af_packet_priv == NULL ||
++ (struct sock *)ptype->af_packet_priv != skb->sk)) {
++ struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
++ if (!skb2)
++ break;
++
++ /* skb->nh should be correctly
++ set by sender, so that the second statement is
++ just protection against buggy protocols.
++ */
++ skb_reset_mac_header(skb2);
++
++ if (skb_network_header(skb2) < skb2->data ||
++ skb2->network_header > skb2->tail) {
++ if (net_ratelimit())
++ printk(KERN_CRIT "protocol %04x is "
++ "buggy, dev %s\n",
++ skb2->protocol, dev->name);
++ skb_reset_network_header(skb2);
++ }
++
++ skb2->transport_header = skb2->network_header;
++ skb2->pkt_type = PACKET_OUTGOING;
++ ptype->func(skb2, skb->dev, ptype, skb->dev);
+ }
-+ }
+ }
-+
-+ NextState[k] = p; /* now we are a sparse formatted state transition array */
-+ }
-+
-+ return 0;
++ rcu_read_unlock();
+}
-+/*
-+ Convert Full matrix to Banded row format.
+
-+ Word values
-+ 1 2 -> banded
-+ 2 n number of values
-+ 3 i index of 1st value (0-256)
-+ 4 - 3+n next-state values at each index
+
-+*/
-+static int
-+Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm)
++static inline void __netif_reschedule(struct Qdisc *q)
+{
-+ int first = -1, last;
-+ acstate_t * p, state, full[MAX_ALPHABET_SIZE];
-+ acstate_t ** NextState = acsm->acsmNextState;
-+ int cnt,m,k,i;
++ struct softnet_data *sd;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ sd = &__get_cpu_var(softnet_data);
++ q->next_sched = sd->output_queue;
++ sd->output_queue = q;
++ raise_softirq_irqoff(NET_TX_SOFTIRQ);
++ local_irq_restore(flags);
++}
+
-+ for(k=0;k<acsm->acsmMaxStates;k++)
-+ {
-+ cnt=0;
++void __netif_schedule(struct Qdisc *q)
++{
++ if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
++ __netif_reschedule(q);
++}
++EXPORT_SYMBOL(__netif_schedule);
+
-+ List_ConvToFull(acsm, (acstate_t)k, full );
++void dev_kfree_skb_irq(struct sk_buff *skb)
++{
++ if (atomic_dec_and_test(&skb->users)) {
++ struct softnet_data *sd;
++ unsigned long flags;
++
++ local_irq_save(flags);
++ sd = &__get_cpu_var(softnet_data);
++ skb->next = sd->completion_queue;
++ sd->completion_queue = skb;
++ raise_softirq_irqoff(NET_TX_SOFTIRQ);
++ local_irq_restore(flags);
++ }
++}
++EXPORT_SYMBOL(dev_kfree_skb_irq);
+
-+ first=-1;
-+ last =-2;
++void dev_kfree_skb_any(struct sk_buff *skb)
++{
++ if (in_irq() || irqs_disabled())
++ dev_kfree_skb_irq(skb);
++ else
++ dev_kfree_skb(skb);
++}
++EXPORT_SYMBOL(dev_kfree_skb_any);
+
-+ for (i = 0; i < acsm->acsmAlphabetSize; i++)
-+ {
-+ state = full[i];
+
-+ if( state !=0 && state != ACSM_FAIL_STATE2 )
-+ {
-+ if( first < 0 ) first = i;
-+ last = i;
-+ }
++/**
++ * netif_device_detach - mark device as removed
++ * @dev: network device
++ *
++ * Mark device as removed from system and therefore no longer available.
++ */
++void netif_device_detach(struct net_device *dev)
++{
++ if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
++ netif_running(dev)) {
++ netif_tx_stop_all_queues(dev);
+ }
++}
++EXPORT_SYMBOL(netif_device_detach);
+
-+ /* calc band width */
-+ cnt= last - first + 1;
-+
-+ p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));
-+
-+ if(!p) return -1;
++/**
++ * netif_device_attach - mark device as attached
++ * @dev: network device
++ *
++ * Mark device as attached to the system and restart if needed.
++ */
++void netif_device_attach(struct net_device *dev)
++{
++ if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
++ netif_running(dev)) {
++ netif_tx_wake_all_queues(dev);
++ __netdev_watchdog_up(dev);
++ }
++}
++EXPORT_SYMBOL(netif_device_attach);
+
-+ m = 0;
-+ p[m++] = ACF_BANDED;
-+ p[m++] = 0; /* no matches */
-+ p[m++] = cnt;
-+ p[m++] = first;
++static bool can_checksum_protocol(unsigned long features, __be16 protocol)
++{
++ return ((features & NETIF_F_GEN_CSUM) ||
++ ((features & NETIF_F_IP_CSUM) &&
++ protocol == htons(ETH_P_IP)) ||
++ ((features & NETIF_F_IPV6_CSUM) &&
++ protocol == htons(ETH_P_IPV6)) ||
++ ((features & NETIF_F_FCOE_CRC) &&
++ protocol == htons(ETH_P_FCOE)));
++}
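+
++/*
++ * Editorial note: worked examples of the predicate above. A device with
++ * only NETIF_F_IP_CSUM can checksum ETH_P_IP but not ETH_P_IPV6 frames,
++ * while NETIF_F_GEN_CSUM (NETIF_F_NO_CSUM or NETIF_F_HW_CSUM) answers
++ * true for every protocol.
++ */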
+
-+ for(i = first; i <= last; i++)
-+ {
-+ p[m++] = full[i];
++static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
++{
++ if (can_checksum_protocol(dev->features, skb->protocol))
++ return true;
++
++ if (skb->protocol == htons(ETH_P_8021Q)) {
++ struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
++ if (can_checksum_protocol(dev->features & dev->vlan_features,
++ veh->h_vlan_encapsulated_proto))
++ return true;
+ }
+
-+ NextState[k] = p; /* now we are a banded formatted state transition array */
-+ }
-+
-+ return 0;
++ return false;
+}
+
+/*
-+ * Convert full matrix to Sparse Band row format.
-+ *
-+ * next - Full formatted row of next states
-+ * asize - size of alphabet
-+ * zcnt - max number of zeros in a run of zeros in any given band.
-+ *
-+ * Word Values
-+ * 1 ACF_SPARSEBANDS
-+ * 2 number of bands
-+ * repeat 3 - 5+ ....once for each band in this row.
-+ * 3 number of items in this band
-+ * 4 start index of this band
-+ * 5- next-state values in this band...
++ * Invalidate hardware checksum when packet is to be mangled, and
++ * complete checksum manually on outgoing path.
+ */
-+static
-+int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
++int skb_checksum_help(struct sk_buff *skb)
+{
-+ int i, nbands,zcnt,last=0;
-+ acstate_t state;
++ __wsum csum;
++ int ret = 0, offset;
+
-+ nbands=0;
-+ for( i=0; i<asize; i++ )
-+ {
-+ state = next[i];
++ if (skb->ip_summed == CHECKSUM_COMPLETE)
++ goto out_set_summed;
+
-+ if( state !=0 && state != ACSM_FAIL_STATE2 )
-+ {
-+ begin[nbands] = i;
-+ zcnt=0;
++ if (unlikely(skb_shinfo(skb)->gso_size)) {
++ /* Let GSO fix up the checksum. */
++ goto out_set_summed;
++ }
+
-+ for( ; i< asize; i++ )
-+ {
-+ state = next[i];
-+ if( state ==0 || state == ACSM_FAIL_STATE2 )
-+ {
-+ zcnt++;
-+ if( zcnt > zmax ) break;
-+ }
-+ else
-+ {
-+ zcnt=0;
-+ last = i;
-+ }
-+ }
++ offset = skb->csum_start - skb_headroom(skb);
++ BUG_ON(offset >= skb_headlen(skb));
++ csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
-+ end[nbands++] = last;
++ offset += skb->csum_offset;
++ BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
+
++ if (skb_cloned(skb) &&
++ !skb_clone_writable(skb, offset + sizeof(__sum16))) {
++ ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
++ if (ret)
++ goto out;
+ }
-+ }
+
-+ return nbands;
++ *(__sum16 *)(skb->data + offset) = csum_fold(csum);
++out_set_summed:
++ skb->ip_summed = CHECKSUM_NONE;
++out:
++ return ret;
+}
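+
++/*
++ * Editorial note: a worked example of the offsets above, assuming an
++ * untagged IPv4/TCP frame. csum_start points at the TCP header, so
++ * offset = csum_start - headroom is where checksumming begins relative
++ * to skb->data, and offset + csum_offset (16 for TCP) is where the
++ * folded __sum16 is written back.
++ */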
+
-+
-+/*
-+ * Sparse Bands
-+ *
-+ * Row Format:
-+ * Word
-+ * 1 SPARSEBANDS format indicator
-+ * 2 bool indicates a pattern match in this state
-+ * 3 number of sparse bands
-+ * 4 number of elements in this band
-+ * 5 start index of this band
-+ * 6- list of next states
-+ *
-+ * m number of elements in this band
-+ * m+1 start index of this band
-+ * m+2- list of next states
++/**
++ * skb_gso_segment - Perform segmentation on skb.
++ * @skb: buffer to segment
++ * @features: features for the output path (see dev->features)
++ *
++ * This function segments the given skb and returns a list of segments.
++ *
++ * It may return NULL if the skb requires no segmentation. This is
++ * only possible when GSO is used for verifying header integrity.
+ */
-+static int
-+Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm)
++struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
+{
-+ acstate_t * p;
-+ acstate_t ** NextState = acsm->acsmNextState;
-+ int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;
-+
-+ int band_begin[MAX_ALPHABET_SIZE];
-+ int band_end[MAX_ALPHABET_SIZE];
-+ int nbands,j;
-+ acstate_t full[MAX_ALPHABET_SIZE];
-+
-+ for(k=0;k<acsm->acsmMaxStates;k++)
-+ {
-+ cnt=0;
-+
-+ List_ConvToFull(acsm, (acstate_t)k, full );
-+
-+ nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );
-+
-+ /* calc band width space*/
-+ cnt = 3;
-+ for(i=0;i<nbands;i++)
-+ {
-+ cnt += 2;
-+ cnt += band_end[i] - band_begin[i] + 1;
-+
-+ /*printk("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */
++ struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
++ struct packet_type *ptype;
++ __be16 type = skb->protocol;
++ int err;
++
++ skb_reset_mac_header(skb);
++ skb->mac_len = skb->network_header - skb->mac_header;
++ __skb_pull(skb, skb->mac_len);
++
++ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
++ struct net_device *dev = skb->dev;
++ struct ethtool_drvinfo info = {};
++
++ if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
++ dev->ethtool_ops->get_drvinfo(dev, &info);
++
++ WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
++ "ip_summed=%d",
++ info.driver, dev ? dev->features : 0L,
++ skb->sk ? skb->sk->sk_route_caps : 0L,
++ skb->len, skb->data_len, skb->ip_summed);
++
++ if (skb_header_cloned(skb) &&
++ (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
++ return ERR_PTR(err);
+ }
+
-+ p = AC_MALLOC(sizeof(acstate_t)*(cnt));
++ rcu_read_lock();
++ list_for_each_entry_rcu(ptype,
++ &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
++ if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
++ if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
++ err = ptype->gso_send_check(skb);
++ segs = ERR_PTR(err);
++ if (err || skb_gso_ok(skb, features))
++ break;
++ __skb_push(skb, (skb->data -
++ skb_network_header(skb)));
++ }
++ segs = ptype->gso_segment(skb, features);
++ break;
++ }
++ }
++ rcu_read_unlock();
+
-+ if(!p) return -1;
++ __skb_push(skb, skb->data - skb_mac_header(skb));
+
-+ m = 0;
-+ p[m++] = ACF_SPARSEBANDS;
-+ p[m++] = 0; /* no matches */
-+ p[m++] = nbands;
++ return segs;
++}
+
-+ for( i=0;i<nbands;i++ )
-+ {
-+ p[m++] = band_end[i] - band_begin[i] + 1; /* # states in this band */
-+ p[m++] = band_begin[i]; /* start index */
++EXPORT_SYMBOL(skb_gso_segment);
+
-+ for( j=band_begin[i]; j<=band_end[i]; j++ )
-+ {
-+ p[m++] = full[j]; /* some states may be state zero */
-+ }
++/* Take action when hardware reception checksum errors are detected. */
++#ifdef CONFIG_BUG
++void netdev_rx_csum_fault(struct net_device *dev)
++{
++ if (net_ratelimit()) {
++ printk(KERN_ERR "%s: hw csum failure.\n",
++ dev ? dev->name : "<unknown>");
++ dump_stack();
+ }
-+
-+ NextState[k] = p; /* now we are a sparse-banded formatted state transition array */
-+ }
-+
-+ return 0;
+}
++EXPORT_SYMBOL(netdev_rx_csum_fault);
++#endif
+
-+/*
-+ *
-+ * Convert an NFA or DFA row from sparse to full format
-+ * and store into the 'full' buffer.
-+ *
-+ * returns:
-+ * 0 - failed, no state transitions
-+ * *p - pointer to 'full' buffer
-+ *
++/* Actually, we should eliminate this check as soon as we know that:
++ * 1. An IOMMU is present and can map all the memory.
++ * 2. No high memory really exists on this machine.
+ */
-+/*
-+ static
-+ acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full )
-+ {
-+ int i;
-+ acstate_t * p, n, fmt, index, nb, bmatch;
-+ acstate_t ** NextState = acsm->acsmNextState;
+
-+ p = NextState[k];
++static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
++{
++#ifdef CONFIG_HIGHMEM
++ int i;
+
-+ if( !p ) return 0;
++ if (dev->features & NETIF_F_HIGHDMA)
++ return 0;
+
-+ fmt = *p++;
++ for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
++ if (PageHighMem(skb_shinfo(skb)->frags[i].page))
++ return 1;
+
-+ bmatch = *p++;
++#endif
++ return 0;
++}
+
-+ if( fmt ==ACF_SPARSE )
-+ {
-+ n = *p++;
-+ for( ; n>0; n--, p+=2 )
-+ {
-+ full[ p[0] ] = p[1];
-+ }
-+ }
-+ else if( fmt ==ACF_BANDED )
-+ {
++struct dev_gso_cb {
++ void (*destructor)(struct sk_buff *skb);
++};
+
-+ n = *p++;
-+ index = *p++;
++#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
+
-+ for( ; n>0; n--, p++ )
-+ {
-+ full[ index++ ] = p[0];
-+ }
-+ }
-+ else if( fmt ==ACF_SPARSEBANDS )
-+ {
-+ nb = *p++;
-+ for(i=0;i<nb;i++)
-+ {
-+ n = *p++;
-+ index = *p++;
-+ for( ; n>0; n--, p++ )
-+ {
-+ full[ index++ ] = p[0];
-+ }
-+ }
-+ }
-+ else if( fmt == ACF_FULL )
-+ {
-+ memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t));
-+ }
++static void dev_gso_skb_destructor(struct sk_buff *skb)
++{
++ struct dev_gso_cb *cb;
+
-+ return full;
-+ }
-+*/
++ do {
++ struct sk_buff *nskb = skb->next;
+
-+/*
-+ * Select the desired storage mode
-+ */
-+int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
-+{
-+ switch( m )
-+ {
-+ case ACF_FULL:
-+ case ACF_SPARSE:
-+ case ACF_BANDED:
-+ case ACF_SPARSEBANDS:
-+ acsm->acsmFormat = m;
-+ break;
-+ default:
-+ return -1;
-+ }
++ skb->next = nskb->next;
++ nskb->next = NULL;
++ kfree_skb(nskb);
++ } while (skb->next);
+
-+ return 0;
-+}
-+/*
-+ *
-+ */
-+void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
-+{
-+ acsm->acsmSparseMaxZcnt = n;
-+}
-+/*
-+ *
-+ */
-+void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
-+{
-+ acsm->acsmSparseMaxRowNodes = n;
-+}
-+/*
-+ *
-+ */
-+int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
-+{
-+ switch( m )
-+ {
-+ case FSA_TRIE:
-+ case FSA_NFA:
-+ case FSA_DFA:
-+ acsm->acsmFSA = m;
-+ default:
-+ return -1;
-+ }
++ cb = DEV_GSO_CB(skb);
++ if (cb->destructor)
++ cb->destructor(skb);
+}
-+/*
++
++/**
++ * dev_gso_segment - Perform emulated hardware segmentation on skb.
++ * @skb: buffer to segment
+ *
++ * This function segments the given skb and stores the list of segments
++ * in skb->next.
+ */
-+int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
-+{
-+ if( n <= MAX_ALPHABET_SIZE )
-+ {
-+ acsm->acsmAlphabetSize = n;
-+ }
-+ else
-+ {
-+ return -1;
-+ }
-+ return 0;
-+}
-+/*
-+ * Create a new AC state machine
-+ */
-+static ACSM_STRUCT2 * acsmNew2 (void)
++static int dev_gso_segment(struct sk_buff *skb)
+{
-+ ACSM_STRUCT2 * p;
++ struct net_device *dev = skb->dev;
++ struct sk_buff *segs;
++ int features = dev->features & ~(illegal_highdma(dev, skb) ?
++ NETIF_F_SG : 0);
+
-+ init_xlatcase ();
-+
-+ p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
-+ MEMASSERT (p, "acsmNew");
-+
-+ if (p)
-+ {
-+ memset (p, 0, sizeof (ACSM_STRUCT2));
-+
-+ /* Some defaults */
-+ p->acsmFSA = FSA_DFA;
-+ p->acsmFormat = ACF_BANDED;
-+ p->acsmAlphabetSize = 256;
-+ p->acsmSparseMaxRowNodes = 256;
-+ p->acsmSparseMaxZcnt = 10;
-+ }
++ segs = skb_gso_segment(skb, features);
+
-+ return p;
-+}
-+/*
-+ * Add a pattern to the list of patterns for this state machine
-+ *
-+ */
-+int
-+acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
-+ int offset, int depth, void * id, int iid)
-+{
-+ ACSM_PATTERN2 * plist;
++ /* Verifying header integrity only. */
++ if (!segs)
++ return 0;
+
-+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
-+ MEMASSERT (plist, "acsmAddPattern");
++ if (IS_ERR(segs))
++ return PTR_ERR(segs);
+
-+ plist->patrn = (unsigned char *) AC_MALLOC ( n );
-+ MEMASSERT (plist->patrn, "acsmAddPattern");
++ skb->next = segs;
++ DEV_GSO_CB(skb)->destructor = skb->destructor;
++ skb->destructor = dev_gso_skb_destructor;
+
-+ ConvertCaseEx(plist->patrn, pat, n);
++ return 0;
++}
+
-+ plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
-+ MEMASSERT (plist->casepatrn, "acsmAddPattern");
++int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
++ struct netdev_queue *txq)
++{
++ const struct net_device_ops *ops = dev->netdev_ops;
++ int rc;
++
++ if (likely(!skb->next)) {
++ if (!list_empty(&ptype_all))
++ dev_queue_xmit_nit(skb, dev);
++
++ if (netif_needs_gso(dev, skb)) {
++ if (unlikely(dev_gso_segment(skb)))
++ goto out_kfree_skb;
++ if (skb->next)
++ goto gso;
++ }
+
-+ memcpy (plist->casepatrn, pat, n);
++ rc = ops->ndo_start_xmit(skb, dev);
++ /*
++ * TODO: if skb_orphan() was called by
++ * dev->hard_start_xmit() (for example, the unmodified
++ * igb driver does that; bnx2 doesn't), then
++ * skb_tx_software_timestamp() will be unable to send
++ * back the time stamp.
++ *
++ * How can this be prevented? Always create another
++ * reference to the socket before calling
++ * dev->hard_start_xmit()? Prevent that skb_orphan()
++ * does anything in dev->hard_start_xmit() by clearing
++ * the skb destructor before the call and restoring it
++ * afterwards, then doing the skb_orphan() ourselves?
++ */
++ return rc;
++ }
+
-+ plist->n = n;
-+ plist->nocase = nocase;
-+ plist->offset = offset;
-+ plist->depth = depth;
-+ plist->id = id;
-+ plist->iid = iid;
++gso:
++ do {
++ struct sk_buff *nskb = skb->next;
++
++ skb->next = nskb->next;
++ nskb->next = NULL;
++ rc = ops->ndo_start_xmit(nskb, dev);
++ if (unlikely(rc)) {
++ nskb->next = skb->next;
++ skb->next = nskb;
++ return rc;
++ }
++ if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
++ return NETDEV_TX_BUSY;
++ } while (skb->next);
+
-+ plist->next = p->acsmPatterns;
-+ p->acsmPatterns = plist;
++ skb->destructor = DEV_GSO_CB(skb)->destructor;
+
-+ return 0;
++out_kfree_skb:
++ kfree_skb(skb);
++ return 0;
+}
-+/*
-+ * Add a Key to the list of key+data pairs
-+ */
-+int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
-+{
-+ ACSM_PATTERN2 * plist;
+
-+ plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
-+ MEMASSERT (plist, "acsmAddPattern");
++static u32 skb_tx_hashrnd;
+
-+ plist->patrn = (unsigned char *) AC_MALLOC (klen);
-+ memcpy (plist->patrn, key, klen);
++u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
++{
++ u32 hash;
+
-+ plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
-+ memcpy (plist->casepatrn, key, klen);
++ if (skb_rx_queue_recorded(skb))
++ return skb_get_rx_queue(skb) % dev->real_num_tx_queues;
+
-+ plist->n = klen;
-+ plist->nocase = nocase;
-+ plist->offset = 0;
-+ plist->depth = 0;
-+ plist->id = 0;
-+ plist->iid = 0;
++ if (skb->sk && skb->sk->sk_hash)
++ hash = skb->sk->sk_hash;
++ else
++ hash = skb->protocol;
+
-+ plist->next = p->acsmPatterns;
-+ p->acsmPatterns = plist;
++ hash = jhash_1word(hash, skb_tx_hashrnd);
+
-+ return 0;
++ return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+}
++EXPORT_SYMBOL(skb_tx_hash);
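+
++/*
++ * Editorial note: the final line above maps a 32-bit hash onto
++ * [0, real_num_tx_queues) without a modulo. For example, with 4 tx
++ * queues and hash == 0x80000000, ((u64)0x80000000 * 4) >> 32 == 2, so
++ * the frame lands on queue 2; a uniform hash spreads frames uniformly.
++ */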
+
-+/*
-+ * Copy a boolean match flag int NextState table, for caching purposes.
-+ */
-+static
-+void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
++static struct netdev_queue *dev_pick_tx(struct net_device *dev,
++ struct sk_buff *skb)
+{
-+ acstate_t state;
-+ acstate_t ** NextState = acsm->acsmNextState;
-+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
++ const struct net_device_ops *ops = dev->netdev_ops;
++ u16 queue_index = 0;
+
-+ for( state=0; state<acsm->acsmNumStates; state++ )
-+ {
-+ if( MatchList[state] )
-+ {
-+ NextState[state][1] = 1;
-+ }
-+ else
-+ {
-+ NextState[state][1] = 0;
-+ }
-+ }
++ if (ops->ndo_select_queue)
++ queue_index = ops->ndo_select_queue(dev, skb);
++ else if (dev->real_num_tx_queues > 1)
++ queue_index = skb_tx_hash(dev, skb);
++
++ skb_set_queue_mapping(skb, queue_index);
++ return netdev_get_tx_queue(dev, queue_index);
+}
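
dev_pick_tx() above defers to the driver's ndo_select_queue hook before
falling back to skb_tx_hash(), so a multiqueue driver can impose its own
steering policy. A hedged sketch of such a hook; the mydrv_* names are
invented and not part of this patch:

    #include <linux/netdevice.h>
    #include <linux/if_ether.h>

    /* Steer all non-IP traffic to queue 0, spread the rest by hash. */
    static u16 mydrv_select_queue(struct net_device *dev, struct sk_buff *skb)
    {
        if (skb->protocol != htons(ETH_P_IP))
            return 0;
        return skb_tx_hash(dev, skb);
    }

    static const struct net_device_ops mydrv_netdev_ops = {
        .ndo_select_queue = mydrv_select_queue,
        /* .ndo_start_xmit and friends elided */
    };
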
+
-+/*
-+ * Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands
++/**
++ * dev_queue_xmit - transmit a buffer
++ * @skb: buffer to transmit
++ *
++ * Queue a buffer for transmission to a network device. The caller must
++ * have set the device and priority and built the buffer before calling
++ * this function. The function can be called from an interrupt.
++ *
++ * A negative errno code is returned on a failure. A success does not
++ * guarantee the frame will be transmitted as it may be dropped due
++ * to congestion or traffic shaping.
++ *
++ * -----------------------------------------------------------------------------------
++ * I notice this method can also return errors from the queue disciplines,
++ * including NET_XMIT_DROP, which is a positive value. So, errors can also
++ * be positive.
++ *
++ * Regardless of the return value, the skb is consumed, so it is currently
++ * difficult to retry a send to this method. (You can bump the ref count
++ * before sending to hold a reference for retry if you are careful.)
++ *
++ * When calling this method, interrupts MUST be enabled. This is because
++ * the BH enable code must have IRQs enabled so that it will not deadlock.
++ * --BLG
+ */
-+int
-+acsmCompile2 (ACSM_STRUCT2 * acsm)
++int dev_queue_xmit(struct sk_buff *skb)
+{
-+ int k;
-+ ACSM_PATTERN2 * plist;
++ struct net_device *dev = skb->dev;
++ struct netdev_queue *txq;
++ struct Qdisc *q;
++ int rc = -ENOMEM;
++
++ /* GSO will handle the following emulations directly. */
++ if (netif_needs_gso(dev, skb))
++ goto gso;
++
++ if (skb_shinfo(skb)->frag_list &&
++ !(dev->features & NETIF_F_FRAGLIST) &&
++ __skb_linearize(skb))
++ goto out_kfree_skb;
++
++ /* Fragmented skb is linearized if device does not support SG,
++ * or if at least one of fragments is in highmem and device
++ * does not support DMA from it.
++ */
++ if (skb_shinfo(skb)->nr_frags &&
++ (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
++ __skb_linearize(skb))
++ goto out_kfree_skb;
++
++ /* If packet is not checksummed and device does not support
++ * checksumming for this protocol, complete checksumming here.
++ */
++ if (skb->ip_summed == CHECKSUM_PARTIAL) {
++ skb_set_transport_header(skb, skb->csum_start -
++ skb_headroom(skb));
++ if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
++ goto out_kfree_skb;
++ }
+
-+ /* Count number of states */
-+ for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
-+ {
-+ acsm->acsmMaxStates += plist->n;
-+ /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
-+ }
-+ acsm->acsmMaxStates++; /* one extra */
++gso:
++ /* Disable soft irqs for various locks below. Also
++ * stops preemption for RCU.
++ */
++ rcu_read_lock_bh();
+
-+ /* Alloc a List based State Transition table */
-+ acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
-+ MEMASSERT (acsm->acsmTransTable, "acsmCompile");
++ txq = dev_pick_tx(dev, skb);
++ q = rcu_dereference(txq->qdisc);
+
-+ memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);
++#ifdef CONFIG_NET_CLS_ACT
++	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
++#endif
++ if (q->enqueue) {
++ spinlock_t *root_lock = qdisc_lock(q);
+
-+ /* Alloc a failure table - this has a failure state, and a match list for each state */
-+ acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
-+ MEMASSERT (acsm->acsmFailState, "acsmCompile");
++ spin_lock(root_lock);
+
-+ memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );
++ if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
++ kfree_skb(skb);
++ rc = NET_XMIT_DROP;
++ } else {
++ rc = qdisc_enqueue_root(skb, q);
++ qdisc_run(q);
++ }
++ spin_unlock(root_lock);
+
-+  /* Alloc a MatchList table - this has a list of pattern matches for each state, if any */
-+ acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
-+ MEMASSERT (acsm->acsmMatchList, "acsmCompile");
++ goto out;
++ }
+
-+ memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
++	/* The device has no queue. This is the common case for software
++	   devices: loopback, tunnels of all sorts...
++
++	   Really, it is unlikely that netif_tx_lock protection is necessary
++	   here. (E.g. loopback and IP tunnels are clean, ignoring statistics
++	   counters.)
++	   However, it is possible that they rely on the protection
++	   we provide here.
++
++	   Check this and take the lock. It is not prone to deadlocks.
++	   Alternatively take the noqueue qdisc path, which is even simpler 8)
++ */
++ if (dev->flags & IFF_UP) {
++ int cpu = smp_processor_id(); /* ok because BHs are off */
++
++ if (txq->xmit_lock_owner != cpu) {
++
++ HARD_TX_LOCK(dev, txq, cpu);
++
++ if (!netif_tx_queue_stopped(txq)) {
++ rc = 0;
++ if (!dev_hard_start_xmit(skb, dev, txq)) {
++ HARD_TX_UNLOCK(dev, txq);
++ goto out;
++ }
++ }
++ HARD_TX_UNLOCK(dev, txq);
++ if (net_ratelimit())
++ printk(KERN_CRIT "Virtual device %s asks to "
++ "queue packet!\n", dev->name);
++ } else {
++ /* Recursion is detected! It is possible,
++ * unfortunately */
++ if (net_ratelimit())
++ printk(KERN_CRIT "Dead loop on virtual device "
++ "%s, fix it urgently!\n", dev->name);
++ }
++ }
+
-+ /* Alloc a separate state transition table == in state 's' due to event 'k', transition to 'next' state */
-+ acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
-+ MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");
++ rc = -ENETDOWN;
++ rcu_read_unlock_bh();
+
-+ for (k = 0; k < acsm->acsmMaxStates; k++)
-+ {
-+ acsm->acsmNextState[k]=(acstate_t*)0;
-+ }
++out_kfree_skb:
++ kfree_skb(skb);
++ return rc;
++out:
++ rcu_read_unlock_bh();
++ return rc;
++}
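
As the comment block above stresses, dev_queue_xmit() consumes the skb no
matter what it returns, so callers must not touch the buffer afterwards. A
hedged in-kernel sketch that builds one broadcast frame and hands it off;
the device name, payload size and ethertype are arbitrary illustrative
choices:

    #include <linux/netdevice.h>
    #include <linux/skbuff.h>
    #include <linux/if_ether.h>
    #include <linux/string.h>
    #include <net/net_namespace.h>

    static int send_test_frame(void)
    {
        struct net_device *dev = dev_get_by_name(&init_net, "eth0");
        struct sk_buff *skb;
        struct ethhdr *eth;

        if (!dev)
            return -ENODEV;

        skb = alloc_skb(LL_RESERVED_SPACE(dev) + ETH_HLEN + 64, GFP_KERNEL);
        if (!skb) {
            dev_put(dev);
            return -ENOMEM;
        }
        skb_reserve(skb, LL_RESERVED_SPACE(dev));

        eth = (struct ethhdr *)skb_put(skb, ETH_HLEN + 64);
        memset(eth, 0, ETH_HLEN + 64);
        memset(eth->h_dest, 0xff, ETH_ALEN);            /* broadcast */
        memcpy(eth->h_source, dev->dev_addr, ETH_ALEN);
        eth->h_proto = htons(0x88b5);                   /* local experimental */

        skb->dev = dev;
        skb->protocol = eth->h_proto;

        dev_queue_xmit(skb);    /* consumes skb whatever the return value */
        dev_put(dev);
        return 0;
    }
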
+
-+ /* Initialize state zero as a branch */
-+ acsm->acsmNumStates = 0;
+
-+ /* Add the 0'th state, */
-+ //acsm->acsmNumStates++;
++/*=======================================================================
++ Receiver routines
++ =======================================================================*/
+
-+ /* Add each Pattern to the State Table - This forms a keywords state table */
-+ for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
-+ {
-+ AddPatternStates (acsm, plist);
-+ }
++int netdev_max_backlog __read_mostly = 1000;
++int netdev_budget __read_mostly = 300;
++int weight_p __read_mostly = 64; /* old backlog weight */
+
-+ acsm->acsmNumStates++;
++DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
+
-+ if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
-+ {
-+ /* Build the NFA */
-+ Build_NFA (acsm);
-+ }
+
-+ if( acsm->acsmFSA == FSA_DFA )
-+ {
-+ /* Convert the NFA to a DFA */
-+ Convert_NFA_To_DFA (acsm);
-+ }
++/**
++ * netif_rx - post buffer to the network code
++ * @skb: buffer to post
++ *
++ * This function receives a packet from a device driver and queues it for
++ * the upper (protocol) levels to process. It always succeeds. The buffer
++ * may be dropped during processing for congestion control or by the
++ * protocol layers.
++ *
++ * return values:
++ * NET_RX_SUCCESS (no congestion)
++ * NET_RX_DROP (packet was dropped)
++ *
++ */
+
-+ /*
-+ *
-+ * Select Final Transition Table Storage Mode
-+ *
-+ */
-+ if( acsm->acsmFormat == ACF_SPARSE )
-+ {
-+ /* Convert DFA Full matrix to a Sparse matrix */
-+ if( Conv_Full_DFA_To_Sparse(acsm) )
-+ return -1;
-+ }
++int netif_rx(struct sk_buff *skb)
++{
++ struct softnet_data *queue;
++ unsigned long flags;
+
-+ else if( acsm->acsmFormat == ACF_BANDED )
-+ {
-+ /* Convert DFA Full matrix to a Sparse matrix */
-+ if( Conv_Full_DFA_To_Banded(acsm) )
-+ return -1;
-+ }
++ /* if netpoll wants it, pretend we never saw it */
++ if (netpoll_rx(skb))
++ return NET_RX_DROP;
+
-+ else if( acsm->acsmFormat == ACF_SPARSEBANDS )
-+ {
-+ /* Convert DFA Full matrix to a Sparse matrix */
-+ if( Conv_Full_DFA_To_SparseBands(acsm) )
-+ return -1;
-+ }
-+ else if( acsm->acsmFormat == ACF_FULL )
-+ {
-+ if( Conv_List_To_Full( acsm ) )
-+ return -1;
-+ }
++ if (!skb->tstamp.tv64)
++ net_timestamp(skb);
+
-+ acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */
++ /*
++	 * The code is rearranged so that the path is shortest
++	 * when the CPU is congested but still operating.
++ */
++ local_irq_save(flags);
++ queue = &__get_cpu_var(softnet_data);
++
++ __get_cpu_var(netdev_rx_stat).total++;
++ if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
++ if (queue->input_pkt_queue.qlen) {
++enqueue:
++ __skb_queue_tail(&queue->input_pkt_queue, skb);
++ local_irq_restore(flags);
++ return NET_RX_SUCCESS;
++ }
+
-+ /* Free up the Table Of Transition Lists */
-+ List_FreeTransTable( acsm );
++ napi_schedule(&queue->backlog);
++ goto enqueue;
++ }
+
-+ /* For now -- show this info */
-+ /*
-+ * acsmPrintInfo( acsm );
-+ */
++ __get_cpu_var(netdev_rx_stat).dropped++;
++ local_irq_restore(flags);
+
++ kfree_skb(skb);
++ return NET_RX_DROP;
++}
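
netif_rx() is the entry point for the classic, interrupt-driven receive
model: the driver copies the frame out of the hardware and queues it to the
per-CPU backlog shown above, deferring all protocol work to softirq
context. A hedged sketch; the mydrv_* names and the fixed frame length are
invented:

    #include <linux/interrupt.h>
    #include <linux/netdevice.h>
    #include <linux/etherdevice.h>

    static irqreturn_t mydrv_interrupt(int irq, void *dev_id)
    {
        struct net_device *dev = dev_id;
        int len = 128;                  /* frame length read from HW */
        struct sk_buff *skb;

        skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);
        if (!skb) {
            dev->stats.rx_dropped++;
            return IRQ_HANDLED;
        }
        skb_reserve(skb, NET_IP_ALIGN);
        skb_put(skb, len);
        /* ...here the real driver copies len bytes from the NIC
         * into skb->data... */
        skb->protocol = eth_type_trans(skb, dev);

        netif_rx(skb);                  /* queue to the per-CPU backlog */
        return IRQ_HANDLED;
    }
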
+
-+ /* Accrue Summary State Stats */
-+ summary.num_states += acsm->acsmNumStates;
-+ summary.num_transitions += acsm->acsmNumTrans;
++int netif_rx_ni(struct sk_buff *skb)
++{
++ int err;
+
-+ memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));
++ preempt_disable();
++ err = netif_rx(skb);
++ if (local_softirq_pending())
++ do_softirq();
++ preempt_enable();
+
-+ return 0;
++ return err;
+}
+
-+/*
-+ * Get the NextState from the NFA, all NFA storage formats use this
-+ */
-+inline
-+acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
++EXPORT_SYMBOL(netif_rx_ni);
++
++static void net_tx_action(struct softirq_action *h)
+{
-+ acstate_t fmt;
-+ acstate_t n;
-+ int index;
-+ int nb;
++ struct softnet_data *sd = &__get_cpu_var(softnet_data);
+
-+ fmt = *ps++;
++ if (sd->completion_queue) {
++ struct sk_buff *clist;
+
-+ ps++; /* skip bMatchState */
++ local_irq_disable();
++ clist = sd->completion_queue;
++ sd->completion_queue = NULL;
++ local_irq_enable();
+
-+ switch( fmt )
-+ {
-+ case ACF_BANDED:
-+ {
-+ n = ps[0];
-+ index = ps[1];
-+
-+ if( input < index )
-+ {
-+ if(state==0)
-+ {
-+ return 0;
-+ }
-+ else
-+ {
-+ return (acstate_t)ACSM_FAIL_STATE2;
-+ }
-+ }
-+ if( input >= index + n )
-+ {
-+ if(state==0)
-+ {
-+ return 0;
-+ }
-+ else
-+ {
-+ return (acstate_t)ACSM_FAIL_STATE2;
-+ }
-+ }
-+ if( ps[input-index] == 0 )
-+ {
-+ if( state != 0 )
-+ {
-+ return ACSM_FAIL_STATE2;
-+ }
-+ }
++ while (clist) {
++ struct sk_buff *skb = clist;
++ clist = clist->next;
+
-+ return (acstate_t) ps[input-index];
-+ }
++ WARN_ON(atomic_read(&skb->users));
++ __kfree_skb(skb);
++ }
++ }
+
-+ case ACF_SPARSE:
-+ {
-+ n = *ps++; /* number of sparse index-value entries */
++ if (sd->output_queue) {
++ struct Qdisc *head;
++
++ local_irq_disable();
++ head = sd->output_queue;
++ sd->output_queue = NULL;
++ local_irq_enable();
++
++ while (head) {
++ struct Qdisc *q = head;
++ spinlock_t *root_lock;
++
++ head = head->next_sched;
++
++ root_lock = qdisc_lock(q);
++ if (spin_trylock(root_lock)) {
++ smp_mb__before_clear_bit();
++ clear_bit(__QDISC_STATE_SCHED,
++ &q->state);
++ qdisc_run(q);
++ spin_unlock(root_lock);
++ } else {
++ if (!test_bit(__QDISC_STATE_DEACTIVATED,
++ &q->state)) {
++ __netif_reschedule(q);
++ } else {
++ smp_mb__before_clear_bit();
++ clear_bit(__QDISC_STATE_SCHED,
++ &q->state);
++ }
++ }
++ }
++ }
++}
+
-+ for( ; n>0 ; n-- )
-+ {
-+ if( ps[0] > input ) /* cannot match the input, already a higher value than the input */
-+ {
-+ return (acstate_t)ACSM_FAIL_STATE2; /* default state */
-+ }
-+ else if( ps[0] == input )
-+ {
-+ return ps[1]; /* next state */
-+ }
-+ ps+=2;
-+ }
-+ if( state == 0 )
-+ {
-+ return 0;
-+ }
-+ return ACSM_FAIL_STATE2;
-+ }
++static inline int deliver_skb(struct sk_buff *skb,
++ struct packet_type *pt_prev,
++ struct net_device *orig_dev)
++{
++ atomic_inc(&skb->users);
++ return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
++}
+
-+ case ACF_SPARSEBANDS:
-+ {
-+ nb = *ps++; /* number of bands */
++#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
++/* These hooks defined here for ATM */
++struct net_bridge;
++struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
++ unsigned char *addr);
++void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
+
-+ while( nb > 0 ) /* for each band */
-+ {
-+ n = *ps++; /* number of elements */
-+ index = *ps++; /* 1st element value */
++/*
++ * If the bridge module is loaded, call the bridging hook.
++ * Returns NULL if the packet was consumed.
++ */
++struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
++ struct sk_buff *skb) __read_mostly;
++static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
++ struct packet_type **pt_prev, int *ret,
++ struct net_device *orig_dev)
++{
++ struct net_bridge_port *port;
+
-+ if( input < index )
-+ {
-+ if( state != 0 )
-+ {
-+ return (acstate_t)ACSM_FAIL_STATE2;
-+ }
-+ return (acstate_t)0;
-+ }
-+ if( (input >= index) && (input < (index + n)) )
-+ {
-+ if( ps[input-index] == 0 )
-+ {
-+ if( state != 0 )
-+ {
-+ return ACSM_FAIL_STATE2;
-+ }
-+ }
-+ return (acstate_t) ps[input-index];
-+ }
-+ nb--;
-+ ps += n;
-+ }
-+ if( state != 0 )
-+ {
-+ return (acstate_t)ACSM_FAIL_STATE2;
-+ }
-+ return (acstate_t)0;
-+ }
++ if (skb->pkt_type == PACKET_LOOPBACK ||
++ (port = rcu_dereference(skb->dev->br_port)) == NULL)
++ return skb;
+
-+ case ACF_FULL:
-+ {
-+ if( ps[input] == 0 )
-+ {
-+ if( state != 0 )
-+ {
-+ return ACSM_FAIL_STATE2;
-+ }
-+ }
-+ return ps[input];
-+ }
-+ }
++ if (*pt_prev) {
++ *ret = deliver_skb(skb, *pt_prev, orig_dev);
++ *pt_prev = NULL;
++ }
+
-+ return 0;
++ return br_handle_frame_hook(port, skb);
+}
++#else
++#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
++#endif
+
++#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
++struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
++EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
+
++static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
++ struct packet_type **pt_prev,
++ int *ret,
++ struct net_device *orig_dev)
++{
++ if (skb->dev->macvlan_port == NULL)
++ return skb;
+
-+/*
-+ * Get the NextState from the DFA Next State Transition table
-+ * Full and banded are supported separately, this is for
-+ * sparse and sparse-bands
++ if (*pt_prev) {
++ *ret = deliver_skb(skb, *pt_prev, orig_dev);
++ *pt_prev = NULL;
++ }
++ return macvlan_handle_frame_hook(skb);
++}
++#else
++#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
++#endif
++
++#ifdef CONFIG_NET_CLS_ACT
++/* TODO: Maybe we should just force sch_ingress to be compiled in
++ * when CONFIG_NET_CLS_ACT is? Otherwise we execute a few useless
++ * instructions (a compare and two extra stores) when it is off
++ * but CONFIG_NET_CLS_ACT is on.
++ * NOTE: This doesn't remove any functionality; without the
++ * ingress scheduler you just can't add policies on ingress.
++ *
+ */
-+inline
-+acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
++static int ing_filter(struct sk_buff *skb)
+{
-+ acstate_t n, nb;
-+ int index;
-+
-+ switch( ps[0] )
-+ {
-+ /* BANDED */
-+ case ACF_BANDED:
-+ {
-+ /* n=ps[2] : number of entries in the band */
-+ /* index=ps[3] : index of the 1st entry, sequential thereafter */
++ struct net_device *dev = skb->dev;
++ u32 ttl = G_TC_RTTL(skb->tc_verd);
++ struct netdev_queue *rxq;
++ int result = TC_ACT_OK;
++ struct Qdisc *q;
++
++ if (MAX_RED_LOOP < ttl++) {
++ printk(KERN_WARNING
++		       "Redir loop detected, dropping packet (%d->%d)\n",
++ skb->iif, dev->ifindex);
++ return TC_ACT_SHOT;
++ }
+
-+ if( input < ps[3] ) return 0;
-+ if( input >= (ps[3]+ps[2]) ) return 0;
++ skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
++ skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
+
-+ return ps[4+input-ps[3]];
-+ }
++ rxq = &dev->rx_queue;
+
-+ /* FULL */
-+ case ACF_FULL:
-+ {
-+ return ps[2+input];
-+ }
++ q = rxq->qdisc;
++ if (q != &noop_qdisc) {
++ spin_lock(qdisc_lock(q));
++ if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
++ result = qdisc_enqueue_root(skb, q);
++ spin_unlock(qdisc_lock(q));
++ }
+
-+ /* SPARSE */
-+ case ACF_SPARSE:
-+ {
-+ n = ps[2]; /* number of entries/ key+next pairs */
++ return result;
++}
+
-+ ps += 3;
++static inline struct sk_buff *handle_ing(struct sk_buff *skb,
++ struct packet_type **pt_prev,
++ int *ret, struct net_device *orig_dev)
++{
++ if (skb->dev->rx_queue.qdisc == &noop_qdisc)
++ goto out;
++
++ if (*pt_prev) {
++ *ret = deliver_skb(skb, *pt_prev, orig_dev);
++ *pt_prev = NULL;
++ } else {
++ /* Huh? Why does turning on AF_PACKET affect this? */
++ skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
++ }
+
-+ for( ; n>0 ; n-- )
-+ {
-+ if( input < ps[0] ) /* cannot match the input, already a higher value than the input */
-+ {
-+ return (acstate_t)0; /* default state */
-+ }
-+ else if( ps[0] == input )
-+ {
-+ return ps[1]; /* next state */
-+ }
-+ ps += 2;
-+ }
-+ return (acstate_t)0;
-+ }
++ switch (ing_filter(skb)) {
++ case TC_ACT_SHOT:
++ case TC_ACT_STOLEN:
++ kfree_skb(skb);
++ return NULL;
++ }
+
++out:
++ skb->tc_verd = 0;
++ return skb;
++}
++#endif
+
-+ /* SPARSEBANDS */
-+ case ACF_SPARSEBANDS:
-+ {
-+ nb = ps[2]; /* number of bands */
++/*
++ * netif_nit_deliver - deliver received packets to network taps
++ * @skb: buffer
++ *
++ * This function is used to deliver incoming packets to network
++ * taps. It should be used when the normal netif_receive_skb path
++ * is bypassed, for example because of VLAN acceleration.
++ */
++void netif_nit_deliver(struct sk_buff *skb)
++{
++ struct packet_type *ptype;
+
-+ ps += 3;
++ if (list_empty(&ptype_all))
++ return;
+
-+ while( nb > 0 ) /* for each band */
-+ {
-+ n = ps[0]; /* number of elements in this band */
-+ index = ps[1]; /* start index/char of this band */
-+ if( input < index )
-+ {
-+ return (acstate_t)0;
-+ }
-+ if( (input < (index + n)) )
-+ {
-+ return (acstate_t) ps[2+input-index];
-+ }
-+ nb--;
-+ ps += n;
-+ }
-+ return (acstate_t)0;
-+ }
-+ }
++ skb_reset_network_header(skb);
++ skb_reset_transport_header(skb);
++ skb->mac_len = skb->network_header - skb->mac_header;
+
-+ return 0;
++ rcu_read_lock();
++ list_for_each_entry_rcu(ptype, &ptype_all, list) {
++ if (!ptype->dev || ptype->dev == skb->dev)
++ deliver_skb(skb, ptype, skb->dev);
++ }
++ rcu_read_unlock();
+}
-+/*
-+ * Search Text or Binary Data for Pattern matches
++
++/**
++ * netif_receive_skb - process receive buffer from network
++ * @skb: buffer to process
++ *
++ * netif_receive_skb() is the main receive data processing function.
++ * It always succeeds. The buffer may be dropped during processing
++ * for congestion control or by the protocol layers.
+ *
-+ * Sparse & Sparse-Banded Matrix search
++ * This function may only be called from softirq context and interrupts
++ * should be enabled.
++ *
++ * Return values (usually ignored):
++ * NET_RX_SUCCESS: no congestion
++ * NET_RX_DROP: packet was dropped
+ */
-+static
-+inline
-+int
-+acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
-+ int (*Match) (void * id, int index, void *data),
-+ void *data)
-+{
-+ acstate_t state;
-+ ACSM_PATTERN2 * mlist;
-+ unsigned char * Tend;
-+ int nfound = 0;
-+ unsigned char * T, * Tc;
-+ int index;
-+ acstate_t ** NextState = acsm->acsmNextState;
-+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
-+
-+ Tc = Tx;
-+ T = Tx;
-+ Tend = T + n;
-+
-+ for( state = 0; T < Tend; T++ )
-+ {
-+ state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
++int netif_receive_skb(struct sk_buff *skb)
++{
++ struct packet_type *ptype, *pt_prev;
++ struct net_device *orig_dev;
++ struct net_device *null_or_orig;
++ int ret = NET_RX_DROP;
++ __be16 type;
++
++ if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
++ return NET_RX_SUCCESS;
++
++ /* if we've gotten here through NAPI, check netpoll */
++ if (netpoll_receive_skb(skb))
++ return NET_RX_DROP;
++
++ if (!skb->tstamp.tv64)
++ net_timestamp(skb);
++
++ if (!skb->iif)
++ skb->iif = skb->dev->ifindex;
++
++ null_or_orig = NULL;
++ orig_dev = skb->dev;
++ if (orig_dev->master) {
++ if (skb_bond_should_drop(skb))
++ null_or_orig = orig_dev; /* deliver only exact match */
++ else
++ skb->dev = orig_dev->master;
++ }
+
-+ /* test if this state has any matching patterns */
-+ if( NextState[state][1] )
-+ {
-+ for( mlist = MatchList[state];
-+ mlist!= NULL;
-+ mlist = mlist->next )
-+ {
-+ index = T - mlist->n - Tc;
-+ if( mlist->nocase )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
++ __get_cpu_var(netdev_rx_stat).total++;
++
++ skb_reset_network_header(skb);
++ skb_reset_transport_header(skb);
++ skb->mac_len = skb->network_header - skb->mac_header;
++
++ pt_prev = NULL;
++
++ rcu_read_lock();
++
++#ifdef CONFIG_NET_CLS_ACT
++ if (skb->tc_verd & TC_NCLS) {
++ skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
++ goto ncls;
++ }
++#endif
++
++ list_for_each_entry_rcu(ptype, &ptype_all, list) {
++ if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
++ ptype->dev == orig_dev) {
++ if (pt_prev)
++ ret = deliver_skb(skb, pt_prev, orig_dev);
++ pt_prev = ptype;
+ }
-+ else
-+ {
-+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
++ }
++
++#ifdef CONFIG_NET_CLS_ACT
++ skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
++ if (!skb)
++ goto out;
++ncls:
++#endif
++
++ skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
++ if (!skb)
++ goto out;
++ skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
++ if (!skb)
++ goto out;
++
++ skb_orphan(skb);
++
++ type = skb->protocol;
++ list_for_each_entry_rcu(ptype,
++ &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
++ if (ptype->type == type &&
++ (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
++ ptype->dev == orig_dev)) {
++ if (pt_prev)
++ ret = deliver_skb(skb, pt_prev, orig_dev);
++ pt_prev = ptype;
+ }
-+ }
+ }
-+ }
-+ return nfound;
++
++ if (pt_prev) {
++ ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
++ } else {
++ kfree_skb(skb);
++		/* Jamal, now you will not be able to escape explaining
++		 * to me how you were going to use this. :-)
++ */
++ ret = NET_RX_DROP;
++ }
++
++out:
++ rcu_read_unlock();
++ return ret;
+}
-+/*
-+ * Full format DFA search
-+ * Do not change anything here without testing, caching and prefetching
-+ * performance is very sensitive to any changes.
-+ *
-+ * Perf-Notes:
-+ * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
-+ * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
-+ * 3)
-+ */
-+static
-+inline
-+int
-+acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
-+ int (*Match) (void * id, int index, void *data),
-+ void *data)
-+{
-+ ACSM_PATTERN2 * mlist;
-+ unsigned char * Tend;
-+ unsigned char * T;
-+ int index;
-+ acstate_t state;
-+ acstate_t * ps;
-+ acstate_t sindex;
-+ acstate_t ** NextState = acsm->acsmNextState;
-+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
-+ int nfound = 0;
-+
-+ T = Tx;
-+ Tend = Tx + n;
-+
-+ for( state = 0; T < Tend; T++ )
-+ {
-+ ps = NextState[ state ];
+
-+ sindex = xlatcase[ T[0] ];
++/* Network device is going away, flush any packets still pending */
++static void flush_backlog(void *arg)
++{
++ struct net_device *dev = arg;
++ struct softnet_data *queue = &__get_cpu_var(softnet_data);
++ struct sk_buff *skb, *tmp;
++
++ skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
++ if (skb->dev == dev) {
++ __skb_unlink(skb, &queue->input_pkt_queue);
++ kfree_skb(skb);
++ }
++}
+
-+ /* check the current state for a pattern match */
-+ if( ps[1] )
-+ {
-+ for( mlist = MatchList[state];
-+ mlist!= NULL;
-+ mlist = mlist->next )
-+ {
-+ index = T - mlist->n - Tx;
++static int napi_gro_complete(struct sk_buff *skb)
++{
++ struct packet_type *ptype;
++ __be16 type = skb->protocol;
++ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
++ int err = -ENOENT;
++
++ if (NAPI_GRO_CB(skb)->count == 1) {
++ skb_shinfo(skb)->gso_size = 0;
++ goto out;
++ }
+
++ rcu_read_lock();
++ list_for_each_entry_rcu(ptype, head, list) {
++ if (ptype->type != type || ptype->dev || !ptype->gro_complete)
++ continue;
+
-+ if( mlist->nocase )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
-+ else
-+ {
-+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
-+ }
++ err = ptype->gro_complete(skb);
++ break;
++ }
++ rcu_read_unlock();
+
-+ }
++ if (err) {
++ WARN_ON(&ptype->list == head);
++ kfree_skb(skb);
++ return NET_RX_SUCCESS;
+ }
+
-+ state = ps[ 2u + sindex ];
-+ }
++out:
++ return netif_receive_skb(skb);
++}
+
-+ /* Check the last state for a pattern match */
-+ for( mlist = MatchList[state];
-+ mlist!= NULL;
-+ mlist = mlist->next )
-+ {
-+ index = T - mlist->n - Tx;
++void napi_gro_flush(struct napi_struct *napi)
++{
++ struct sk_buff *skb, *next;
+
-+ if( mlist->nocase )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
++ for (skb = napi->gro_list; skb; skb = next) {
++ next = skb->next;
++ skb->next = NULL;
++ napi_gro_complete(skb);
+ }
-+ else
-+ {
-+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
++
++ napi->gro_count = 0;
++ napi->gro_list = NULL;
++}
++EXPORT_SYMBOL(napi_gro_flush);
++
++void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
++{
++ unsigned int offset = skb_gro_offset(skb);
++
++ hlen += offset;
++ if (hlen <= skb_headlen(skb))
++ return skb->data + offset;
++
++ if (unlikely(!skb_shinfo(skb)->nr_frags ||
++ skb_shinfo(skb)->frags[0].size <=
++ hlen - skb_headlen(skb) ||
++ PageHighMem(skb_shinfo(skb)->frags[0].page)))
++ return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
++
++ return page_address(skb_shinfo(skb)->frags[0].page) +
++ skb_shinfo(skb)->frags[0].page_offset +
++ offset - skb_headlen(skb);
++}
++EXPORT_SYMBOL(skb_gro_header);
++
++int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
++{
++ struct sk_buff **pp = NULL;
++ struct packet_type *ptype;
++ __be16 type = skb->protocol;
++ struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
++ int same_flow;
++ int mac_len;
++ int ret;
++
++ if (!(skb->dev->features & NETIF_F_GRO))
++ goto normal;
++
++ if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
++ goto normal;
++
++ rcu_read_lock();
++ list_for_each_entry_rcu(ptype, head, list) {
++ if (ptype->type != type || ptype->dev || !ptype->gro_receive)
++ continue;
++
++ skb_set_network_header(skb, skb_gro_offset(skb));
++ mac_len = skb->network_header - skb->mac_header;
++ skb->mac_len = mac_len;
++ NAPI_GRO_CB(skb)->same_flow = 0;
++ NAPI_GRO_CB(skb)->flush = 0;
++ NAPI_GRO_CB(skb)->free = 0;
++
++ pp = ptype->gro_receive(&napi->gro_list, skb);
++ break;
+ }
-+ }
++ rcu_read_unlock();
++
++ if (&ptype->list == head)
++ goto normal;
++
++ same_flow = NAPI_GRO_CB(skb)->same_flow;
++ ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
++
++ if (pp) {
++ struct sk_buff *nskb = *pp;
++
++ *pp = nskb->next;
++ nskb->next = NULL;
++ napi_gro_complete(nskb);
++ napi->gro_count--;
++ }
++
++ if (same_flow)
++ goto ok;
++
++ if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
++ goto normal;
++
++ napi->gro_count++;
++ NAPI_GRO_CB(skb)->count = 1;
++ skb_shinfo(skb)->gso_size = skb_gro_len(skb);
++ skb->next = napi->gro_list;
++ napi->gro_list = skb;
++ ret = GRO_HELD;
+
-+ return nfound;
++pull:
++ if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
++ if (napi->gro_list == skb)
++ napi->gro_list = skb->next;
++ ret = GRO_DROP;
++ }
++
++ok:
++ return ret;
++
++normal:
++ ret = GRO_NORMAL;
++ goto pull;
+}
-+/*
-+ * Banded-Row format DFA search
-+ * Do not change anything here, caching and prefetching
-+ * performance is very sensitive to any changes.
-+ *
-+ * ps[0] = storage fmt
-+ * ps[1] = bool match flag
-+ * ps[2] = # elements in band
-+ * ps[3] = index of 1st element
-+ */
-+static
-+inline
-+int
-+acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
-+ int (*Match) (void * id, int index, void *data),
-+ void *data)
-+{
-+ acstate_t state;
-+ unsigned char * Tend;
-+ unsigned char * T;
-+ int sindex;
-+ int index;
-+ acstate_t ** NextState = acsm->acsmNextState;
-+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
-+ ACSM_PATTERN2 * mlist;
-+ acstate_t * ps;
-+ int nfound = 0;
-+
-+ T = Tx;
-+ Tend = T + n;
-+
-+ for( state = 0; T < Tend; T++ )
-+ {
-+ ps = NextState[state];
++EXPORT_SYMBOL(dev_gro_receive);
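
dev_gro_receive() above only orchestrates; the flow matching and header
fixups happen in the gro_receive/gro_complete hooks it looks up on the
packet_type (net/ipv4/af_inet.c wires up inet_gro_receive() and
inet_gro_complete() this way). A hedged outline of the protocol side, with
invented myproto_* names and the real work elided:

    #include <linux/netdevice.h>
    #include <linux/if_ether.h>

    static struct sk_buff **myproto_gro_receive(struct sk_buff **head,
                                                struct sk_buff *skb)
    {
        /* Walk head (napi->gro_list), set NAPI_GRO_CB(p)->same_flow on
         * entries that match skb's flow, and return the list slot of a
         * packet that should now be flushed, or NULL. */
        return NULL;
    }

    static int myproto_gro_complete(struct sk_buff *skb)
    {
        /* Patch up lengths and checksums of the merged super-packet. */
        return 0;
    }

    static struct packet_type myproto_ptype = {
        .type         = __constant_htons(ETH_P_IP),
        .gro_receive  = myproto_gro_receive,
        .gro_complete = myproto_gro_complete,
        /* .func = normal receive handler */
    };
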
+
-+ sindex = xlatcase[ T[0] ];
++static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
++{
++ struct sk_buff *p;
+
-+ /* test if this state has any matching patterns */
-+ if( ps[1] )
-+ {
-+ for( mlist = MatchList[state];
-+ mlist!= NULL;
-+ mlist = mlist->next )
-+ {
-+ index = T - mlist->n - Tx;
++ if (netpoll_rx_on(skb))
++ return GRO_NORMAL;
+
-+ if( mlist->nocase )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
-+ else
-+ {
-+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
-+ }
-+ }
++ for (p = napi->gro_list; p; p = p->next) {
++ NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
++ && !compare_ether_header(skb_mac_header(p),
++ skb_gro_mac_header(skb));
++ NAPI_GRO_CB(p)->flush = 0;
+ }
+
-+ if( sindex < ps[3] ) state = 0;
-+ else if( sindex >= (ps[3] + ps[2]) ) state = 0;
-+ else state = ps[ 4u + sindex - ps[3] ];
-+ }
++ return dev_gro_receive(napi, skb);
++}
+
-+ /* Check the last state for a pattern match */
-+ for( mlist = MatchList[state];
-+ mlist!= NULL;
-+ mlist = mlist->next )
-+ {
-+ index = T - mlist->n - Tx;
++int napi_skb_finish(int ret, struct sk_buff *skb)
++{
++ int err = NET_RX_SUCCESS;
+
-+ if( mlist->nocase )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
++ switch (ret) {
++ case GRO_NORMAL:
++ return netif_receive_skb(skb);
++
++ case GRO_DROP:
++ err = NET_RX_DROP;
++ /* fall through */
++
++ case GRO_MERGED_FREE:
++ kfree_skb(skb);
++ break;
+ }
-+ else
-+ {
-+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
++
++ return err;
++}
++EXPORT_SYMBOL(napi_skb_finish);
++
++int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
++{
++ skb_gro_reset_offset(skb);
++
++ return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
++}
++EXPORT_SYMBOL(napi_gro_receive);
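
For a driver, opting into GRO is simply a matter of calling
napi_gro_receive() where netif_receive_skb() would otherwise go. A hedged
poll-loop sketch; the mydrv_* names, the private structure and the
mydrv_next_rx_skb() helper are invented:

    #include <linux/netdevice.h>
    #include <linux/etherdevice.h>

    struct mydrv_priv {                 /* invented driver context */
        struct net_device *dev;
        struct napi_struct napi;
    };

    static int mydrv_poll(struct napi_struct *napi, int budget)
    {
        struct mydrv_priv *priv = container_of(napi, struct mydrv_priv, napi);
        int work = 0;

        while (work < budget) {
            struct sk_buff *skb = mydrv_next_rx_skb(priv); /* invented */

            if (!skb)
                break;
            skb->protocol = eth_type_trans(skb, priv->dev);
            napi_gro_receive(napi, skb);  /* merge, hold or deliver */
            work++;
        }
        if (work < budget)
            napi_complete(napi);  /* flushes gro_list, see below */
        return work;
    }
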
++
++void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
++{
++ __skb_pull(skb, skb_headlen(skb));
++ skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
++
++ napi->skb = skb;
++}
++EXPORT_SYMBOL(napi_reuse_skb);
++
++struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
++ struct napi_gro_fraginfo *info)
++{
++ struct net_device *dev = napi->dev;
++ struct sk_buff *skb = napi->skb;
++ struct ethhdr *eth;
++ skb_frag_t *frag;
++ int i;
++
++ napi->skb = NULL;
++
++ if (!skb) {
++ skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
++ if (!skb)
++ goto out;
++
++ skb_reserve(skb, NET_IP_ALIGN);
+ }
-+ }
+
-+ return nfound;
++ BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
++ frag = info->frags;
++
++ for (i = 0; i < info->nr_frags; i++) {
++ skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
++ frag->size);
++ frag++;
++ }
++ skb_shinfo(skb)->nr_frags = info->nr_frags;
++
++ skb->data_len = info->len;
++ skb->len += info->len;
++ skb->truesize += info->len;
++
++ skb_reset_mac_header(skb);
++ skb_gro_reset_offset(skb);
++
++ eth = skb_gro_header(skb, sizeof(*eth));
++ if (!eth) {
++ napi_reuse_skb(napi, skb);
++ skb = NULL;
++ goto out;
++ }
++
++ skb_gro_pull(skb, sizeof(*eth));
++
++ /*
++ * This works because the only protocols we care about don't require
++ * special handling. We'll fix it up properly at the end.
++ */
++ skb->protocol = eth->h_proto;
++
++ skb->ip_summed = info->ip_summed;
++ skb->csum = info->csum;
++
++out:
++ return skb;
++}
++EXPORT_SYMBOL(napi_fraginfo_skb);
++
++int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
++{
++ int err = NET_RX_SUCCESS;
++
++ switch (ret) {
++ case GRO_NORMAL:
++ case GRO_HELD:
++ skb->protocol = eth_type_trans(skb, napi->dev);
++
++ if (ret == GRO_NORMAL)
++ return netif_receive_skb(skb);
++
++ skb_gro_pull(skb, -ETH_HLEN);
++ break;
++
++ case GRO_DROP:
++ err = NET_RX_DROP;
++ /* fall through */
++
++ case GRO_MERGED_FREE:
++ napi_reuse_skb(napi, skb);
++ break;
++ }
++
++ return err;
++}
++EXPORT_SYMBOL(napi_frags_finish);
++
++int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
++{
++ struct sk_buff *skb = napi_fraginfo_skb(napi, info);
++
++ if (!skb)
++ return NET_RX_DROP;
++
++ return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
++}
++EXPORT_SYMBOL(napi_gro_frags);
++
++static int process_backlog(struct napi_struct *napi, int quota)
++{
++ int work = 0;
++ struct softnet_data *queue = &__get_cpu_var(softnet_data);
++ unsigned long start_time = jiffies;
++
++ napi->weight = weight_p;
++ do {
++ struct sk_buff *skb;
++
++ local_irq_disable();
++ skb = __skb_dequeue(&queue->input_pkt_queue);
++ if (!skb) {
++ __napi_complete(napi);
++ local_irq_enable();
++ break;
++ }
++ local_irq_enable();
++
++ netif_receive_skb(skb);
++ } while (++work < quota && jiffies == start_time);
++
++ return work;
++}
++
++/**
++ * __napi_schedule - schedule for receive
++ * @n: entry to schedule
++ *
++ * The entry's receive function will be scheduled to run
++ */
++void __napi_schedule(struct napi_struct *n)
++{
++ unsigned long flags;
++
++ local_irq_save(flags);
++ list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
++ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
++ local_irq_restore(flags);
++}
++EXPORT_SYMBOL(__napi_schedule);
++
++void __napi_complete(struct napi_struct *n)
++{
++ BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
++ BUG_ON(n->gro_list);
++
++ list_del(&n->poll_list);
++ smp_mb__before_clear_bit();
++ clear_bit(NAPI_STATE_SCHED, &n->state);
++}
++EXPORT_SYMBOL(__napi_complete);
++
++void napi_complete(struct napi_struct *n)
++{
++ unsigned long flags;
++
++ /*
++ * don't let napi dequeue from the cpu poll list
++	 * just in case it's running on a different cpu
++ */
++ if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
++ return;
++
++ napi_gro_flush(n);
++ local_irq_save(flags);
++ __napi_complete(n);
++ local_irq_restore(flags);
++}
++EXPORT_SYMBOL(napi_complete);
++
++void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
++ int (*poll)(struct napi_struct *, int), int weight)
++{
++ INIT_LIST_HEAD(&napi->poll_list);
++ napi->gro_count = 0;
++ napi->gro_list = NULL;
++ napi->skb = NULL;
++ napi->poll = poll;
++ napi->weight = weight;
++ list_add(&napi->dev_list, &dev->napi_list);
++ napi->dev = dev;
++#ifdef CONFIG_NETPOLL
++ spin_lock_init(&napi->poll_lock);
++ napi->poll_owner = -1;
++#endif
++ set_bit(NAPI_STATE_SCHED, &napi->state);
++}
++EXPORT_SYMBOL(netif_napi_add);
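
A hedged sketch of how a driver wires the API above together:
netif_napi_add() at probe time, napi_schedule() from the RX interrupt, and
net_rx_action() below then invokes the poll callback (for instance the
mydrv_poll() sketch after napi_gro_receive above). The mydrv_* names are
invented:

    #include <linux/interrupt.h>
    #include <linux/netdevice.h>

    #define MYDRV_NAPI_WEIGHT 64

    static void mydrv_setup_napi(struct net_device *dev,
                                 struct mydrv_priv *priv)
    {
        netif_napi_add(dev, &priv->napi, mydrv_poll, MYDRV_NAPI_WEIGHT);
    }

    static irqreturn_t mydrv_rx_interrupt(int irq, void *data)
    {
        struct mydrv_priv *priv = data;

        mydrv_disable_rx_irq(priv);   /* invented helper */
        napi_schedule(&priv->napi);   /* adds napi to the per-CPU poll_list */
        return IRQ_HANDLED;
    }
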
++
++void netif_napi_del(struct napi_struct *napi)
++{
++ struct sk_buff *skb, *next;
++
++ list_del_init(&napi->dev_list);
++ kfree_skb(napi->skb);
++
++ for (skb = napi->gro_list; skb; skb = next) {
++ next = skb->next;
++ skb->next = NULL;
++ kfree_skb(skb);
++ }
++
++ napi->gro_list = NULL;
++ napi->gro_count = 0;
++}
++EXPORT_SYMBOL(netif_napi_del);
++
++
++static void net_rx_action(struct softirq_action *h)
++{
++ struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
++ unsigned long time_limit = jiffies + 2;
++ int budget = netdev_budget;
++ void *have;
++
++ local_irq_disable();
++
++ while (!list_empty(list)) {
++ struct napi_struct *n;
++ int work, weight;
++
++		/* If the softirq window is exhausted then punt.
++		 * Allow this to run for 2 jiffies, which allows
++ * an average latency of 1.5/HZ.
++ */
++ if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
++ goto softnet_break;
++
++ local_irq_enable();
++
++ /* Even though interrupts have been re-enabled, this
++ * access is safe because interrupts can only add new
++ * entries to the tail of this list, and only ->poll()
++ * calls can remove this head entry from the list.
++ */
++ n = list_entry(list->next, struct napi_struct, poll_list);
++
++ have = netpoll_poll_lock(n);
++
++ weight = n->weight;
++
++ /* This NAPI_STATE_SCHED test is for avoiding a race
++ * with netpoll's poll_napi(). Only the entity which
++ * obtains the lock and sees NAPI_STATE_SCHED set will
++ * actually make the ->poll() call. Therefore we avoid
++	 * accidentally calling ->poll() when NAPI is not scheduled.
++ */
++ work = 0;
++ if (test_bit(NAPI_STATE_SCHED, &n->state))
++ work = n->poll(n, weight);
++
++ WARN_ON_ONCE(work > weight);
++
++ budget -= work;
++
++ local_irq_disable();
++
++ /* Drivers must not modify the NAPI state if they
++ * consume the entire weight. In such cases this code
++ * still "owns" the NAPI instance and therefore can
++ * move the instance around on the list at-will.
++ */
++ if (unlikely(work == weight)) {
++ if (unlikely(napi_disable_pending(n)))
++ __napi_complete(n);
++ else
++ list_move_tail(&n->poll_list, list);
++ }
++
++ netpoll_poll_unlock(have);
++ }
++out:
++ local_irq_enable();
++
++#ifdef CONFIG_NET_DMA
++ /*
++ * There may not be any more sk_buffs coming right now, so push
++ * any pending DMA copies to hardware
++ */
++ dma_issue_pending_all();
++#endif
++
++ return;
++
++softnet_break:
++ __get_cpu_var(netdev_rx_stat).time_squeeze++;
++ __raise_softirq_irqoff(NET_RX_SOFTIRQ);
++ goto out;
++}
++
++static gifconf_func_t * gifconf_list [NPROTO];
++
++/**
++ * register_gifconf - register a SIOCGIF handler
++ * @family: Address family
++ * @gifconf: Function handler
++ *
++ * Register protocol dependent address dumping routines. The handler
++ * that is passed must not be freed or reused until it has been replaced
++ * by another handler.
++ */
++int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
++{
++ if (family >= NPROTO)
++ return -EINVAL;
++ gifconf_list[family] = gifconf;
++ return 0;
++}
++
++
++/*
++ * Map an interface index to its name (SIOCGIFNAME)
++ */
++
++/*
++ * We need this ioctl for efficient implementation of the
++ * if_indextoname() function required by the IPv6 API. Without
++ * it, we would have to search all the interfaces to find a
++ * match. --pb
++ */
++
++static int dev_ifname(struct net *net, struct ifreq __user *arg)
++{
++ struct net_device *dev;
++ struct ifreq ifr;
++
++ /*
++ * Fetch the caller's info block.
++ */
++
++ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
++ return -EFAULT;
++
++ read_lock(&dev_base_lock);
++ dev = __dev_get_by_index(net, ifr.ifr_ifindex);
++ if (!dev) {
++ read_unlock(&dev_base_lock);
++ return -ENODEV;
++ }
++
++ strcpy(ifr.ifr_name, dev->name);
++ read_unlock(&dev_base_lock);
++
++ if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
++ return -EFAULT;
++ return 0;
++}
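
From userspace this handler is what backs if_indextoname(); a hedged,
stand-alone sketch of the raw ioctl:

    #include <stdio.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <sys/socket.h>
    #include <net/if.h>
    #include <unistd.h>

    int main(void)
    {
        struct ifreq ifr;
        int fd = socket(AF_INET, SOCK_DGRAM, 0);

        memset(&ifr, 0, sizeof(ifr));
        ifr.ifr_ifindex = 1;            /* usually "lo" on Linux */
        if (fd >= 0 && ioctl(fd, SIOCGIFNAME, &ifr) == 0)
            printf("ifindex 1 is %s\n", ifr.ifr_name);
        if (fd >= 0)
            close(fd);
        return 0;
    }
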
++
++/*
++ * Perform a SIOCGIFCONF call. This structure will change
++ * size eventually, and there is nothing I can do about it.
++ * Thus we will need a 'compatibility mode'.
++ */
++
++static int dev_ifconf(struct net *net, char __user *arg)
++{
++ struct ifconf ifc;
++ struct net_device *dev;
++ char __user *pos;
++ int len;
++ int total;
++ int i;
++
++ /*
++ * Fetch the caller's info block.
++ */
++
++ if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
++ return -EFAULT;
++
++ pos = ifc.ifc_buf;
++ len = ifc.ifc_len;
++
++ /*
++ * Loop over the interfaces, and write an info block for each.
++ */
++
++ total = 0;
++ for_each_netdev(net, dev) {
++ for (i = 0; i < NPROTO; i++) {
++ if (gifconf_list[i]) {
++ int done;
++ if (!pos)
++ done = gifconf_list[i](dev, NULL, 0);
++ else
++ done = gifconf_list[i](dev, pos + total,
++ len - total);
++ if (done < 0)
++ return -EFAULT;
++ total += done;
++ }
++ }
++ }
++
++ /*
++ * All done. Write the updated control block back to the caller.
++ */
++ ifc.ifc_len = total;
++
++ /*
++ * Both BSD and Solaris return 0 here, so we do too.
++ */
++ return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
++}
++
++#ifdef CONFIG_PROC_FS
++/*
++ * This is invoked by the /proc filesystem handler to display a device
++ * in detail.
++ */
++void *dev_seq_start(struct seq_file *seq, loff_t *pos)
++ __acquires(dev_base_lock)
++{
++ struct net *net = seq_file_net(seq);
++ loff_t off;
++ struct net_device *dev;
++
++ read_lock(&dev_base_lock);
++ if (!*pos)
++ return SEQ_START_TOKEN;
++
++ off = 1;
++ for_each_netdev(net, dev)
++ if (off++ == *pos)
++ return dev;
++
++ return NULL;
++}
++
++void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ struct net *net = seq_file_net(seq);
++ ++*pos;
++ return v == SEQ_START_TOKEN ?
++ first_net_device(net) : next_net_device((struct net_device *)v);
++}
++
++void dev_seq_stop(struct seq_file *seq, void *v)
++ __releases(dev_base_lock)
++{
++ read_unlock(&dev_base_lock);
++}
++
++static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
++{
++ const struct net_device_stats *stats = dev_get_stats(dev);
++
++ seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
++ "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
++ dev->name, stats->rx_bytes, stats->rx_packets,
++ stats->rx_errors,
++ stats->rx_dropped + stats->rx_missed_errors,
++ stats->rx_fifo_errors,
++ stats->rx_length_errors + stats->rx_over_errors +
++ stats->rx_crc_errors + stats->rx_frame_errors,
++ stats->rx_compressed, stats->multicast,
++ stats->tx_bytes, stats->tx_packets,
++ stats->tx_errors, stats->tx_dropped,
++ stats->tx_fifo_errors, stats->collisions,
++ stats->tx_carrier_errors +
++ stats->tx_aborted_errors +
++ stats->tx_window_errors +
++ stats->tx_heartbeat_errors,
++ stats->tx_compressed);
++}
++
++/*
++ * Called from the PROCfs module. This now uses the new arbitrary sized
++ * /proc/net interface to create /proc/net/dev
++ */
++static int dev_seq_show(struct seq_file *seq, void *v)
++{
++ if (v == SEQ_START_TOKEN)
++ seq_puts(seq, "Inter-| Receive "
++ " | Transmit\n"
++ " face |bytes packets errs drop fifo frame "
++ "compressed multicast|bytes packets errs "
++ "drop fifo colls carrier compressed\n");
++ else
++ dev_seq_printf_stats(seq, v);
++ return 0;
++}
++
++static struct netif_rx_stats *softnet_get_online(loff_t *pos)
++{
++ struct netif_rx_stats *rc = NULL;
++
++ while (*pos < nr_cpu_ids)
++ if (cpu_online(*pos)) {
++ rc = &per_cpu(netdev_rx_stat, *pos);
++ break;
++ } else
++ ++*pos;
++ return rc;
++}
++
++static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
++{
++ return softnet_get_online(pos);
++}
++
++static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ ++*pos;
++ return softnet_get_online(pos);
++}
++
++static void softnet_seq_stop(struct seq_file *seq, void *v)
++{
++}
++
++static int softnet_seq_show(struct seq_file *seq, void *v)
++{
++ struct netif_rx_stats *s = v;
++
++ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
++ s->total, s->dropped, s->time_squeeze, 0,
++ 0, 0, 0, 0, /* was fastroute */
++		   s->cpu_collision);
++ return 0;
++}
++
++static const struct seq_operations dev_seq_ops = {
++ .start = dev_seq_start,
++ .next = dev_seq_next,
++ .stop = dev_seq_stop,
++ .show = dev_seq_show,
++};
++
++static int dev_seq_open(struct inode *inode, struct file *file)
++{
++ return seq_open_net(inode, file, &dev_seq_ops,
++ sizeof(struct seq_net_private));
++}
++
++static const struct file_operations dev_seq_fops = {
++ .owner = THIS_MODULE,
++ .open = dev_seq_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release_net,
++};
++
++static const struct seq_operations softnet_seq_ops = {
++ .start = softnet_seq_start,
++ .next = softnet_seq_next,
++ .stop = softnet_seq_stop,
++ .show = softnet_seq_show,
++};
++
++static int softnet_seq_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &softnet_seq_ops);
++}
++
++static const struct file_operations softnet_seq_fops = {
++ .owner = THIS_MODULE,
++ .open = softnet_seq_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
++static void *ptype_get_idx(loff_t pos)
++{
++ struct packet_type *pt = NULL;
++ loff_t i = 0;
++ int t;
++
++ list_for_each_entry_rcu(pt, &ptype_all, list) {
++ if (i == pos)
++ return pt;
++ ++i;
++ }
++
++ for (t = 0; t < PTYPE_HASH_SIZE; t++) {
++ list_for_each_entry_rcu(pt, &ptype_base[t], list) {
++ if (i == pos)
++ return pt;
++ ++i;
++ }
++ }
++ return NULL;
++}
++
++static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
++ __acquires(RCU)
++{
++ rcu_read_lock();
++ return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
++}
++
++static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++ struct packet_type *pt;
++ struct list_head *nxt;
++ int hash;
++
++ ++*pos;
++ if (v == SEQ_START_TOKEN)
++ return ptype_get_idx(0);
++
++ pt = v;
++ nxt = pt->list.next;
++ if (pt->type == htons(ETH_P_ALL)) {
++ if (nxt != &ptype_all)
++ goto found;
++ hash = 0;
++ nxt = ptype_base[0].next;
++ } else
++ hash = ntohs(pt->type) & PTYPE_HASH_MASK;
++
++ while (nxt == &ptype_base[hash]) {
++ if (++hash >= PTYPE_HASH_SIZE)
++ return NULL;
++ nxt = ptype_base[hash].next;
++ }
++found:
++ return list_entry(nxt, struct packet_type, list);
++}
++
++static void ptype_seq_stop(struct seq_file *seq, void *v)
++ __releases(RCU)
++{
++ rcu_read_unlock();
++}
++
++static int ptype_seq_show(struct seq_file *seq, void *v)
++{
++ struct packet_type *pt = v;
++
++ if (v == SEQ_START_TOKEN)
++ seq_puts(seq, "Type Device Function\n");
++ else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
++ if (pt->type == htons(ETH_P_ALL))
++ seq_puts(seq, "ALL ");
++ else
++ seq_printf(seq, "%04x", ntohs(pt->type));
++
++ seq_printf(seq, " %-8s %pF\n",
++ pt->dev ? pt->dev->name : "", pt->func);
++ }
++
++ return 0;
++}
++
++static const struct seq_operations ptype_seq_ops = {
++ .start = ptype_seq_start,
++ .next = ptype_seq_next,
++ .stop = ptype_seq_stop,
++ .show = ptype_seq_show,
++};
++
++static int ptype_seq_open(struct inode *inode, struct file *file)
++{
++ return seq_open_net(inode, file, &ptype_seq_ops,
++ sizeof(struct seq_net_private));
++}
++
++static const struct file_operations ptype_seq_fops = {
++ .owner = THIS_MODULE,
++ .open = ptype_seq_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release_net,
++};
++
++
++static int __net_init dev_proc_net_init(struct net *net)
++{
++ int rc = -ENOMEM;
++
++ if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
++ goto out;
++ if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
++ goto out_dev;
++ if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
++ goto out_softnet;
++
++ if (wext_proc_init(net))
++ goto out_ptype;
++ rc = 0;
++out:
++ return rc;
++out_ptype:
++ proc_net_remove(net, "ptype");
++out_softnet:
++ proc_net_remove(net, "softnet_stat");
++out_dev:
++ proc_net_remove(net, "dev");
++ goto out;
++}
++
++static void __net_exit dev_proc_net_exit(struct net *net)
++{
++ wext_proc_exit(net);
++
++ proc_net_remove(net, "ptype");
++ proc_net_remove(net, "softnet_stat");
++ proc_net_remove(net, "dev");
++}
++
++static struct pernet_operations __net_initdata dev_proc_ops = {
++ .init = dev_proc_net_init,
++ .exit = dev_proc_net_exit,
++};
++
++static int __init dev_proc_init(void)
++{
++ return register_pernet_subsys(&dev_proc_ops);
++}
++#else
++#define dev_proc_init() 0
++#endif /* CONFIG_PROC_FS */
++
++
++/**
++ * netdev_set_master - set up master/slave pair
++ * @slave: slave device
++ * @master: new master device
++ *
++ * Changes the master device of the slave. Pass %NULL to break the
++ * bonding. The caller must hold the RTNL semaphore. On a failure
++ * a negative errno code is returned. On success the reference counts
++ * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
++ * function returns zero.
++ */
++int netdev_set_master(struct net_device *slave, struct net_device *master)
++{
++ struct net_device *old = slave->master;
++
++ ASSERT_RTNL();
++
++ if (master) {
++ if (old)
++ return -EBUSY;
++ dev_hold(master);
++ }
++
++ slave->master = master;
++
++ synchronize_net();
++
++ if (old)
++ dev_put(old);
++
++ if (master)
++ slave->flags |= IFF_SLAVE;
++ else
++ slave->flags &= ~IFF_SLAVE;
++
++ rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
++ return 0;
++}
++
++static void dev_change_rx_flags(struct net_device *dev, int flags)
++{
++ const struct net_device_ops *ops = dev->netdev_ops;
++
++ if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
++ ops->ndo_change_rx_flags(dev, flags);
++}
++
++static int __dev_set_promiscuity(struct net_device *dev, int inc)
++{
++ unsigned short old_flags = dev->flags;
++ uid_t uid;
++ gid_t gid;
++
++ ASSERT_RTNL();
++
++ dev->flags |= IFF_PROMISC;
++ dev->promiscuity += inc;
++ if (dev->promiscuity == 0) {
++ /*
++ * Avoid overflow.
++ * If inc causes overflow, untouch promisc and return error.
++ */
++ if (inc < 0)
++ dev->flags &= ~IFF_PROMISC;
++ else {
++ dev->promiscuity -= inc;
++ printk(KERN_WARNING "%s: promiscuity touches roof, "
++ "set promiscuity failed, promiscuity feature "
++ "of device might be broken.\n", dev->name);
++ return -EOVERFLOW;
++ }
++ }
++ if (dev->flags != old_flags) {
++ printk(KERN_INFO "device %s %s promiscuous mode\n",
++ dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
++ "left");
++ if (audit_enabled) {
++ current_uid_gid(&uid, &gid);
++ audit_log(current->audit_context, GFP_ATOMIC,
++ AUDIT_ANOM_PROMISCUOUS,
++ "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
++ dev->name, (dev->flags & IFF_PROMISC),
++ (old_flags & IFF_PROMISC),
++ audit_get_loginuid(current),
++ uid, gid,
++ audit_get_sessionid(current));
++ }
++
++ dev_change_rx_flags(dev, IFF_PROMISC);
++ }
++ return 0;
++}
++
++/**
++ * dev_set_promiscuity - update promiscuity count on a device
++ * @dev: device
++ * @inc: modifier
++ *
++ * Add or remove promiscuity from a device. While the count in the device
++ * remains above zero the interface remains promiscuous. Once it hits zero
++ * the device reverts back to normal filtering operation. A negative inc
++ * value is used to drop promiscuity on the device.
++ * Return 0 if successful or a negative errno code on error.
++ */
++int dev_set_promiscuity(struct net_device *dev, int inc)
++{
++ unsigned short old_flags = dev->flags;
++ int err;
++
++ err = __dev_set_promiscuity(dev, inc);
++ if (err < 0)
++ return err;
++ if (dev->flags != old_flags)
++ dev_set_rx_mode(dev);
++ return err;
++}
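
Because the promiscuity count is reference counted, several in-kernel users
(packet taps, bridging, bonding) can overlap without trampling each other.
A hedged sketch of a PF_RING-style capture module bracketing its use of a
device; note that dev_set_promiscuity() must be called under the RTNL:

    #include <linux/netdevice.h>
    #include <linux/rtnetlink.h>

    static void capture_bind(struct net_device *dev)
    {
        rtnl_lock();
        dev_set_promiscuity(dev, 1);   /* enter promiscuous mode */
        rtnl_unlock();
    }

    static void capture_unbind(struct net_device *dev)
    {
        rtnl_lock();
        dev_set_promiscuity(dev, -1);  /* drop our reference */
        rtnl_unlock();
    }
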
++
++/**
++ * dev_set_allmulti - update allmulti count on a device
++ * @dev: device
++ * @inc: modifier
++ *
++ * Add or remove reception of all multicast frames to a device. While the
++ * count in the device remains above zero the interface remains listening
++ * to all interfaces. Once it hits zero the device reverts back to normal
++ * to all multicast addresses. Once it hits zero the device reverts back to normal
++ * when releasing a resource needing all multicasts.
++ * Return 0 if successful or a negative errno code on error.
++ */
++
++int dev_set_allmulti(struct net_device *dev, int inc)
++{
++ unsigned short old_flags = dev->flags;
++
++ ASSERT_RTNL();
++
++ dev->flags |= IFF_ALLMULTI;
++ dev->allmulti += inc;
++ if (dev->allmulti == 0) {
++ /*
++ * Avoid overflow.
++ * If inc causes overflow, untouch allmulti and return error.
++ */
++ if (inc < 0)
++ dev->flags &= ~IFF_ALLMULTI;
++ else {
++ dev->allmulti -= inc;
++ printk(KERN_WARNING "%s: allmulti touches roof, "
++ "set allmulti failed, allmulti feature of "
++ "device might be broken.\n", dev->name);
++ return -EOVERFLOW;
++ }
++ }
++ if (dev->flags ^ old_flags) {
++ dev_change_rx_flags(dev, IFF_ALLMULTI);
++ dev_set_rx_mode(dev);
++ }
++ return 0;
++}
++
++/*
++ * Upload unicast and multicast address lists to device and
++ * configure RX filtering. When the device doesn't support unicast
++ * filtering it is put in promiscuous mode while unicast addresses
++ * are present.
++ */
++void __dev_set_rx_mode(struct net_device *dev)
++{
++ const struct net_device_ops *ops = dev->netdev_ops;
++
++ /* dev_open will call this function so the list will stay sane. */
++ if (!(dev->flags&IFF_UP))
++ return;
++
++ if (!netif_device_present(dev))
++ return;
++
++ if (ops->ndo_set_rx_mode)
++ ops->ndo_set_rx_mode(dev);
++ else {
++		/* Unicast address changes may only happen under the rtnl,
++ * therefore calling __dev_set_promiscuity here is safe.
++ */
++ if (dev->uc_count > 0 && !dev->uc_promisc) {
++ __dev_set_promiscuity(dev, 1);
++ dev->uc_promisc = 1;
++ } else if (dev->uc_count == 0 && dev->uc_promisc) {
++ __dev_set_promiscuity(dev, -1);
++ dev->uc_promisc = 0;
++ }
++
++ if (ops->ndo_set_multicast_list)
++ ops->ndo_set_multicast_list(dev);
++ }
++}
++
++void dev_set_rx_mode(struct net_device *dev)
++{
++ netif_addr_lock_bh(dev);
++ __dev_set_rx_mode(dev);
++ netif_addr_unlock_bh(dev);
++}
++
++int __dev_addr_delete(struct dev_addr_list **list, int *count,
++ void *addr, int alen, int glbl)
++{
++ struct dev_addr_list *da;
++
++ for (; (da = *list) != NULL; list = &da->next) {
++ if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
++ alen == da->da_addrlen) {
++ if (glbl) {
++ int old_glbl = da->da_gusers;
++ da->da_gusers = 0;
++ if (old_glbl == 0)
++ break;
++ }
++ if (--da->da_users)
++ return 0;
++
++ *list = da->next;
++ kfree(da);
++ (*count)--;
++ return 0;
++ }
++ }
++ return -ENOENT;
++}
++
++int __dev_addr_add(struct dev_addr_list **list, int *count,
++ void *addr, int alen, int glbl)
++{
++ struct dev_addr_list *da;
++
++ for (da = *list; da != NULL; da = da->next) {
++ if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
++ da->da_addrlen == alen) {
++ if (glbl) {
++ int old_glbl = da->da_gusers;
++ da->da_gusers = 1;
++ if (old_glbl)
++ return 0;
++ }
++ da->da_users++;
++ return 0;
++ }
++ }
++
++ da = kzalloc(sizeof(*da), GFP_ATOMIC);
++ if (da == NULL)
++ return -ENOMEM;
++ memcpy(da->da_addr, addr, alen);
++ da->da_addrlen = alen;
++ da->da_users = 1;
++ da->da_gusers = glbl ? 1 : 0;
++ da->next = *list;
++ *list = da;
++ (*count)++;
++ return 0;
++}
++
++/**
++ * dev_unicast_delete - Release secondary unicast address.
++ * @dev: device
++ * @addr: address to delete
++ * @alen: length of @addr
++ *
++ * Release reference to a secondary unicast address and remove it
++ * from the device if the reference count drops to zero.
++ *
++ * The caller must hold the rtnl_mutex.
++ */
++int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
++{
++ int err;
++
++ ASSERT_RTNL();
++
++ netif_addr_lock_bh(dev);
++ err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
++ if (!err)
++ __dev_set_rx_mode(dev);
++ netif_addr_unlock_bh(dev);
++ return err;
++}
++EXPORT_SYMBOL(dev_unicast_delete);
++
++/**
++ * dev_unicast_add - add a secondary unicast address
++ * @dev: device
++ * @addr: address to add
++ * @alen: length of @addr
++ *
++ * Add a secondary unicast address to the device or increase
++ * the reference count if it already exists.
++ *
++ * The caller must hold the rtnl_mutex.
++ */
++int dev_unicast_add(struct net_device *dev, void *addr, int alen)
++{
++ int err;
++
++ ASSERT_RTNL();
++
++ netif_addr_lock_bh(dev);
++ err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
++ if (!err)
++ __dev_set_rx_mode(dev);
++ netif_addr_unlock_bh(dev);
++ return err;
++}
++EXPORT_SYMBOL(dev_unicast_add);
++
++int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
++ struct dev_addr_list **from, int *from_count)
++{
++ struct dev_addr_list *da, *next;
++ int err = 0;
++
++ da = *from;
++ while (da != NULL) {
++ next = da->next;
++ if (!da->da_synced) {
++ err = __dev_addr_add(to, to_count,
++ da->da_addr, da->da_addrlen, 0);
++ if (err < 0)
++ break;
++ da->da_synced = 1;
++ da->da_users++;
++ } else if (da->da_users == 1) {
++ __dev_addr_delete(to, to_count,
++ da->da_addr, da->da_addrlen, 0);
++ __dev_addr_delete(from, from_count,
++ da->da_addr, da->da_addrlen, 0);
++ }
++ da = next;
++ }
++ return err;
++}
++
++void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
++ struct dev_addr_list **from, int *from_count)
++{
++ struct dev_addr_list *da, *next;
++
++ da = *from;
++ while (da != NULL) {
++ next = da->next;
++ if (da->da_synced) {
++ __dev_addr_delete(to, to_count,
++ da->da_addr, da->da_addrlen, 0);
++ da->da_synced = 0;
++ __dev_addr_delete(from, from_count,
++ da->da_addr, da->da_addrlen, 0);
++ }
++ da = next;
++ }
++}
++
++/**
++ * dev_unicast_sync - Synchronize device's unicast list to another device
++ * @to: destination device
++ * @from: source device
++ *
++ * Add newly added addresses to the destination device and release
++ * addresses that have no users left. The source device must be
++ * locked by netif_addr_lock_bh().
++ *
++ * This function is intended to be called from the dev->set_rx_mode
++ * function of layered software devices.
++ */
++int dev_unicast_sync(struct net_device *to, struct net_device *from)
++{
++ int err = 0;
++
++ netif_addr_lock_bh(to);
++ err = __dev_addr_sync(&to->uc_list, &to->uc_count,
++ &from->uc_list, &from->uc_count);
++ if (!err)
++ __dev_set_rx_mode(to);
++ netif_addr_unlock_bh(to);
++ return err;
++}
++EXPORT_SYMBOL(dev_unicast_sync);
++
++/**
++ * dev_unicast_unsync - Remove synchronized addresses from the destination device
++ * @to: destination device
++ * @from: source device
++ *
++ * Remove all addresses that were added to the destination device by
++ * dev_unicast_sync(). This function is intended to be called from the
++ * dev->stop function of layered software devices.
++ */
++void dev_unicast_unsync(struct net_device *to, struct net_device *from)
++{
++ netif_addr_lock_bh(from);
++ netif_addr_lock(to);
++
++ __dev_addr_unsync(&to->uc_list, &to->uc_count,
++ &from->uc_list, &from->uc_count);
++ __dev_set_rx_mode(to);
++
++ netif_addr_unlock(to);
++ netif_addr_unlock_bh(from);
++}
++EXPORT_SYMBOL(dev_unicast_unsync);
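++
++/*
++ * Sketch of the intended calling pattern for a layered device such as
++ * a VLAN (illustrative only; the vlan_* names are hypothetical): the
++ * upper device syncs its unicast list down to the real device from its
++ * set_rx_mode hook, and unsyncs it again when the device is stopped.
++ *
++ *	static void vlan_set_rx_mode(struct net_device *vlan_dev)
++ *	{
++ *		dev_unicast_sync(vlan_real_dev(vlan_dev), vlan_dev);
++ *	}
++ *
++ *	static int vlan_stop(struct net_device *vlan_dev)
++ *	{
++ *		dev_unicast_unsync(vlan_real_dev(vlan_dev), vlan_dev);
++ *		return 0;
++ *	}
++ */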
++
++static void __dev_addr_discard(struct dev_addr_list **list)
++{
++ struct dev_addr_list *tmp;
++
++ while (*list != NULL) {
++ tmp = *list;
++ *list = tmp->next;
++ if (tmp->da_users > tmp->da_gusers)
++ printk("__dev_addr_discard: address leakage! "
++ "da_users=%d\n", tmp->da_users);
++ kfree(tmp);
++ }
++}
++
++static void dev_addr_discard(struct net_device *dev)
++{
++ netif_addr_lock_bh(dev);
++
++ __dev_addr_discard(&dev->uc_list);
++ dev->uc_count = 0;
++
++ __dev_addr_discard(&dev->mc_list);
++ dev->mc_count = 0;
++
++ netif_addr_unlock_bh(dev);
++}
++
++/**
++ * dev_get_flags - get flags reported to userspace
++ * @dev: device
++ *
++ * Get the combination of flag bits exported through APIs to userspace.
++ */
++unsigned dev_get_flags(const struct net_device *dev)
++{
++ unsigned flags;
++
++ flags = (dev->flags & ~(IFF_PROMISC |
++ IFF_ALLMULTI |
++ IFF_RUNNING |
++ IFF_LOWER_UP |
++ IFF_DORMANT)) |
++ (dev->gflags & (IFF_PROMISC |
++ IFF_ALLMULTI));
++
++ if (netif_running(dev)) {
++ if (netif_oper_up(dev))
++ flags |= IFF_RUNNING;
++ if (netif_carrier_ok(dev))
++ flags |= IFF_LOWER_UP;
++ if (netif_dormant(dev))
++ flags |= IFF_DORMANT;
++ }
++
++ return flags;
++}
++
++/**
++ * dev_change_flags - change device settings
++ * @dev: device
++ * @flags: device state flags
++ *
++ * Change settings on the device based on the given state flags.
++ * The flags are in the userspace exported format.
++ */
++int dev_change_flags(struct net_device *dev, unsigned flags)
++{
++ int ret, changes;
++ int old_flags = dev->flags;
++
++ ASSERT_RTNL();
++
++ /*
++ * Set the flags on our device.
++ */
++
++ dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
++ IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
++ IFF_AUTOMEDIA)) |
++ (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
++ IFF_ALLMULTI));
++
++ /*
++ * Load in the correct multicast list now the flags have changed.
++ */
++
++ if ((old_flags ^ flags) & IFF_MULTICAST)
++ dev_change_rx_flags(dev, IFF_MULTICAST);
++
++ dev_set_rx_mode(dev);
++
++ /*
++ * Have we downed the interface. We handle IFF_UP ourselves
++	 *	Have we downed the interface? We handle IFF_UP ourselves
++ * setting it.
++ */
++
++ ret = 0;
++ if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
++ ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
++
++ if (!ret)
++ dev_set_rx_mode(dev);
++ }
++
++ if (dev->flags & IFF_UP &&
++ ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
++ IFF_VOLATILE)))
++ call_netdevice_notifiers(NETDEV_CHANGE, dev);
++
++ if ((flags ^ dev->gflags) & IFF_PROMISC) {
++ int inc = (flags & IFF_PROMISC) ? +1 : -1;
++ dev->gflags ^= IFF_PROMISC;
++ dev_set_promiscuity(dev, inc);
++ }
++
++ /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
++	   is important. Some (broken) drivers set IFF_PROMISC when
++	   IFF_ALLMULTI is requested, without asking us and without reporting it.
++ */
++ if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
++ int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
++ dev->gflags ^= IFF_ALLMULTI;
++ dev_set_allmulti(dev, inc);
++ }
++
++ /* Exclude state transition flags, already notified */
++ changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
++ if (changes)
++ rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
++
++ return ret;
++}
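++
++/*
++ * Worked example (illustrative only): bringing an interface up amounts
++ * to setting IFF_UP in the userspace-format flags under the RTNL lock;
++ * dev_change_flags() then calls dev_open() and sends the notifications.
++ *
++ *	rtnl_lock();
++ *	err = dev_change_flags(dev, dev->flags | IFF_UP);
++ *	rtnl_unlock();
++ */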
++
++/**
++ * dev_set_mtu - Change maximum transfer unit
++ * @dev: device
++ * @new_mtu: new transfer unit
++ *
++ * Change the maximum transfer size of the network device.
++ */
++int dev_set_mtu(struct net_device *dev, int new_mtu)
++{
++ const struct net_device_ops *ops = dev->netdev_ops;
++ int err;
++
++ if (new_mtu == dev->mtu)
++ return 0;
++
++ /* MTU must be positive. */
++ if (new_mtu < 0)
++ return -EINVAL;
++
++ if (!netif_device_present(dev))
++ return -ENODEV;
++
++ err = 0;
++ if (ops->ndo_change_mtu)
++ err = ops->ndo_change_mtu(dev, new_mtu);
++ else
++ dev->mtu = new_mtu;
++
++ if (!err && dev->flags & IFF_UP)
++ call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
++ return err;
++}
++
++/**
++ * dev_set_mac_address - Change Media Access Control Address
++ * @dev: device
++ * @sa: new address
++ *
++ * Change the hardware (MAC) address of the device
++ */
++int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
++{
++ const struct net_device_ops *ops = dev->netdev_ops;
++ int err;
++
++ if (!ops->ndo_set_mac_address)
++ return -EOPNOTSUPP;
++ if (sa->sa_family != dev->type)
++ return -EINVAL;
++ if (!netif_device_present(dev))
++ return -ENODEV;
++ err = ops->ndo_set_mac_address(dev, sa);
++ if (!err)
++ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
++ return err;
++}
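++
++/*
++ * Minimal sketch (illustrative only; "new_mac" is hypothetical):
++ * changing the MTU and the MAC address from kernel code. Both helpers
++ * expect the caller to hold the RTNL lock.
++ *
++ *	struct sockaddr sa;
++ *
++ *	sa.sa_family = dev->type;
++ *	memcpy(sa.sa_data, new_mac, dev->addr_len);
++ *	rtnl_lock();
++ *	err = dev_set_mtu(dev, 1500);
++ *	if (!err)
++ *		err = dev_set_mac_address(dev, &sa);
++ *	rtnl_unlock();
++ */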
++
++/*
++ * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
++ */
++static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
++{
++ int err;
++ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
++
++ if (!dev)
++ return -ENODEV;
++
++ switch (cmd) {
++ case SIOCGIFFLAGS: /* Get interface flags */
++ ifr->ifr_flags = dev_get_flags(dev);
++ return 0;
++
++ case SIOCGIFMETRIC: /* Get the metric on the interface
++ (currently unused) */
++ ifr->ifr_metric = 0;
++ return 0;
++
++ case SIOCGIFMTU: /* Get the MTU of a device */
++ ifr->ifr_mtu = dev->mtu;
++ return 0;
++
++ case SIOCGIFHWADDR:
++ if (!dev->addr_len)
++ memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
++ else
++ memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
++ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
++ ifr->ifr_hwaddr.sa_family = dev->type;
++ return 0;
++
++ case SIOCGIFSLAVE:
++ err = -EINVAL;
++ break;
++
++ case SIOCGIFMAP:
++ ifr->ifr_map.mem_start = dev->mem_start;
++ ifr->ifr_map.mem_end = dev->mem_end;
++ ifr->ifr_map.base_addr = dev->base_addr;
++ ifr->ifr_map.irq = dev->irq;
++ ifr->ifr_map.dma = dev->dma;
++ ifr->ifr_map.port = dev->if_port;
++ return 0;
++
++ case SIOCGIFINDEX:
++ ifr->ifr_ifindex = dev->ifindex;
++ return 0;
++
++ case SIOCGIFTXQLEN:
++ ifr->ifr_qlen = dev->tx_queue_len;
++ return 0;
++
++ default:
++ /* dev_ioctl() should ensure this case
++ * is never reached
++ */
++ WARN_ON(1);
++ err = -EINVAL;
++ break;
++
++ }
++ return err;
++}
++
++/*
++ * Perform the SIOCxIFxxx calls, inside rtnl_lock()
++ */
++static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
++{
++ int err;
++ struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
++ const struct net_device_ops *ops;
++
++ if (!dev)
++ return -ENODEV;
++
++ ops = dev->netdev_ops;
++
++ switch (cmd) {
++ case SIOCSIFFLAGS: /* Set interface flags */
++ return dev_change_flags(dev, ifr->ifr_flags);
++
++ case SIOCSIFMETRIC: /* Set the metric on the interface
++ (currently unused) */
++ return -EOPNOTSUPP;
++
++ case SIOCSIFMTU: /* Set the MTU of a device */
++ return dev_set_mtu(dev, ifr->ifr_mtu);
++
++ case SIOCSIFHWADDR:
++ return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
++
++ case SIOCSIFHWBROADCAST:
++ if (ifr->ifr_hwaddr.sa_family != dev->type)
++ return -EINVAL;
++ memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
++ min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
++ call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
++ return 0;
++
++ case SIOCSIFMAP:
++ if (ops->ndo_set_config) {
++ if (!netif_device_present(dev))
++ return -ENODEV;
++ return ops->ndo_set_config(dev, &ifr->ifr_map);
++ }
++ return -EOPNOTSUPP;
++
++ case SIOCADDMULTI:
++ if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
++ ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
++ return -EINVAL;
++ if (!netif_device_present(dev))
++ return -ENODEV;
++ return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
++ dev->addr_len, 1);
++
++ case SIOCDELMULTI:
++ if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
++ ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
++ return -EINVAL;
++ if (!netif_device_present(dev))
++ return -ENODEV;
++ return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
++ dev->addr_len, 1);
++
++ case SIOCSIFTXQLEN:
++ if (ifr->ifr_qlen < 0)
++ return -EINVAL;
++ dev->tx_queue_len = ifr->ifr_qlen;
++ return 0;
++
++ case SIOCSIFNAME:
++ ifr->ifr_newname[IFNAMSIZ-1] = '\0';
++ return dev_change_name(dev, ifr->ifr_newname);
++
++ /*
++ * Unknown or private ioctl
++ */
++
++ default:
++ if ((cmd >= SIOCDEVPRIVATE &&
++ cmd <= SIOCDEVPRIVATE + 15) ||
++ cmd == SIOCBONDENSLAVE ||
++ cmd == SIOCBONDRELEASE ||
++ cmd == SIOCBONDSETHWADDR ||
++ cmd == SIOCBONDSLAVEINFOQUERY ||
++ cmd == SIOCBONDINFOQUERY ||
++ cmd == SIOCBONDCHANGEACTIVE ||
++ cmd == SIOCGMIIPHY ||
++ cmd == SIOCGMIIREG ||
++ cmd == SIOCSMIIREG ||
++ cmd == SIOCBRADDIF ||
++ cmd == SIOCBRDELIF ||
++ cmd == SIOCSHWTSTAMP ||
++ cmd == SIOCWANDEV) {
++ err = -EOPNOTSUPP;
++ if (ops->ndo_do_ioctl) {
++ if (netif_device_present(dev))
++ err = ops->ndo_do_ioctl(dev, ifr, cmd);
++ else
++ err = -ENODEV;
++ }
++ } else
++ err = -EINVAL;
++
++ }
++ return err;
++}
++
++/*
++ * This function handles all "interface"-type I/O control requests. The actual
++ * 'doing' part of this is dev_ifsioc above.
++ */
++
++/**
++ * dev_ioctl - network device ioctl
++ * @net: the applicable net namespace
++ * @cmd: command to issue
++ * @arg: pointer to a struct ifreq in user space
++ *
++ * Issue ioctl functions to devices. This is normally called by the
++ * user space syscall interfaces but can sometimes be useful for
++ * other purposes. The return value is the return from the syscall if
++ * positive or a negative errno code on error.
++ */
++
++int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
++{
++ struct ifreq ifr;
++ int ret;
++ char *colon;
++
++ /* One special case: SIOCGIFCONF takes ifconf argument
++ and requires shared lock, because it sleeps writing
++ to user space.
++ */
++
++ if (cmd == SIOCGIFCONF) {
++ rtnl_lock();
++ ret = dev_ifconf(net, (char __user *) arg);
++ rtnl_unlock();
++ return ret;
++ }
++ if (cmd == SIOCGIFNAME)
++ return dev_ifname(net, (struct ifreq __user *)arg);
++
++ if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
++ return -EFAULT;
++
++ ifr.ifr_name[IFNAMSIZ-1] = 0;
++
++ colon = strchr(ifr.ifr_name, ':');
++ if (colon)
++ *colon = 0;
++
++ /*
++ * See which interface the caller is talking about.
++ */
++
++ switch (cmd) {
++ /*
++ * These ioctl calls:
++ * - can be done by all.
++ * - atomic and do not require locking.
++ * - return a value
++ */
++ case SIOCGIFFLAGS:
++ case SIOCGIFMETRIC:
++ case SIOCGIFMTU:
++ case SIOCGIFHWADDR:
++ case SIOCGIFSLAVE:
++ case SIOCGIFMAP:
++ case SIOCGIFINDEX:
++ case SIOCGIFTXQLEN:
++ dev_load(net, ifr.ifr_name);
++ read_lock(&dev_base_lock);
++ ret = dev_ifsioc_locked(net, &ifr, cmd);
++ read_unlock(&dev_base_lock);
++ if (!ret) {
++ if (colon)
++ *colon = ':';
++ if (copy_to_user(arg, &ifr,
++ sizeof(struct ifreq)))
++ ret = -EFAULT;
++ }
++ return ret;
++
++ case SIOCETHTOOL:
++ dev_load(net, ifr.ifr_name);
++ rtnl_lock();
++ ret = dev_ethtool(net, &ifr);
++ rtnl_unlock();
++ if (!ret) {
++ if (colon)
++ *colon = ':';
++ if (copy_to_user(arg, &ifr,
++ sizeof(struct ifreq)))
++ ret = -EFAULT;
++ }
++ return ret;
++
++ /*
++ * These ioctl calls:
++ * - require superuser power.
++ * - require strict serialization.
++ * - return a value
++ */
++ case SIOCGMIIPHY:
++ case SIOCGMIIREG:
++ case SIOCSIFNAME:
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++ dev_load(net, ifr.ifr_name);
++ rtnl_lock();
++ ret = dev_ifsioc(net, &ifr, cmd);
++ rtnl_unlock();
++ if (!ret) {
++ if (colon)
++ *colon = ':';
++ if (copy_to_user(arg, &ifr,
++ sizeof(struct ifreq)))
++ ret = -EFAULT;
++ }
++ return ret;
++
++ /*
++ * These ioctl calls:
++ * - require superuser power.
++ * - require strict serialization.
++ * - do not return a value
++ */
++ case SIOCSIFFLAGS:
++ case SIOCSIFMETRIC:
++ case SIOCSIFMTU:
++ case SIOCSIFMAP:
++ case SIOCSIFHWADDR:
++ case SIOCSIFSLAVE:
++ case SIOCADDMULTI:
++ case SIOCDELMULTI:
++ case SIOCSIFHWBROADCAST:
++ case SIOCSIFTXQLEN:
++ case SIOCSMIIREG:
++ case SIOCBONDENSLAVE:
++ case SIOCBONDRELEASE:
++ case SIOCBONDSETHWADDR:
++ case SIOCBONDCHANGEACTIVE:
++ case SIOCBRADDIF:
++ case SIOCBRDELIF:
++ case SIOCSHWTSTAMP:
++ if (!capable(CAP_NET_ADMIN))
++ return -EPERM;
++ /* fall through */
++ case SIOCBONDSLAVEINFOQUERY:
++ case SIOCBONDINFOQUERY:
++ dev_load(net, ifr.ifr_name);
++ rtnl_lock();
++ ret = dev_ifsioc(net, &ifr, cmd);
++ rtnl_unlock();
++ return ret;
++
++ case SIOCGIFMEM:
++ /* Get the per device memory space. We can add this but
++ * currently do not support it */
++ case SIOCSIFMEM:
++ /* Set the per device memory buffer space.
++ * Not applicable in our case */
++ case SIOCSIFLINK:
++ return -EINVAL;
++
++ /*
++ * Unknown or private ioctl.
++ */
++ default:
++ if (cmd == SIOCWANDEV ||
++ (cmd >= SIOCDEVPRIVATE &&
++ cmd <= SIOCDEVPRIVATE + 15)) {
++ dev_load(net, ifr.ifr_name);
++ rtnl_lock();
++ ret = dev_ifsioc(net, &ifr, cmd);
++ rtnl_unlock();
++ if (!ret && copy_to_user(arg, &ifr,
++ sizeof(struct ifreq)))
++ ret = -EFAULT;
++ return ret;
++ }
++ /* Take care of Wireless Extensions */
++ if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
++ return wext_handle_ioctl(net, &ifr, cmd, arg);
++ return -EINVAL;
++ }
++}
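++
++/*
++ * The userspace side of the above (illustrative only): a SIOCGIFFLAGS
++ * request travels through dev_ioctl() into dev_ifsioc_locked(), which
++ * fills in the flags computed by dev_get_flags().
++ *
++ *	struct ifreq ifr;
++ *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
++ *
++ *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
++ *	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0 &&
++ *	    (ifr.ifr_flags & IFF_UP))
++ *		printf("eth0 is up\n");
++ */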
++
++
++/**
++ * dev_new_index - allocate an ifindex
++ * @net: the applicable net namespace
++ *
++ * Returns a suitable unique value for a new device interface
++ * number. The caller must hold the rtnl semaphore or the
++ * dev_base_lock to be sure it remains unique.
++ */
++static int dev_new_index(struct net *net)
++{
++ static int ifindex;
++ for (;;) {
++ if (++ifindex <= 0)
++ ifindex = 1;
++ if (!__dev_get_by_index(net, ifindex))
++ return ifindex;
++ }
++}
++
++/* Delayed registration/unregisteration */
++static LIST_HEAD(net_todo_list);
++
++static void net_set_todo(struct net_device *dev)
++{
++ list_add_tail(&dev->todo_list, &net_todo_list);
++}
++
++static void rollback_registered(struct net_device *dev)
++{
++ BUG_ON(dev_boot_phase);
++ ASSERT_RTNL();
++
++ /* Some devices call without registering for initialization unwind. */
++ if (dev->reg_state == NETREG_UNINITIALIZED) {
++ printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
++ "was registered\n", dev->name, dev);
++
++ WARN_ON(1);
++ return;
++ }
++
++ BUG_ON(dev->reg_state != NETREG_REGISTERED);
++
++ /* If device is running, close it first. */
++ dev_close(dev);
++
++ /* And unlink it from device chain. */
++ unlist_netdevice(dev);
++
++ dev->reg_state = NETREG_UNREGISTERING;
++
++ synchronize_net();
++
++ /* Shutdown queueing discipline. */
++ dev_shutdown(dev);
++
++
++ /* Notify protocols, that we are about to destroy
++ this device. They should clean all the things.
++ */
++ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
++
++ /*
++ * Flush the unicast and multicast chains
++ */
++ dev_addr_discard(dev);
++
++ if (dev->netdev_ops->ndo_uninit)
++ dev->netdev_ops->ndo_uninit(dev);
++
++ /* Notifier chain MUST detach us from master device. */
++ WARN_ON(dev->master);
++
++ /* Remove entries from kobject tree */
++ netdev_unregister_kobject(dev);
++
++ synchronize_net();
++
++ dev_put(dev);
++}
++
++static void __netdev_init_queue_locks_one(struct net_device *dev,
++ struct netdev_queue *dev_queue,
++ void *_unused)
++{
++ spin_lock_init(&dev_queue->_xmit_lock);
++ netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
++ dev_queue->xmit_lock_owner = -1;
++}
++
++static void netdev_init_queue_locks(struct net_device *dev)
++{
++ netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
++ __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
++}
++
++unsigned long netdev_fix_features(unsigned long features, const char *name)
++{
++ /* Fix illegal SG+CSUM combinations. */
++ if ((features & NETIF_F_SG) &&
++ !(features & NETIF_F_ALL_CSUM)) {
++ if (name)
++ printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
++ "checksum feature.\n", name);
++ features &= ~NETIF_F_SG;
++ }
++
++ /* TSO requires that SG is present as well. */
++ if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
++ if (name)
++ printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
++ "SG feature.\n", name);
++ features &= ~NETIF_F_TSO;
++ }
++
++ if (features & NETIF_F_UFO) {
++ if (!(features & NETIF_F_GEN_CSUM)) {
++ if (name)
++ printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
++ "since no NETIF_F_HW_CSUM feature.\n",
++ name);
++ features &= ~NETIF_F_UFO;
++ }
++
++ if (!(features & NETIF_F_SG)) {
++ if (name)
++ printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
++ "since no NETIF_F_SG feature.\n", name);
++ features &= ~NETIF_F_UFO;
++ }
++ }
++
++ return features;
++}
++EXPORT_SYMBOL(netdev_fix_features);
++
++/* Some devices need to (re-)set their netdev_ops inside
++ * ->init() or similar. If that happens, we have to set up
++ * the compat pointers again.
++ */
++void netdev_resync_ops(struct net_device *dev)
++{
++#ifdef CONFIG_COMPAT_NET_DEV_OPS
++ const struct net_device_ops *ops = dev->netdev_ops;
++
++ dev->init = ops->ndo_init;
++ dev->uninit = ops->ndo_uninit;
++ dev->open = ops->ndo_open;
++ dev->change_rx_flags = ops->ndo_change_rx_flags;
++ dev->set_rx_mode = ops->ndo_set_rx_mode;
++ dev->set_multicast_list = ops->ndo_set_multicast_list;
++ dev->set_mac_address = ops->ndo_set_mac_address;
++ dev->validate_addr = ops->ndo_validate_addr;
++ dev->do_ioctl = ops->ndo_do_ioctl;
++ dev->set_config = ops->ndo_set_config;
++ dev->change_mtu = ops->ndo_change_mtu;
++ dev->neigh_setup = ops->ndo_neigh_setup;
++ dev->tx_timeout = ops->ndo_tx_timeout;
++ dev->get_stats = ops->ndo_get_stats;
++ dev->vlan_rx_register = ops->ndo_vlan_rx_register;
++ dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
++ dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
++#ifdef CONFIG_NET_POLL_CONTROLLER
++ dev->poll_controller = ops->ndo_poll_controller;
++#endif
++#endif
++}
++EXPORT_SYMBOL(netdev_resync_ops);
++
++/**
++ * register_netdevice - register a network device
++ * @dev: device to register
++ *
++ * Take a completed network device structure and add it to the kernel
++ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
++ * chain. 0 is returned on success. A negative errno code is returned
++ * on a failure to set up the device, or if the name is a duplicate.
++ *
++ * Callers must hold the rtnl semaphore. You may want
++ * register_netdev() instead of this.
++ *
++ * BUGS:
++ * The locking appears insufficient to guarantee two parallel registers
++ * will not get the same name.
++ */
++
++int register_netdevice(struct net_device *dev)
++{
++ struct hlist_head *head;
++ struct hlist_node *p;
++ int ret;
++ struct net *net = dev_net(dev);
++
++ BUG_ON(dev_boot_phase);
++ ASSERT_RTNL();
++
++ might_sleep();
++
++	/* When net_devices are persistent, this will be fatal. */
++ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
++ BUG_ON(!net);
++
++ spin_lock_init(&dev->addr_list_lock);
++ netdev_set_addr_lockdep_class(dev);
++ netdev_init_queue_locks(dev);
++
++ dev->iflink = -1;
++
++#ifdef CONFIG_COMPAT_NET_DEV_OPS
++ /* Netdevice_ops API compatibility support.
++ * This is temporary until all network devices are converted.
++ */
++ if (dev->netdev_ops) {
++ netdev_resync_ops(dev);
++ } else {
++ char drivername[64];
++ pr_info("%s (%s): not using net_device_ops yet\n",
++ dev->name, netdev_drivername(dev, drivername, 64));
++
++ /* This works only because net_device_ops and the
++ compatibility structure are the same. */
++ dev->netdev_ops = (void *) &(dev->init);
++ }
++#endif
++
++ /* Init, if this function is available */
++ if (dev->netdev_ops->ndo_init) {
++ ret = dev->netdev_ops->ndo_init(dev);
++ if (ret) {
++ if (ret > 0)
++ ret = -EIO;
++ goto out;
++ }
++ }
++
++ if (!dev_valid_name(dev->name)) {
++ ret = -EINVAL;
++ goto err_uninit;
++ }
++
++ dev->ifindex = dev_new_index(net);
++ if (dev->iflink == -1)
++ dev->iflink = dev->ifindex;
++
++ /* Check for existence of name */
++ head = dev_name_hash(net, dev->name);
++ hlist_for_each(p, head) {
++ struct net_device *d
++ = hlist_entry(p, struct net_device, name_hlist);
++ if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
++ ret = -EEXIST;
++ goto err_uninit;
++ }
++ }
++
++ /* Fix illegal checksum combinations */
++ if ((dev->features & NETIF_F_HW_CSUM) &&
++ (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
++ printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
++ dev->name);
++ dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
++ }
++
++ if ((dev->features & NETIF_F_NO_CSUM) &&
++ (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
++ printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
++ dev->name);
++ dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
++ }
++
++ dev->features = netdev_fix_features(dev->features, dev->name);
++
++ /* Enable software GSO if SG is supported. */
++ if (dev->features & NETIF_F_SG)
++ dev->features |= NETIF_F_GSO;
++
++ netdev_initialize_kobject(dev);
++ ret = netdev_register_kobject(dev);
++ if (ret)
++ goto err_uninit;
++ dev->reg_state = NETREG_REGISTERED;
++
++ /*
++ * Default initial state at registry is that the
++ * device is present.
++ */
++
++ set_bit(__LINK_STATE_PRESENT, &dev->state);
++
++ dev_init_scheduler(dev);
++ dev_hold(dev);
++ list_netdevice(dev);
++
++ /* Notify protocols, that a new device appeared. */
++ ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
++ ret = notifier_to_errno(ret);
++ if (ret) {
++ rollback_registered(dev);
++ dev->reg_state = NETREG_UNREGISTERED;
++ }
++
++out:
++ return ret;
++
++err_uninit:
++ if (dev->netdev_ops->ndo_uninit)
++ dev->netdev_ops->ndo_uninit(dev);
++ goto out;
++}
++
++/**
++ * init_dummy_netdev - init a dummy network device for NAPI
++ * @dev: device to init
++ *
++ * This takes a network device structure and initializes the minimum
++ * number of fields so that it can be used to schedule NAPI polls without
++ * registering a full blown interface. This is to be used by drivers
++ * that need to tie several hardware interfaces to a single NAPI
++ * poll scheduler due to HW limitations.
++ */
++int init_dummy_netdev(struct net_device *dev)
++{
++ /* Clear everything. Note we don't initialize spinlocks
++	 * as they aren't supposed to be taken by any of the
++ * NAPI code and this dummy netdev is supposed to be
++ * only ever used for NAPI polls
++ */
++ memset(dev, 0, sizeof(struct net_device));
++
++ /* make sure we BUG if trying to hit standard
++ * register/unregister code path
++ */
++ dev->reg_state = NETREG_DUMMY;
++
++ /* initialize the ref count */
++ atomic_set(&dev->refcnt, 1);
++
++ /* NAPI wants this */
++ INIT_LIST_HEAD(&dev->napi_list);
++
++ /* a dummy interface is started by default */
++ set_bit(__LINK_STATE_PRESENT, &dev->state);
++ set_bit(__LINK_STATE_START, &dev->state);
++
++ return 0;
++}
++EXPORT_SYMBOL_GPL(init_dummy_netdev);
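++
++/*
++ * Typical use (illustrative only; "my_poll" is a hypothetical NAPI
++ * handler): a driver whose hardware exposes several ports behind one
++ * interrupt hangs its NAPI context off a statically allocated dummy
++ * netdev instead of a registered interface.
++ *
++ *	static struct net_device dummy_dev;
++ *	static struct napi_struct napi;
++ *
++ *	init_dummy_netdev(&dummy_dev);
++ *	netif_napi_add(&dummy_dev, &napi, my_poll, 64);
++ */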
++
++
++/**
++ * register_netdev - register a network device
++ * @dev: device to register
++ *
++ * Take a completed network device structure and add it to the kernel
++ * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
++ * chain. 0 is returned on success. A negative errno code is returned
++ * on a failure to set up the device, or if the name is a duplicate.
++ *
++ * This is a wrapper around register_netdevice that takes the rtnl semaphore
++ * and expands the device name if you passed a format string to
++ * alloc_netdev.
++ */
++int register_netdev(struct net_device *dev)
++{
++ int err;
++
++ rtnl_lock();
++
++ /*
++ * If the name is a format string the caller wants us to do a
++ * name allocation.
++ */
++ if (strchr(dev->name, '%')) {
++ err = dev_alloc_name(dev, dev->name);
++ if (err < 0)
++ goto out;
++ }
++
++ err = register_netdevice(dev);
++out:
++ rtnl_unlock();
++ return err;
++}
++EXPORT_SYMBOL(register_netdev);
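++
++/*
++ * Minimal registration sketch (illustrative only; "my_priv" and
++ * "my_setup" are hypothetical): the "%d" in the name asks
++ * register_netdev() to run dev_alloc_name() on our behalf.
++ *
++ *	struct net_device *dev;
++ *
++ *	dev = alloc_netdev(sizeof(struct my_priv), "mydev%d", my_setup);
++ *	if (!dev)
++ *		return -ENOMEM;
++ *	err = register_netdev(dev);
++ *	if (err)
++ *		free_netdev(dev);
++ */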
++
++/*
++ * netdev_wait_allrefs - wait until all references are gone.
++ *
++ * This is called when unregistering network devices.
++ *
++ * Any protocol or device that holds a reference should register
++ * for netdevice notification, and cleanup and put back the
++ * reference if they receive an UNREGISTER event.
++ * We can get stuck here if buggy protocols don't correctly
++ * call dev_put.
++ */
++static void netdev_wait_allrefs(struct net_device *dev)
++{
++ unsigned long rebroadcast_time, warning_time;
++
++ rebroadcast_time = warning_time = jiffies;
++ while (atomic_read(&dev->refcnt) != 0) {
++ if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
++ rtnl_lock();
++
++ /* Rebroadcast unregister notification */
++ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
++
++ if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
++ &dev->state)) {
++ /* We must not have linkwatch events
++ * pending on unregister. If this
++ * happens, we simply run the queue
++ * unscheduled, resulting in a noop
++ * for this device.
++ */
++ linkwatch_run_queue();
++ }
++
++ __rtnl_unlock();
++
++ rebroadcast_time = jiffies;
++ }
++
++ msleep(250);
++
++ if (time_after(jiffies, warning_time + 10 * HZ)) {
++ printk(KERN_EMERG "unregister_netdevice: "
++ "waiting for %s to become free. Usage "
++ "count = %d\n",
++ dev->name, atomic_read(&dev->refcnt));
++ warning_time = jiffies;
++ }
++ }
++}
++
++/* The sequence is:
++ *
++ * rtnl_lock();
++ * ...
++ * register_netdevice(x1);
++ * register_netdevice(x2);
++ * ...
++ * unregister_netdevice(y1);
++ * unregister_netdevice(y2);
++ * ...
++ * rtnl_unlock();
++ * free_netdev(y1);
++ * free_netdev(y2);
++ *
++ * We are invoked by rtnl_unlock().
++ * This allows us to deal with problems:
++ * 1) We can delete sysfs objects which invoke hotplug
++ * without deadlocking with linkwatch via keventd.
++ * 2) Since we run with the RTNL semaphore not held, we can sleep
++ * safely in order to wait for the netdev refcnt to drop to zero.
++ *
++ * We must not return until all unregister events added during
++ * the interval the lock was held have been completed.
++ */
++void netdev_run_todo(void)
++{
++ struct list_head list;
++
++ /* Snapshot list, allow later requests */
++ list_replace_init(&net_todo_list, &list);
++
++ __rtnl_unlock();
++
++ while (!list_empty(&list)) {
++ struct net_device *dev
++ = list_entry(list.next, struct net_device, todo_list);
++ list_del(&dev->todo_list);
++
++ if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
++ printk(KERN_ERR "network todo '%s' but state %d\n",
++ dev->name, dev->reg_state);
++ dump_stack();
++ continue;
++ }
++
++ dev->reg_state = NETREG_UNREGISTERED;
++
++ on_each_cpu(flush_backlog, dev, 1);
++
++ netdev_wait_allrefs(dev);
++
++ /* paranoia */
++ BUG_ON(atomic_read(&dev->refcnt));
++ WARN_ON(dev->ip_ptr);
++ WARN_ON(dev->ip6_ptr);
++ WARN_ON(dev->dn_ptr);
++
++ if (dev->destructor)
++ dev->destructor(dev);
++
++ /* Free network device */
++ kobject_put(&dev->dev.kobj);
++ }
++}
++
++/**
++ * dev_get_stats - get network device statistics
++ * @dev: device to get statistics from
++ *
++ * Get network statistics from device. The device driver may provide
++ * its own method by setting dev->netdev_ops->get_stats; otherwise
++ * the internal statistics structure is used.
++ */
++const struct net_device_stats *dev_get_stats(struct net_device *dev)
++{
++ const struct net_device_ops *ops = dev->netdev_ops;
++
++ if (ops->ndo_get_stats)
++ return ops->ndo_get_stats(dev);
++ else
++ return &dev->stats;
++}
++EXPORT_SYMBOL(dev_get_stats);
++
++static void netdev_init_one_queue(struct net_device *dev,
++ struct netdev_queue *queue,
++ void *_unused)
++{
++ queue->dev = dev;
++}
++
++static void netdev_init_queues(struct net_device *dev)
++{
++ netdev_init_one_queue(dev, &dev->rx_queue, NULL);
++ netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
++ spin_lock_init(&dev->tx_global_lock);
++}
++
++/**
++ * alloc_netdev_mq - allocate network device
++ * @sizeof_priv: size of private data to allocate space for
++ * @name: device name format string
++ * @setup: callback to initialize device
++ * @queue_count: the number of subqueues to allocate
++ *
++ * Allocates a struct net_device with private data area for driver use
++ * and performs basic initialization. Also allocates subqueue structs
++ * for each queue on the device at the end of the netdevice.
++ */
++struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
++ void (*setup)(struct net_device *), unsigned int queue_count)
++{
++ struct netdev_queue *tx;
++ struct net_device *dev;
++ size_t alloc_size;
++ void *p;
++
++ BUG_ON(strlen(name) >= sizeof(dev->name));
++
++ alloc_size = sizeof(struct net_device);
++ if (sizeof_priv) {
++ /* ensure 32-byte alignment of private area */
++ alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
++ alloc_size += sizeof_priv;
++ }
++ /* ensure 32-byte alignment of whole construct */
++ alloc_size += NETDEV_ALIGN_CONST;
++
++ p = kzalloc(alloc_size, GFP_KERNEL);
++ if (!p) {
++ printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
++ return NULL;
++ }
++
++ tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
++ if (!tx) {
++ printk(KERN_ERR "alloc_netdev: Unable to allocate "
++ "tx qdiscs.\n");
++ kfree(p);
++ return NULL;
++ }
++
++ dev = (struct net_device *)
++ (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
++ dev->padded = (char *)dev - (char *)p;
++ dev_net_set(dev, &init_net);
++
++ dev->_tx = tx;
++ dev->num_tx_queues = queue_count;
++ dev->real_num_tx_queues = queue_count;
++
++ dev->gso_max_size = GSO_MAX_SIZE;
++
++ netdev_init_queues(dev);
++
++ INIT_LIST_HEAD(&dev->napi_list);
++ setup(dev);
++ strcpy(dev->name, name);
++ return dev;
++}
++EXPORT_SYMBOL(alloc_netdev_mq);
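++
++/*
++ * Multiqueue sketch (illustrative only; "my_priv" is hypothetical):
++ * an Ethernet driver asking for eight TX subqueues, using the stock
++ * ether_setup() callback from <linux/etherdevice.h>.
++ *
++ *	dev = alloc_netdev_mq(sizeof(struct my_priv), "eth%d",
++ *			      ether_setup, 8);
++ */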
++
++/**
++ * free_netdev - free network device
++ * @dev: device
++ *
++ * This function does the last stage of destroying an allocated device
++ * interface. The reference to the device object is released.
++ * If this is the last reference then it will be freed.
++ */
++void free_netdev(struct net_device *dev)
++{
++ struct napi_struct *p, *n;
++
++ release_net(dev_net(dev));
++
++ kfree(dev->_tx);
++
++ list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
++ netif_napi_del(p);
++
++ /* Compatibility with error handling in drivers */
++ if (dev->reg_state == NETREG_UNINITIALIZED) {
++ kfree((char *)dev - dev->padded);
++ return;
++ }
++
++ BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
++ dev->reg_state = NETREG_RELEASED;
++
++ /* will free via device release */
++ put_device(&dev->dev);
++}
++
++/**
++ * synchronize_net - Synchronize with packet receive processing
++ *
++ * Wait for packets currently being received to be done.
++ * Does not block later packets from starting.
++ */
++void synchronize_net(void)
++{
++ might_sleep();
++ synchronize_rcu();
++}
++
++/**
++ * unregister_netdevice - remove device from the kernel
++ * @dev: device
++ *
++ * This function shuts down a device interface and removes it
++ * from the kernel tables.
++ *
++ * Callers must hold the rtnl semaphore. You may want
++ * unregister_netdev() instead of this.
++ */
++
++void unregister_netdevice(struct net_device *dev)
++{
++ ASSERT_RTNL();
++
++ rollback_registered(dev);
++ /* Finish processing unregister after unlock */
++ net_set_todo(dev);
++}
++
++/**
++ * unregister_netdev - remove device from the kernel
++ * @dev: device
++ *
++ * This function shuts down a device interface and removes it
++ * from the kernel tables.
++ *
++ * This is just a wrapper for unregister_netdevice that takes
++ * the rtnl semaphore. In general you want to use this and not
++ * unregister_netdevice.
++ */
++void unregister_netdev(struct net_device *dev)
++{
++ rtnl_lock();
++ unregister_netdevice(dev);
++ rtnl_unlock();
++}
++
++EXPORT_SYMBOL(unregister_netdev);
++
++/**
++ * dev_change_net_namespace - move device to a different network namespace
++ * @dev: device
++ * @net: network namespace
++ * @pat: If not NULL name pattern to try if the current device name
++ * is already taken in the destination network namespace.
++ *
++ * This function shuts down a device interface and moves it
++ * to a new network namespace. On success 0 is returned, on
++ * a failure a negative errno code is returned.
++ *
++ * Callers must hold the rtnl semaphore.
++ */
++
++int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
++{
++ char buf[IFNAMSIZ];
++ const char *destname;
++ int err;
++
++ ASSERT_RTNL();
++
++ /* Don't allow namespace local devices to be moved. */
++ err = -EINVAL;
++ if (dev->features & NETIF_F_NETNS_LOCAL)
++ goto out;
++
++#ifdef CONFIG_SYSFS
++ /* Don't allow real devices to be moved when sysfs
++ * is enabled.
++ */
++ err = -EINVAL;
++ if (dev->dev.parent)
++ goto out;
++#endif
++
++	/* Ensure the device has been registered */
++ err = -EINVAL;
++ if (dev->reg_state != NETREG_REGISTERED)
++ goto out;
++
++	/* Get out if there is nothing to do */
++ err = 0;
++ if (net_eq(dev_net(dev), net))
++ goto out;
++
++ /* Pick the destination device name, and ensure
++ * we can use it in the destination network namespace.
++ */
++ err = -EEXIST;
++ destname = dev->name;
++ if (__dev_get_by_name(net, destname)) {
++ /* We get here if we can't use the current device name */
++ if (!pat)
++ goto out;
++ if (!dev_valid_name(pat))
++ goto out;
++ if (strchr(pat, '%')) {
++ if (__dev_alloc_name(net, pat, buf) < 0)
++ goto out;
++ destname = buf;
++ } else
++ destname = pat;
++ if (__dev_get_by_name(net, destname))
++ goto out;
++ }
++
++ /*
++	 * And now a mini version of register_netdevice and unregister_netdevice.
++ */
++
++ /* If device is running close it first. */
++ dev_close(dev);
++
++ /* And unlink it from device chain */
++ err = -ENODEV;
++ unlist_netdevice(dev);
++
++ synchronize_net();
++
++ /* Shutdown queueing discipline. */
++ dev_shutdown(dev);
++
++ /* Notify protocols, that we are about to destroy
++ this device. They should clean all the things.
++ */
++ call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
++
++ /*
++ * Flush the unicast and multicast chains
++ */
++ dev_addr_discard(dev);
++
++ netdev_unregister_kobject(dev);
++
++ /* Actually switch the network namespace */
++ dev_net_set(dev, net);
++
++ /* Assign the new device name */
++ if (destname != dev->name)
++ strcpy(dev->name, destname);
++
++ /* If there is an ifindex conflict assign a new one */
++ if (__dev_get_by_index(net, dev->ifindex)) {
++ int iflink = (dev->iflink == dev->ifindex);
++ dev->ifindex = dev_new_index(net);
++ if (iflink)
++ dev->iflink = dev->ifindex;
++ }
++
++ /* Fixup kobjects */
++ err = netdev_register_kobject(dev);
++ WARN_ON(err);
++
++ /* Add the device back in the hashes */
++ list_netdevice(dev);
++
++ /* Notify protocols, that a new device appeared. */
++ call_netdevice_notifiers(NETDEV_REGISTER, dev);
++
++ synchronize_net();
++ err = 0;
++out:
++ return err;
++}
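++
++/*
++ * Worked example (illustrative only; "other_net" is hypothetical):
++ * moving a device into another namespace, falling back to an "eth%d"
++ * pattern if its current name is already taken there. This mirrors
++ * what default_device_exit() below does in the opposite direction.
++ *
++ *	rtnl_lock();
++ *	err = dev_change_net_namespace(dev, other_net, "eth%d");
++ *	rtnl_unlock();
++ */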
++
++static int dev_cpu_callback(struct notifier_block *nfb,
++ unsigned long action,
++ void *ocpu)
++{
++ struct sk_buff **list_skb;
++ struct Qdisc **list_net;
++ struct sk_buff *skb;
++ unsigned int cpu, oldcpu = (unsigned long)ocpu;
++ struct softnet_data *sd, *oldsd;
++
++ if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
++ return NOTIFY_OK;
++
++ local_irq_disable();
++ cpu = smp_processor_id();
++ sd = &per_cpu(softnet_data, cpu);
++ oldsd = &per_cpu(softnet_data, oldcpu);
++
++ /* Find end of our completion_queue. */
++ list_skb = &sd->completion_queue;
++ while (*list_skb)
++ list_skb = &(*list_skb)->next;
++ /* Append completion queue from offline CPU. */
++ *list_skb = oldsd->completion_queue;
++ oldsd->completion_queue = NULL;
++
++ /* Find end of our output_queue. */
++ list_net = &sd->output_queue;
++ while (*list_net)
++ list_net = &(*list_net)->next_sched;
++ /* Append output queue from offline CPU. */
++ *list_net = oldsd->output_queue;
++ oldsd->output_queue = NULL;
++
++ raise_softirq_irqoff(NET_TX_SOFTIRQ);
++ local_irq_enable();
++
++ /* Process offline CPU's input_pkt_queue */
++ while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
++ netif_rx(skb);
++
++ return NOTIFY_OK;
++}
++
++
++/**
++ * netdev_increment_features - increment feature set by one
++ * @all: current feature set
++ * @one: new feature set
++ * @mask: mask feature set
++ *
++ * Computes a new feature set after adding a device with feature set
++ * @one to the master device with current feature set @all. Will not
++ * enable anything that is off in @mask. Returns the new feature set.
++ */
++unsigned long netdev_increment_features(unsigned long all, unsigned long one,
++ unsigned long mask)
++{
++ /* If device needs checksumming, downgrade to it. */
++ if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
++ all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
++ else if (mask & NETIF_F_ALL_CSUM) {
++ /* If one device supports v4/v6 checksumming, set for all. */
++ if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
++ !(all & NETIF_F_GEN_CSUM)) {
++ all &= ~NETIF_F_ALL_CSUM;
++ all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
++ }
++
++ /* If one device supports hw checksumming, set for all. */
++ if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
++ all &= ~NETIF_F_ALL_CSUM;
++ all |= NETIF_F_HW_CSUM;
++ }
++ }
++
++ one |= NETIF_F_ALL_CSUM;
++
++ one |= all & NETIF_F_ONE_FOR_ALL;
++ all &= one | NETIF_F_LLTX | NETIF_F_GSO;
++ all |= one & mask & NETIF_F_ONE_FOR_ALL;
++
++ return all;
++}
++EXPORT_SYMBOL(netdev_increment_features);
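++
++/*
++ * Worked example (illustrative only): a bonding-style master whose
++ * current set is NETIF_F_NO_CSUM enslaves a device that only offers
++ * NETIF_F_IP_CSUM. The first branch above clears NETIF_F_NO_CSUM and
++ * downgrades the master to the slave's weaker NETIF_F_IP_CSUM.
++ *
++ *	master->features = netdev_increment_features(master->features,
++ *						     slave->features,
++ *						     NETIF_F_ONE_FOR_ALL);
++ */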
++
++static struct hlist_head *netdev_create_hash(void)
++{
++ int i;
++ struct hlist_head *hash;
++
++ hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
++ if (hash != NULL)
++ for (i = 0; i < NETDEV_HASHENTRIES; i++)
++ INIT_HLIST_HEAD(&hash[i]);
++
++ return hash;
++}
++
++/* Initialize per network namespace state */
++static int __net_init netdev_init(struct net *net)
++{
++ INIT_LIST_HEAD(&net->dev_base_head);
++
++ net->dev_name_head = netdev_create_hash();
++ if (net->dev_name_head == NULL)
++ goto err_name;
++
++ net->dev_index_head = netdev_create_hash();
++ if (net->dev_index_head == NULL)
++ goto err_idx;
++
++ return 0;
++
++err_idx:
++ kfree(net->dev_name_head);
++err_name:
++ return -ENOMEM;
++}
++
++/**
++ * netdev_drivername - network driver for the device
++ * @dev: network device
++ * @buffer: buffer for resulting name
++ * @len: size of buffer
++ *
++ * Determine network driver for device.
++ */
++char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
++{
++ const struct device_driver *driver;
++ const struct device *parent;
++
++ if (len <= 0 || !buffer)
++ return buffer;
++ buffer[0] = 0;
++
++ parent = dev->dev.parent;
++
++ if (!parent)
++ return buffer;
++
++ driver = parent->driver;
++ if (driver && driver->name)
++ strlcpy(buffer, driver->name, len);
++ return buffer;
++}
++
++static void __net_exit netdev_exit(struct net *net)
++{
++ kfree(net->dev_name_head);
++ kfree(net->dev_index_head);
++}
++
++static struct pernet_operations __net_initdata netdev_net_ops = {
++ .init = netdev_init,
++ .exit = netdev_exit,
++};
++
++static void __net_exit default_device_exit(struct net *net)
++{
++ struct net_device *dev;
++ /*
++	 * Push all migratable network devices back to the
++ * initial network namespace
++ */
++ rtnl_lock();
++restart:
++ for_each_netdev(net, dev) {
++ int err;
++ char fb_name[IFNAMSIZ];
++
++ /* Ignore unmoveable devices (i.e. loopback) */
++ if (dev->features & NETIF_F_NETNS_LOCAL)
++ continue;
++
++ /* Delete virtual devices */
++ if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
++ dev->rtnl_link_ops->dellink(dev);
++ goto restart;
++ }
++
++		/* Push remaining network devices to init_net */
++ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
++ err = dev_change_net_namespace(dev, &init_net, fb_name);
++ if (err) {
++ printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
++ __func__, dev->name, err);
++ BUG();
++ }
++ goto restart;
++ }
++ rtnl_unlock();
++}
++
++static struct pernet_operations __net_initdata default_device_ops = {
++ .exit = default_device_exit,
++};
++
++/*
++ * Initialize the DEV module. At boot time this walks the device list and
++ * unhooks any devices that fail to initialise (normally hardware not
++ * present) and leaves us with a valid list of present and active devices.
++ *
++ */
++
++/*
++ * This is called single threaded during boot, so no need
++ * to take the rtnl semaphore.
++ */
++static int __init net_dev_init(void)
++{
++ int i, rc = -ENOMEM;
++
++ BUG_ON(!dev_boot_phase);
++
++ if (dev_proc_init())
++ goto out;
++
++ if (netdev_kobject_init())
++ goto out;
++
++ INIT_LIST_HEAD(&ptype_all);
++ for (i = 0; i < PTYPE_HASH_SIZE; i++)
++ INIT_LIST_HEAD(&ptype_base[i]);
++
++ if (register_pernet_subsys(&netdev_net_ops))
++ goto out;
++
++ /*
++ * Initialise the packet receive queues.
++ */
++
++ for_each_possible_cpu(i) {
++ struct softnet_data *queue;
++
++ queue = &per_cpu(softnet_data, i);
++ skb_queue_head_init(&queue->input_pkt_queue);
++ queue->completion_queue = NULL;
++ INIT_LIST_HEAD(&queue->poll_list);
++
++ queue->backlog.poll = process_backlog;
++ queue->backlog.weight = weight_p;
++ queue->backlog.gro_list = NULL;
++ queue->backlog.gro_count = 0;
++ }
++
++ dev_boot_phase = 0;
++
++	/* The loopback device is special: if any other network device
++	 * is present in a network namespace, the loopback device must
++	 * be present too. Since we now dynamically allocate and free the
++	 * loopback device, ensure this invariant is maintained by
++	 * keeping the loopback device as the first device on the
++	 * list of network devices, so that it is the first device that
++	 * appears and the last network device that disappears.
++ */
++ if (register_pernet_device(&loopback_net_ops))
++ goto out;
++
++ if (register_pernet_device(&default_device_ops))
++ goto out;
++
++ open_softirq(NET_TX_SOFTIRQ, net_tx_action);
++ open_softirq(NET_RX_SOFTIRQ, net_rx_action);
++
++ hotcpu_notifier(dev_cpu_callback, 0);
++ dst_init();
++ dev_mcast_init();
++ rc = 0;
++out:
++ return rc;
++}
++
++subsys_initcall(net_dev_init);
++
++static int __init initialize_hashrnd(void)
++{
++ get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
++ return 0;
++}
++
++late_initcall_sync(initialize_hashrnd);
++
++EXPORT_SYMBOL(__dev_get_by_index);
++EXPORT_SYMBOL(__dev_get_by_name);
++EXPORT_SYMBOL(__dev_remove_pack);
++EXPORT_SYMBOL(dev_valid_name);
++EXPORT_SYMBOL(dev_add_pack);
++EXPORT_SYMBOL(dev_alloc_name);
++EXPORT_SYMBOL(dev_close);
++EXPORT_SYMBOL(dev_get_by_flags);
++EXPORT_SYMBOL(dev_get_by_index);
++EXPORT_SYMBOL(dev_get_by_name);
++EXPORT_SYMBOL(dev_open);
++EXPORT_SYMBOL(dev_queue_xmit);
++EXPORT_SYMBOL(dev_remove_pack);
++EXPORT_SYMBOL(dev_set_allmulti);
++EXPORT_SYMBOL(dev_set_promiscuity);
++EXPORT_SYMBOL(dev_change_flags);
++EXPORT_SYMBOL(dev_set_mtu);
++EXPORT_SYMBOL(dev_set_mac_address);
++EXPORT_SYMBOL(free_netdev);
++EXPORT_SYMBOL(netdev_boot_setup_check);
++EXPORT_SYMBOL(netdev_set_master);
++EXPORT_SYMBOL(netdev_state_change);
++EXPORT_SYMBOL(netif_receive_skb);
++EXPORT_SYMBOL(netif_rx);
++EXPORT_SYMBOL(register_gifconf);
++EXPORT_SYMBOL(register_netdevice);
++EXPORT_SYMBOL(register_netdevice_notifier);
++EXPORT_SYMBOL(skb_checksum_help);
++EXPORT_SYMBOL(synchronize_net);
++EXPORT_SYMBOL(unregister_netdevice);
++EXPORT_SYMBOL(unregister_netdevice_notifier);
++EXPORT_SYMBOL(net_enable_timestamp);
++EXPORT_SYMBOL(net_disable_timestamp);
++EXPORT_SYMBOL(dev_get_flags);
++
++#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
++EXPORT_SYMBOL(br_handle_frame_hook);
++EXPORT_SYMBOL(br_fdb_get_hook);
++EXPORT_SYMBOL(br_fdb_put_hook);
++#endif
++
++EXPORT_SYMBOL(dev_load);
++
++EXPORT_PER_CPU_SYMBOL(softnet_data);
+diff --unified --recursive --new-file linux-2.6.30/net/Kconfig linux-2.6.30-1-686-smp-PF_RING/net/Kconfig
+--- linux-2.6.30/net/Kconfig 2009-06-10 05:05:27.000000000 +0200
++++ linux-2.6.30-1-686-smp-PF_RING/net/Kconfig 2009-07-21 04:40:31.395512101 +0200
+@@ -30,6 +30,7 @@
+ source "net/xfrm/Kconfig"
+ source "net/iucv/Kconfig"
+
++source "net/ring/Kconfig"
+ config INET
+ bool "TCP/IP networking"
+ ---help---
+diff --unified --recursive --new-file linux-2.6.30/net/Makefile linux-2.6.30-1-686-smp-PF_RING/net/Makefile
+--- linux-2.6.30/net/Makefile 2009-06-10 05:05:27.000000000 +0200
++++ linux-2.6.30-1-686-smp-PF_RING/net/Makefile 2009-07-21 04:40:31.378997724 +0200
+@@ -46,6 +46,7 @@
+ obj-$(CONFIG_PHONET) += phonet/
+ ifneq ($(CONFIG_VLAN_8021Q),)
+ obj-y += 8021q/
++obj-$(CONFIG_RING) += ring/
+ endif
+ obj-$(CONFIG_IP_DCCP) += dccp/
+ obj-$(CONFIG_IP_SCTP) += sctp/
+diff --unified --recursive --new-file linux-2.6.30/net/Makefile.ORG linux-2.6.30-1-686-smp-PF_RING/net/Makefile.ORG
+--- linux-2.6.30/net/Makefile.ORG 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30-1-686-smp-PF_RING/net/Makefile.ORG 2009-07-21 04:40:31.369103612 +0200
+@@ -0,0 +1,67 @@
++#
++# Makefile for the linux networking.
++#
++# 2 Sep 2000, Christoph Hellwig <hch@infradead.org>
++# Rewritten to use lists instead of if-statements.
++#
++
++obj-y := nonet.o
++
++obj-$(CONFIG_NET) := socket.o core/
++
++tmp-$(CONFIG_COMPAT) := compat.o
++obj-$(CONFIG_NET) += $(tmp-y)
++
++# LLC has to be linked before the files in net/802/
++obj-$(CONFIG_LLC) += llc/
++obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
++obj-$(CONFIG_NETFILTER) += netfilter/
++obj-$(CONFIG_INET) += ipv4/
++obj-$(CONFIG_XFRM) += xfrm/
++obj-$(CONFIG_UNIX) += unix/
++ifneq ($(CONFIG_IPV6),)
++obj-y += ipv6/
++endif
++obj-$(CONFIG_PACKET) += packet/
++obj-$(CONFIG_NET_KEY) += key/
++obj-$(CONFIG_NET_SCHED) += sched/
++obj-$(CONFIG_BRIDGE) += bridge/
++obj-$(CONFIG_NET_DSA) += dsa/
++obj-$(CONFIG_IPX) += ipx/
++obj-$(CONFIG_ATALK) += appletalk/
++obj-$(CONFIG_WAN_ROUTER) += wanrouter/
++obj-$(CONFIG_X25) += x25/
++obj-$(CONFIG_LAPB) += lapb/
++obj-$(CONFIG_NETROM) += netrom/
++obj-$(CONFIG_ROSE) += rose/
++obj-$(CONFIG_AX25) += ax25/
++obj-$(CONFIG_CAN) += can/
++obj-$(CONFIG_IRDA) += irda/
++obj-$(CONFIG_BT) += bluetooth/
++obj-$(CONFIG_SUNRPC) += sunrpc/
++obj-$(CONFIG_AF_RXRPC) += rxrpc/
++obj-$(CONFIG_ATM) += atm/
++obj-$(CONFIG_DECNET) += decnet/
++obj-$(CONFIG_ECONET) += econet/
++obj-$(CONFIG_PHONET) += phonet/
++ifneq ($(CONFIG_VLAN_8021Q),)
++obj-y += 8021q/
++endif
++obj-$(CONFIG_IP_DCCP) += dccp/
++obj-$(CONFIG_IP_SCTP) += sctp/
++obj-$(CONFIG_RDS) += rds/
++obj-y += wireless/
++obj-$(CONFIG_MAC80211) += mac80211/
++obj-$(CONFIG_TIPC) += tipc/
++obj-$(CONFIG_NETLABEL) += netlabel/
++obj-$(CONFIG_IUCV) += iucv/
++obj-$(CONFIG_RFKILL) += rfkill/
++obj-$(CONFIG_NET_9P) += 9p/
++ifneq ($(CONFIG_DCB),)
++obj-y += dcb/
++endif
++
++ifeq ($(CONFIG_NET),y)
++obj-$(CONFIG_SYSCTL) += sysctl_net.o
++endif
++obj-$(CONFIG_WIMAX) += wimax/
+diff --unified --recursive --new-file linux-2.6.30/net/ring/Kconfig linux-2.6.30-1-686-smp-PF_RING/net/ring/Kconfig
+--- linux-2.6.30/net/ring/Kconfig 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30-1-686-smp-PF_RING/net/ring/Kconfig 2009-07-21 04:40:31.399104158 +0200
+@@ -0,0 +1,14 @@
++config RING
++ tristate "PF_RING sockets (EXPERIMENTAL)"
++ depends on EXPERIMENTAL
++ ---help---
++ PF_RING socket family, optimized for packet capture.
++	  If a PF_RING socket is bound to an adapter (via the bind() system
++	  call), that adapter will be used in read-only mode until the socket
++	  is destroyed. Whenever an incoming packet is received from the adapter,
++	  it is not passed to the upper layers but is instead copied to a ring
++	  buffer, which in turn is exported to user space applications via mmap.
++ Please refer to http://luca.ntop.org/Ring.pdf for more.
++
++ Say N unless you know what you are doing.
++
+diff --unified --recursive --new-file linux-2.6.30/net/ring/Makefile linux-2.6.30-1-686-smp-PF_RING/net/ring/Makefile
+--- linux-2.6.30/net/ring/Makefile 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30-1-686-smp-PF_RING/net/ring/Makefile 2009-07-21 04:40:31.315770393 +0200
+@@ -0,0 +1,7 @@
++#
++# Makefile for the ring driver.
++#
++
++obj-m += ring.o
++
++ring-objs := ring_packet.o
+diff --unified --recursive --new-file linux-2.6.30/net/ring/ring_packet.c linux-2.6.30-1-686-smp-PF_RING/net/ring/ring_packet.c
+--- linux-2.6.30/net/ring/ring_packet.c 1970-01-01 01:00:00.000000000 +0100
++++ linux-2.6.30-1-686-smp-PF_RING/net/ring/ring_packet.c 2009-07-21 04:40:31.315770393 +0200
+@@ -0,0 +1,4897 @@
++/* ***************************************************************
++ *
++ * (C) 2004-09 - Luca Deri <deri@ntop.org>
++ *
++ * This code includes contributions courtesy of
++ * - Amit D. Chaudhary <amit_ml@rajgad.com>
++ * - Andrew Gallatin <gallatyn@myri.com>
++ * - Brad Doctor <brad@stillsecure.com>
++ * - Felipe Huici <felipe.huici@nw.neclab.eu>
++ * - Francesco Fusco <fusco@ntop.org> (IP defrag)
++ * - Helmut Manck <helmut.manck@secunet.com>
++ * - Hitoshi Irino <irino@sfc.wide.ad.jp>
++ * - Jakov Haron <jyh@cabel.net>
++ * - Jeff Randall <jrandall@nexvu.com>
++ * - Kevin Wormington <kworm@sofnet.com>
++ * - Mahdi Dashtbozorgi <rdfm2000@gmail.com>
++ * - Marketakis Yannis <marketak@ics.forth.gr>
++ * - Matthew J. Roth <mroth@imminc.com>
++ * - Michael Stiller <ms@2scale.net> (VM memory support)
++ * - Noam Dev <noamdev@gmail.com>
++ * - Siva Kollipara <siva@cs.arizona.edu>
++ * - Vincent Carrier <vicarrier@wanadoo.fr>
++ * - Eugene Bogush <b_eugene@ukr.net>
++ * - Samir Chang <coobyhb@gmail.com>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU General Public License for more details.
++ *
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software Foundation,
++ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
++ *
++ */
++
++#include <linux/version.h>
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
++#include <linux/autoconf.h>
++#else
++#include <linux/config.h>
++#endif
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/socket.h>
++#include <linux/skbuff.h>
++#include <linux/rtnetlink.h>
++#include <linux/in.h>
++#include <linux/inet.h>
++#include <linux/in6.h>
++#include <linux/init.h>
++#include <linux/filter.h>
++#include <linux/ring.h>
++#include <linux/ip.h>
++#include <linux/tcp.h>
++#include <linux/udp.h>
++#include <linux/list.h>
++#include <linux/netdevice.h>
++#include <linux/proc_fs.h>
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#include <net/xfrm.h>
++#else
++#include <linux/poll.h>
++#endif
++#include <net/sock.h>
++#include <asm/io.h> /* needed for virt_to_phys() */
++#ifdef CONFIG_INET
++#include <net/inet_common.h>
++#endif
++#include <net/ip.h>
++
++/* ================================================== */
++
++/*
++ * regcomp and regexec -- regsub and regerror are elsewhere
++ * @(#)regexp.c 1.3 of 18 April 87
++ *
++ * Copyright (c) 1986 by University of Toronto.
++ * Written by Henry Spencer. Not derived from licensed software.
++ *
++ * Permission is granted to anyone to use this software for any
++ * purpose on any computer system, and to redistribute it freely,
++ * subject to the following restrictions:
++ *
++ * 1. The author is not responsible for the consequences of use of
++ * this software, no matter how awful, even if they arise
++ * from defects in it.
++ *
++ * 2. The origin of this software must not be misrepresented, either
++ * by explicit claim or by omission.
++ *
++ * 3. Altered versions must be plainly marked as such, and must not
++ * be misrepresented as being the original software.
++ *
++ * Beware that some of this code is subtly aware of the way operator
++ * precedence is structured in regular expressions. Serious changes in
++ * regular-expression syntax might require a total rethink.
++ *
++ * This code was modified by Ethan Sommer to work within the kernel
++ * (it now uses kmalloc etc..)
++ *
++ * Modified slightly by Matthew Strait to use more modern C.
++ */
++
++/* added by ethan and matt. Lets it work in both kernel and user space.
++ (So iptables can use it, for instance.) Yea, it goes both ways... */
++#if __KERNEL__
++#define malloc(foo) kmalloc(foo,GFP_ATOMIC)
++#else
++#define printk(format,args...) printf(format,##args)
++#endif
++
++void regerror(char * s)
++{
++ printk("<3>Regexp: %s\n", s);
++ /* NOTREACHED */
++}
++
++/*
++ * The "internal use only" fields in regexp.h are present to pass info from
++ * compile to execute that permits the execute phase to run lots faster on
++ * simple cases. They are:
++ *
++ * regstart char that must begin a match; '\0' if none obvious
++ * reganch is the match anchored (at beginning-of-line only)?
++ * regmust string (pointer into program) that match must include, or NULL
++ * regmlen length of regmust string
++ *
++ * Regstart and reganch permit very fast decisions on suitable starting points
++ * for a match, cutting down the work a lot. Regmust permits fast rejection
++ * of lines that cannot possibly match. The regmust tests are costly enough
++ * that regcomp() supplies a regmust only if the r.e. contains something
++ * potentially expensive (at present, the only such thing detected is * or +
++ * at the start of the r.e., which can involve a lot of backup). Regmlen is
++ * supplied because the test in regexec() needs it and regcomp() is computing
++ * it anyway.
++ */
++
++/*
++ * Structure for regexp "program". This is essentially a linear encoding
++ * of a nondeterministic finite-state machine (aka syntax charts or
++ * "railroad normal form" in parsing technology). Each node is an opcode
++ * plus a "next" pointer, possibly plus an operand. "Next" pointers of
++ * all nodes except BRANCH implement concatenation; a "next" pointer with
++ * a BRANCH on both ends of it is connecting two alternatives. (Here we
++ * have one of the subtle syntax dependencies: an individual BRANCH (as
++ * opposed to a collection of them) is never concatenated with anything
++ * because of operator precedence.) The operand of some types of node is
++ * a literal string; for others, it is a node leading into a sub-FSM. In
++ * particular, the operand of a BRANCH node is the first node of the branch.
++ * (NB this is *not* a tree structure: the tail of the branch connects
++ * to the thing following the set of BRANCHes.) The opcodes are:
++ */
++
++/* definition number opnd? meaning */
++#define END 0 /* no End of program. */
++#define BOL 1 /* no Match "" at beginning of line. */
++#define EOL 2 /* no Match "" at end of line. */
++#define ANY 3 /* no Match any one character. */
++#define ANYOF 4 /* str Match any character in this string. */
++#define ANYBUT 5 /* str Match any character not in this string. */
++#define BRANCH 6 /* node Match this alternative, or the next... */
++#define BACK 7 /* no Match "", "next" ptr points backward. */
++#define EXACTLY 8 /* str Match this string. */
++#define NOTHING 9 /* no Match empty string. */
++#define STAR 10 /* node Match this (simple) thing 0 or more times. */
++#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
++#define OPEN 20 /* no Mark this point in input as start of #n. */
++ /* OPEN+1 is number 1, etc. */
++#define CLOSE 30 /* no Analogous to OPEN. */
++
++/*
++ * Opcode notes:
++ *
++ * BRANCH The set of branches constituting a single choice are hooked
++ * together with their "next" pointers, since precedence prevents
++ * anything being concatenated to any individual branch. The
++ * "next" pointer of the last BRANCH in a choice points to the
++ * thing following the whole choice. This is also where the
++ * final "next" pointer of each individual branch points; each
++ * branch starts with the operand node of a BRANCH node.
++ *
++ * BACK Normal "next" pointers all implicitly point forward; BACK
++ * exists to make loop structures possible.
++ *
++ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
++ * BRANCH structures using BACK. Simple cases (one character
++ * per match) are implemented with STAR and PLUS for speed
++ * and to minimize recursive plunges.
++ *
++ * OPEN,CLOSE ...are numbered at compile time.
++ */
++
++/*
++ * A node is one char of opcode followed by two chars of "next" pointer.
++ * "Next" pointers are stored as two 8-bit pieces, high order first. The
++ * value is a positive offset from the opcode of the node containing it.
++ * An operand, if any, simply follows the node. (Note that much of the
++ * code generation knows about this implicit relationship.)
++ *
++ * Using two bytes for the "next" pointer is vast overkill for most things,
++ * but allows patterns to get big without disasters.
++ */
++#define OP(p) (*(p))
++#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
++#define OPERAND(p) ((p) + 3)
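++
++/*
++ * Decoding example: for a node at p whose three bytes are
++ * { BRANCH, 0x01, 0x02 }, OP(p) == BRANCH and
++ * NEXT(p) == (0x01 << 8) + 0x02 == 258, i.e. the next node starts 258
++ * bytes after p (or 258 bytes before it, if OP(p) were BACK).
++ */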
++
++/*
++ * See regmagic.h for one further detail of program structure.
++ */
++
++
++/*
++ * Utility definitions.
++ */
++#ifndef CHARBITS
++#define UCHARAT(p) ((int)*(unsigned char *)(p))
++#else
++#define UCHARAT(p) ((int)*(p)&CHARBITS)
++#endif
++
++#define FAIL(m) { regerror(m); return(NULL); }
++#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
++#define META "^$.[()|?+*\\"
++
++/*
++ * Flags to be passed up and down.
++ */
++#define HASWIDTH 01 /* Known never to match null string. */
++#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
++#define SPSTART 04 /* Starts with * or +. */
++#define WORST 0 /* Worst case. */
++
++/*
++ * Global work variables for regcomp().
++ */
++struct match_globals {
++ char *reginput; /* String-input pointer. */
++ char *regbol; /* Beginning of input, for ^ check. */
++ char **regstartp; /* Pointer to startp array. */
++ char **regendp; /* Ditto for endp. */
++ char *regparse; /* Input-scan pointer. */
++ int regnpar; /* () count. */
++ char regdummy;
++	char *regcode;		/* Code-emit pointer; &regdummy = don't. */
++ long regsize; /* Code size. */
++};
++
++/*
++ * Forward declarations for regcomp()'s friends.
++ */
++#ifndef STATIC
++#define STATIC static
++#endif
++STATIC char *reg(struct match_globals *g, int paren, int *flagp);
++STATIC char *regbranch(struct match_globals *g, int *flagp);
++STATIC char *regpiece(struct match_globals *g, int *flagp);
++STATIC char *regatom(struct match_globals *g, int *flagp);
++STATIC char *regnode(struct match_globals *g, char op);
++STATIC char *regnext(struct match_globals *g, char *p);
++STATIC void regc(struct match_globals *g, char b);
++STATIC void reginsert(struct match_globals *g, char op, char *opnd);
++STATIC void regtail(struct match_globals *g, char *p, char *val);
++STATIC void regoptail(struct match_globals *g, char *p, char *val);
++
++static u_int8_t case_insensitive = 1;
++
++__kernel_size_t my_strcspn(const char *s1,const char *s2)
++{
++ char *scan1;
++ char *scan2;
++ int count;
++
++ count = 0;
++ for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) {
++ for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */
++ if (*scan1 == *scan2++)
++ return(count);
++ count++;
++ }
++ return(count);
++}
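++
++/* E.g. my_strcspn("foo bar", " ") == 3: three characters precede the
++   first blank, matching the semantics of the userland strcspn(). */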
++
++/* ********************************************** */
++
++inline char tolower(char c) {
++ return ((c >= 'A' && c <= 'Z') ? c + 32: c);
++}
++
++inline void string2lower(char* str, int str_len) {
++ int i;
++
++ for(i=0; i<str_len; i++) str[i] = tolower(str[i]);
++}
++
++/* ********************************************** */
++
++/*
++ - regcomp - compile a regular expression into internal code
++ *
++ * We can't allocate space until we know how big the compiled form will be,
++ * but we can't compile it (and thus know how big it is) until we've got a
++ * place to put the code. So we cheat: we compile it twice, once with code
++ * generation turned off and size counting turned on, and once "for real".
++ * This also means that we don't allocate space until we are sure that the
++ * thing really will compile successfully, and we never have to move the
++ * code and thus invalidate pointers into it. (Note that it has to be in
++ * one piece because free() must be able to free it all.)
++ *
++ * Beware that the optimization-preparation code in here knows about some
++ * of the structure of the compiled regexp.
++ */
++regexp *
++regcomp(char *exp,int *patternsize)
++{
++ register regexp *r;
++ register char *scan;
++ register char *longest;
++ register int len;
++ int flags;
++ struct match_globals g;
++
++ /* commented out by ethan
++ extern char *malloc();
++ */
++
++ if (exp == NULL)
++ FAIL("NULL argument");
++
++ if(case_insensitive) string2lower(exp, strlen(exp));
++
++ /* First pass: determine size, legality. */
++ g.regparse = exp;
++ g.regnpar = 1;
++ g.regsize = 0L;
++ g.regcode = &g.regdummy;
++ regc(&g, MAGIC);
++ if (reg(&g, 0, &flags) == NULL)
++ return(NULL);
++
++ /* Small enough for pointer-storage convention? */
++ if (g.regsize >= 32767L) /* Probably could be 65535L. */
++ FAIL("regexp too big");
++
++ /* Allocate space. */
++ *patternsize=sizeof(regexp) + (unsigned)g.regsize;
++ r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize);
++ if (r == NULL)
++ FAIL("out of space");
++
++ /* Second pass: emit code. */
++ g.regparse = exp;
++ g.regnpar = 1;
++ g.regcode = r->program;
++ regc(&g, MAGIC);
++ if (reg(&g, 0, &flags) == NULL)
++ return(NULL);
++
++ /* Dig out information for optimizations. */
++ r->regstart = '\0'; /* Worst-case defaults. */
++ r->reganch = 0;
++ r->regmust = NULL;
++ r->regmlen = 0;
++ scan = r->program+1; /* First BRANCH. */
++ if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */
++ scan = OPERAND(scan);
++
++ /* Starting-point info. */
++ if (OP(scan) == EXACTLY)
++ r->regstart = *OPERAND(scan);
++ else if (OP(scan) == BOL)
++ r->reganch++;
++
++ /*
++ * If there's something expensive in the r.e., find the
++ * longest literal string that must appear and make it the
++ * regmust. Resolve ties in favor of later strings, since
++ * the regstart check works with the beginning of the r.e.
++ * and avoiding duplication strengthens checking. Not a
++ * strong reason, but sufficient in the absence of others.
++ */
++ if (flags&SPSTART) {
++ longest = NULL;
++ len = 0;
++ for (; scan != NULL; scan = regnext(&g, scan))
++ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
++ longest = OPERAND(scan);
++ len = strlen(OPERAND(scan));
++ }
++ r->regmust = longest;
++ r->regmlen = len;
++ }
++ }
++
++ return(r);
++}
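++
++/*
++ * Usage sketch (illustrative only, hence compiled out): compile once,
++ * match many times, release with kfree() -- malloc() is mapped to
++ * kmalloc() above. Both buffers must be writable, since the
++ * case_insensitive path lowercases them in place; "pattern_buf" and
++ * "payload" are hypothetical caller-owned buffers.
++ */
++#if 0
++static int example_regexp_usage(char *pattern_buf, char *payload)
++{
++	int psize, hit;
++	regexp *re = regcomp(pattern_buf, &psize);
++
++	if (re == NULL)
++		return 0;		/* bad pattern or out of memory */
++	hit = regexec(re, payload);	/* 1 on match, 0 otherwise */
++	kfree(re);
++	return hit;
++}
++#endif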
++
++/*
++ - reg - regular expression, i.e. main body or parenthesized thing
++ *
++ * Caller must absorb opening parenthesis.
++ *
++ * Combining parenthesis handling with the base level of regular expression
++ * is a trifle forced, but the need to tie the tails of the branches to what
++ * follows makes it hard to avoid.
++ */
++static char *
++reg(struct match_globals *g, int paren /* Parenthesized? */, int *flagp)
++{
++ register char *ret;
++ register char *br;
++ register char *ender;
++ register int parno = 0; /* 0 makes gcc happy */
++ int flags;
++
++ *flagp = HASWIDTH; /* Tentatively. */
++
++ /* Make an OPEN node, if parenthesized. */
++ if (paren) {
++ if (g->regnpar >= NSUBEXP)
++ FAIL("too many ()");
++ parno = g->regnpar;
++ g->regnpar++;
++ ret = regnode(g, OPEN+parno);
++ } else
++ ret = NULL;
++
++ /* Pick up the branches, linking them together. */
++ br = regbranch(g, &flags);
++ if (br == NULL)
++ return(NULL);
++ if (ret != NULL)
++ regtail(g, ret, br); /* OPEN -> first. */
++ else
++ ret = br;
++ if (!(flags&HASWIDTH))
++ *flagp &= ~HASWIDTH;
++ *flagp |= flags&SPSTART;
++ while (*g->regparse == '|') {
++ g->regparse++;
++ br = regbranch(g, &flags);
++ if (br == NULL)
++ return(NULL);
++ regtail(g, ret, br); /* BRANCH -> BRANCH. */
++ if (!(flags&HASWIDTH))
++ *flagp &= ~HASWIDTH;
++ *flagp |= flags&SPSTART;
++ }
++
++ /* Make a closing node, and hook it on the end. */
++ ender = regnode(g, (paren) ? CLOSE+parno : END);
++ regtail(g, ret, ender);
++
++ /* Hook the tails of the branches to the closing node. */
++ for (br = ret; br != NULL; br = regnext(g, br))
++ regoptail(g, br, ender);
++
++ /* Check for proper termination. */
++ if (paren && *g->regparse++ != ')') {
++ FAIL("unmatched ()");
++ } else if (!paren && *g->regparse != '\0') {
++ if (*g->regparse == ')') {
++ FAIL("unmatched ()");
++ } else
++ FAIL("junk on end"); /* "Can't happen". */
++ /* NOTREACHED */
++ }
++
++ return(ret);
++}
++
++/*
++ - regbranch - one alternative of an | operator
++ *
++ * Implements the concatenation operator.
++ */
++static char *
++regbranch(struct match_globals *g, int *flagp)
++{
++ register char *ret;
++ register char *chain;
++ register char *latest;
++ int flags;
++
++ *flagp = WORST; /* Tentatively. */
++
++ ret = regnode(g, BRANCH);
++ chain = NULL;
++ while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') {
++ latest = regpiece(g, &flags);
++ if (latest == NULL)
++ return(NULL);
++ *flagp |= flags&HASWIDTH;
++ if (chain == NULL) /* First piece. */
++ *flagp |= flags&SPSTART;
++ else
++ regtail(g, chain, latest);
++ chain = latest;
++ }
++ if (chain == NULL) /* Loop ran zero times. */
++ (void) regnode(g, NOTHING);
++
++ return(ret);
++}
++
++/*
++ - regpiece - something followed by possible [*+?]
++ *
++ * Note that the branching code sequences used for ? and the general cases
++ * of * and + are somewhat optimized: they use the same NOTHING node as
++ * both the endmarker for their branch list and the body of the last branch.
++ * It might seem that this node could be dispensed with entirely, but the
++ * endmarker role is not redundant.
++ */
++static char *
++regpiece(struct match_globals *g, int *flagp)
++{
++ register char *ret;
++ register char op;
++ register char *next;
++ int flags;
++
++ ret = regatom(g, &flags);
++ if (ret == NULL)
++ return(NULL);
++
++ op = *g->regparse;
++ if (!ISMULT(op)) {
++ *flagp = flags;
++ return(ret);
++ }
++
++ if (!(flags&HASWIDTH) && op != '?')
++ FAIL("*+ operand could be empty");
++ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
++
++ if (op == '*' && (flags&SIMPLE))
++ reginsert(g, STAR, ret);
++ else if (op == '*') {
++ /* Emit x* as (x&|), where & means "self". */
++ reginsert(g, BRANCH, ret); /* Either x */
++ regoptail(g, ret, regnode(g, BACK)); /* and loop */
++ regoptail(g, ret, ret); /* back */
++ regtail(g, ret, regnode(g, BRANCH)); /* or */
++ regtail(g, ret, regnode(g, NOTHING)); /* null. */
++ } else if (op == '+' && (flags&SIMPLE))
++ reginsert(g, PLUS, ret);
++ else if (op == '+') {
++ /* Emit x+ as x(&|), where & means "self". */
++ next = regnode(g, BRANCH); /* Either */
++ regtail(g, ret, next);
++ regtail(g, regnode(g, BACK), ret); /* loop back */
++ regtail(g, next, regnode(g, BRANCH)); /* or */
++ regtail(g, ret, regnode(g, NOTHING)); /* null. */
++ } else if (op == '?') {
++ /* Emit x? as (x|) */
++ reginsert(g, BRANCH, ret); /* Either x */
++ regtail(g, ret, regnode(g, BRANCH)); /* or */
++ next = regnode(g, NOTHING); /* null. */
++ regtail(g, ret, next);
++ regoptail(g, ret, next);
++ }
++ g->regparse++;
++ if (ISMULT(*g->regparse))
++ FAIL("nested *?+");
++
++ return(ret);
++}
++
++/*
++ - regatom - the lowest level
++ *
++ * Optimization: gobbles an entire sequence of ordinary characters so that
++ * it can turn them into a single node, which is smaller to store and
++ * faster to run. Backslashed characters are exceptions, each becoming a
++ * separate node; the code is simpler that way and it's not worth fixing.
++ */
++static char *
++regatom(struct match_globals *g, int *flagp)
++{
++ register char *ret;
++ int flags;
++
++ *flagp = WORST; /* Tentatively. */
++
++ switch (*g->regparse++) {
++ case '^':
++ ret = regnode(g, BOL);
++ break;
++ case '$':
++ ret = regnode(g, EOL);
++ break;
++ case '.':
++ ret = regnode(g, ANY);
++ *flagp |= HASWIDTH|SIMPLE;
++ break;
++ case '[': {
++ register int class;
++ register int classend;
++
++ if (*g->regparse == '^') { /* Complement of range. */
++ ret = regnode(g, ANYBUT);
++ g->regparse++;
++ } else
++ ret = regnode(g, ANYOF);
++ if (*g->regparse == ']' || *g->regparse == '-')
++ regc(g, *g->regparse++);
++ while (*g->regparse != '\0' && *g->regparse != ']') {
++ if (*g->regparse == '-') {
++ g->regparse++;
++ if (*g->regparse == ']' || *g->regparse == '\0')
++ regc(g, '-');
++ else {
++ class = UCHARAT(g->regparse-2)+1;
++ classend = UCHARAT(g->regparse);
++ if (class > classend+1)
++ FAIL("invalid [] range");
++ for (; class <= classend; class++)
++ regc(g, class);
++ g->regparse++;
++ }
++ } else
++ regc(g, *g->regparse++);
++ }
++ regc(g, '\0');
++ if (*g->regparse != ']')
++ FAIL("unmatched []");
++ g->regparse++;
++ *flagp |= HASWIDTH|SIMPLE;
++ }
++ break;
++ case '(':
++ ret = reg(g, 1, &flags);
++ if (ret == NULL)
++ return(NULL);
++ *flagp |= flags&(HASWIDTH|SPSTART);
++ break;
++ case '\0':
++ case '|':
++ case ')':
++ FAIL("internal urp"); /* Supposed to be caught earlier. */
++ break;
++ case '?':
++ case '+':
++ case '*':
++ FAIL("?+* follows nothing");
++ break;
++ case '\\':
++ if (*g->regparse == '\0')
++ FAIL("trailing \\");
++ ret = regnode(g, EXACTLY);
++ regc(g, *g->regparse++);
++ regc(g, '\0');
++ *flagp |= HASWIDTH|SIMPLE;
++ break;
++ default: {
++ register int len;
++ register char ender;
++
++ g->regparse--;
++ len = my_strcspn((const char *)g->regparse, (const char *)META);
++ if (len <= 0)
++ FAIL("internal disaster");
++ ender = *(g->regparse+len);
++ if (len > 1 && ISMULT(ender))
++ len--; /* Back off clear of ?+* operand. */
++ *flagp |= HASWIDTH;
++ if (len == 1)
++ *flagp |= SIMPLE;
++ ret = regnode(g, EXACTLY);
++ while (len > 0) {
++ regc(g, *g->regparse++);
++ len--;
++ }
++ regc(g, '\0');
++ }
++ break;
++ }
++
++ return(ret);
++}
++
++/*
++ - regnode - emit a node
++*/
++static char * /* Location. */
++regnode(struct match_globals *g, char op)
++{
++ register char *ret;
++ register char *ptr;
++
++ ret = g->regcode;
++ if (ret == &g->regdummy) {
++ g->regsize += 3;
++ return(ret);
++ }
++
++ ptr = ret;
++ *ptr++ = op;
++ *ptr++ = '\0'; /* Null "next" pointer. */
++ *ptr++ = '\0';
++ g->regcode = ptr;
++
++ return(ret);
++}
++
++/*
++ - regc - emit (if appropriate) a byte of code
++*/
++static void
++regc(struct match_globals *g, char b)
++{
++ if (g->regcode != &g->regdummy)
++ *g->regcode++ = b;
++ else
++ g->regsize++;
++}
++
++/*
++ - reginsert - insert an operator in front of already-emitted operand
++ *
++ * Means relocating the operand.
++ */
++static void
++reginsert(struct match_globals *g, char op, char* opnd)
++{
++ register char *src;
++ register char *dst;
++ register char *place;
++
++ if (g->regcode == &g->regdummy) {
++ g->regsize += 3;
++ return;
++ }
++
++ src = g->regcode;
++ g->regcode += 3;
++ dst = g->regcode;
++ while (src > opnd)
++ *--dst = *--src;
++
++ place = opnd; /* Op node, where operand used to be. */
++ *place++ = op;
++ *place++ = '\0';
++ *place++ = '\0';
++}
++
++/*
++ - regtail - set the next-pointer at the end of a node chain
++*/
++static void
++regtail(struct match_globals *g, char *p, char *val)
++{
++ register char *scan;
++ register char *temp;
++ register int offset;
++
++ if (p == &g->regdummy)
++ return;
++
++ /* Find last node. */
++ scan = p;
++ for (;;) {
++ temp = regnext(g, scan);
++ if (temp == NULL)
++ break;
++ scan = temp;
++ }
++
++ if (OP(scan) == BACK)
++ offset = scan - val;
++ else
++ offset = val - scan;
++ *(scan+1) = (offset>>8)&0377;
++ *(scan+2) = offset&0377;
++}
++
++/*
++ - regoptail - regtail on operand of first argument; nop if operandless
++*/
++static void
++regoptail(struct match_globals *g, char *p, char *val)
++{
++ /* "Operandless" and "op != BRANCH" are synonymous in practice. */
++ if (p == NULL || p == &g->regdummy || OP(p) != BRANCH)
++ return;
++ regtail(g, OPERAND(p), val);
++}
++
++/*
++ * regexec and friends
++ */
++
++
++/*
++ * Forwards.
++ */
++STATIC int regtry(struct match_globals *g, regexp *prog, char *string);
++STATIC int regmatch(struct match_globals *g, char *prog);
++STATIC int regrepeat(struct match_globals *g, char *p);
++
++/*
++ - regexec - match a regexp against a string
++*/
++int
++regexec(regexp *prog, char *string)
++{
++ register char *s;
++ struct match_globals g;
++
++ if(case_insensitive) string2lower(string, strlen(string));
++
++ /* Be paranoid... */
++ if (prog == NULL || string == NULL) {
++ printk("<3>Regexp: NULL parameter\n");
++ return(0);
++ }
++
++ /* Check validity of program. */
++ if (UCHARAT(prog->program) != MAGIC) {
++ printk("<3>Regexp: corrupted program\n");
++ return(0);
++ }
++
++ /* If there is a "must appear" string, look for it. */
++ if (prog->regmust != NULL) {
++ s = string;
++ while ((s = strchr(s, prog->regmust[0])) != NULL) {
++ if (strncmp(s, prog->regmust, prog->regmlen) == 0)
++ break; /* Found it. */
++ s++;
++ }
++ if (s == NULL) /* Not present. */
++ return(0);
++ }
++
++ /* Mark beginning of line for ^ . */
++ g.regbol = string;
++
++ /* Simplest case: anchored match need be tried only once. */
++ if (prog->reganch)
++ return(regtry(&g, prog, string));
++
++ /* Messy cases: unanchored match. */
++ s = string;
++ if (prog->regstart != '\0')
++ /* We know what char it must start with. */
++ while ((s = strchr(s, prog->regstart)) != NULL) {
++ if (regtry(&g, prog, s))
++ return(1);
++ s++;
++ }
++ else
++ /* We don't -- general case. */
++ do {
++ if (regtry(&g, prog, s))
++ return(1);
++ } while (*s++ != '\0');
++
++ /* Failure. */
++ return(0);
++}
++
++/*
++ - regtry - try match at specific point
++*/
++static int /* 0 failure, 1 success */
++regtry(struct match_globals *g, regexp *prog, char *string)
++{
++ register int i;
++ register char **sp;
++ register char **ep;
++
++ g->reginput = string;
++ g->regstartp = prog->startp;
++ g->regendp = prog->endp;
++
++ sp = prog->startp;
++ ep = prog->endp;
++ for (i = NSUBEXP; i > 0; i--) {
++ *sp++ = NULL;
++ *ep++ = NULL;
++ }
++ if (regmatch(g, prog->program + 1)) {
++ prog->startp[0] = string;
++ prog->endp[0] = g->reginput;
++ return(1);
++ } else
++ return(0);
++}
++
++/*
++ - regmatch - main matching routine
++ *
++ * Conceptually the strategy is simple: check to see whether the current
++ * node matches, call self recursively to see whether the rest matches,
++ * and then act accordingly. In practice we make some effort to avoid
++ * recursion, in particular by going through "ordinary" nodes (that don't
++ * need to know whether the rest of the match failed) by a loop instead of
++ * by recursion.
++ */
++static int /* 0 failure, 1 success */
++regmatch(struct match_globals *g, char *prog)
++{
++ register char *scan = prog; /* Current node. */
++ char *next; /* Next node. */
++
++#ifdef DEBUG
++ if (scan != NULL && regnarrate)
++ printk("%s(\n", regprop(scan));
++#endif
++ while (scan != NULL) {
++#ifdef DEBUG
++ if (regnarrate)
++ printk("%s...\n", regprop(scan));
++#endif
++ next = regnext(g, scan);
++
++ switch (OP(scan)) {
++ case BOL:
++ if (g->reginput != g->regbol)
++ return(0);
++ break;
++ case EOL:
++ if (*g->reginput != '\0')
++ return(0);
++ break;
++ case ANY:
++ if (*g->reginput == '\0')
++ return(0);
++ g->reginput++;
++ break;
++ case EXACTLY: {
++ register int len;
++ register char *opnd;
++
++ opnd = OPERAND(scan);
++ /* Inline the first character, for speed. */
++ if (*opnd != *g->reginput)
++ return(0);
++ len = strlen(opnd);
++ if (len > 1 && strncmp(opnd, g->reginput, len) != 0)
++ return(0);
++ g->reginput += len;
++ }
++ break;
++ case ANYOF:
++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL)
++ return(0);
++ g->reginput++;
++ break;
++ case ANYBUT:
++ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL)
++ return(0);
++ g->reginput++;
++ break;
++ case NOTHING:
++ case BACK:
++ break;
++ case OPEN+1:
++ case OPEN+2:
++ case OPEN+3:
++ case OPEN+4:
++ case OPEN+5:
++ case OPEN+6:
++ case OPEN+7:
++ case OPEN+8:
++ case OPEN+9: {
++ register int no;
++ register char *save;
++
++ no = OP(scan) - OPEN;
++ save = g->reginput;
++
++ if (regmatch(g, next)) {
++ /*
++ * Don't set startp if some later
++ * invocation of the same parentheses
++ * already has.
++ */
++ if (g->regstartp[no] == NULL)
++ g->regstartp[no] = save;
++ return(1);
++ } else
++ return(0);
++ }
++ break;
++ case CLOSE+1:
++ case CLOSE+2:
++ case CLOSE+3:
++ case CLOSE+4:
++ case CLOSE+5:
++ case CLOSE+6:
++ case CLOSE+7:
++ case CLOSE+8:
++ case CLOSE+9:
++ {
++ register int no;
++ register char *save;
++
++ no = OP(scan) - CLOSE;
++ save = g->reginput;
++
++ if (regmatch(g, next)) {
++ /*
++ * Don't set endp if some later
++ * invocation of the same parentheses
++ * already has.
++ */
++ if (g->regendp[no] == NULL)
++ g->regendp[no] = save;
++ return(1);
++ } else
++ return(0);
++ }
++ break;
++ case BRANCH: {
++ register char *save;
++
++ if (OP(next) != BRANCH) /* No choice. */
++ next = OPERAND(scan); /* Avoid recursion. */
++ else {
++ do {
++ save = g->reginput;
++ if (regmatch(g, OPERAND(scan)))
++ return(1);
++ g->reginput = save;
++ scan = regnext(g, scan);
++ } while (scan != NULL && OP(scan) == BRANCH);
++ return(0);
++ /* NOTREACHED */
++ }
++ }
++ break;
++ case STAR:
++ case PLUS: {
++ register char nextch;
++ register int no;
++ register char *save;
++ register int min;
++
++ /*
++ * Lookahead to avoid useless match attempts
++ * when we know what character comes next.
++ */
++ nextch = '\0';
++ if (OP(next) == EXACTLY)
++ nextch = *OPERAND(next);
++ min = (OP(scan) == STAR) ? 0 : 1;
++ save = g->reginput;
++ no = regrepeat(g, OPERAND(scan));
++ while (no >= min) {
++ /* If it could work, try it. */
++ if (nextch == '\0' || *g->reginput == nextch)
++ if (regmatch(g, next))
++ return(1);
++ /* Couldn't or didn't -- back up. */
++ no--;
++ g->reginput = save + no;
++ }
++ return(0);
++ }
++ break;
++ case END:
++ return(1); /* Success! */
++ break;
++ default:
++ printk("<3>Regexp: memory corruption\n");
++ return(0);
++ break;
++ }
++
++ scan = next;
++ }
++
++ /*
++ * We get here only if there's trouble -- normally "case END" is
++ * the terminating point.
++ */
++ printk("<3>Regexp: corrupted pointers\n");
++ return(0);
++}
++
++/*
++ - regrepeat - repeatedly match something simple, report how many
++*/
++static int
++regrepeat(struct match_globals *g, char *p)
++{
++ register int count = 0;
++ register char *scan;
++ register char *opnd;
++
++ scan = g->reginput;
++ opnd = OPERAND(p);
++ switch (OP(p)) {
++ case ANY:
++ count = strlen(scan);
++ scan += count;
++ break;
++ case EXACTLY:
++ while (*opnd == *scan) {
++ count++;
++ scan++;
++ }
++ break;
++ case ANYOF:
++ while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
++ count++;
++ scan++;
++ }
++ break;
++ case ANYBUT:
++ while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
++ count++;
++ scan++;
++ }
++ break;
++ default: /* Oh dear. Called inappropriately. */
++ printk("<3>Regexp: internal foulup\n");
++ count = 0; /* Best compromise. */
++ break;
++ }
++ g->reginput = scan;
++
++ return(count);
++}
++
++/*
++ - regnext - dig the "next" pointer out of a node
++*/
++static char*
++regnext(struct match_globals *g, char *p)
++{
++ register int offset;
++
++ if (p == &g->regdummy)
++ return(NULL);
++
++ offset = NEXT(p);
++ if (offset == 0)
++ return(NULL);
++
++ if (OP(p) == BACK)
++ return(p-offset);
++ else
++ return(p+offset);
++}
++
++/* ================================================== */
++
++/* #define RING_DEBUG */
++
++/* ************************************************* */
++
++#define TH_FIN_MULTIPLIER 0x01
++#define TH_SYN_MULTIPLIER 0x02
++#define TH_RST_MULTIPLIER 0x04
++#define TH_PUSH_MULTIPLIER 0x08
++#define TH_ACK_MULTIPLIER 0x10
++#define TH_URG_MULTIPLIER 0x20
++
++/* ************************************************* */
++
++#define PROC_INFO "info"
++#define PROC_PLUGINS_INFO "plugins_info"
++
++/* ************************************************* */
++
++/* List of all ring sockets. */
++static struct list_head ring_table;
++static u_int ring_table_size;
++
++/*
++  For each device, pf_ring keeps a list of the rings bound to it and of
++  the number of ring slots still available, so that a caller knows in
++  advance whether a ring bound to that device can host the packet.
++ */
++static struct list_head device_ring_list[MAX_NUM_DEVICES];
++
++/* List of all clusters */
++static struct list_head ring_cluster_list;
++
++/* List of all dna (direct nic access) devices */
++static struct list_head ring_dna_devices_list;
++static u_int dna_devices_list_size = 0;
++
++/* List of all plugins */
++static u_int plugin_registration_size = 0;
++static struct pfring_plugin_registration *plugin_registration[MAX_PLUGIN_ID] = { NULL };
++static u_short max_registered_plugin_id = 0;
++static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
++
++/* ********************************** */
++
++/* /proc entry for ring module */
++struct proc_dir_entry *ring_proc_dir = NULL;
++struct proc_dir_entry *ring_proc = NULL;
++struct proc_dir_entry *ring_proc_plugins_info = NULL;
++
++static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
++static int ring_proc_get_plugin_info(char *, char **, off_t, int, int *, void *);
++static void ring_proc_add(struct ring_opt *pfr, struct net_device *dev);
++static void ring_proc_remove(struct ring_opt *pfr);
++static void ring_proc_init(void);
++static void ring_proc_term(void);
++
++/*
++ Caveat
++ [http://lists.metaprl.org/pipermail/cs134-labs/2002-October/000025.html]
++
++ GFP_ATOMIC means roughly "make the allocation operation atomic". This
++ means that the kernel will try to find the memory using a pile of free
++ memory set aside for urgent allocation. If that pile doesn't have
++ enough free pages, the operation will fail. This flag is useful for
++ allocation within interrupt handlers.
++
++ GFP_KERNEL will try a little harder to find memory. There's a
++ possibility that the call to kmalloc() will sleep while the kernel is
++ trying to find memory (thus making it unsuitable for interrupt
++ handlers). It's much more rare for an allocation with GFP_KERNEL to
++ fail than with GFP_ATOMIC.
++
++  In all cases, kmalloc() should only be used for allocating small amounts
++  of memory (a few kb). vmalloc() is better for larger amounts.
++
++ Also note that in lab 1 and lab 2, it would have been arguably better to
++ use GFP_KERNEL instead of GFP_ATOMIC. GFP_ATOMIC should be saved for
++ those instances in which a sleep would be totally unacceptable.
++*/
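++
++/*
++ * Concretely (sketch): allocations in this module that may run with locks
++ * held or from the packet path use GFP_ATOMIC, e.g. in ring_insert() below:
++ *
++ *	next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
++ *
++ * GFP_KERNEL would be preferable only where sleeping is known to be safe.
++ */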
++/* ********************************** */
++
++/* Forward */
++static struct proto_ops ring_ops;
++
++#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
++static struct proto ring_proto;
++#endif
++
++static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
++ u_char real_skb, short channel_id);
++static int buffer_ring_handler(struct net_device *dev, char *data, int len);
++static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
++
++/* Extern */
++extern
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23))
++struct sk_buff*
++#else
++int
++#endif
++ip_defrag(struct sk_buff *skb, u32 user);
++
++/* ********************************** */
++
++/* Defaults */
++static unsigned int num_slots = 4096;
++static unsigned int enable_tx_capture = 1;
++static unsigned int enable_ip_defrag = 0;
++static unsigned int transparent_mode = 1;
++static u_int32_t ring_id_serial = 0;
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
++module_param(num_slots, uint, 0644);
++module_param(transparent_mode, uint, 0644);
++module_param(enable_tx_capture, uint, 0644);
++module_param(enable_ip_defrag, uint, 0644);
++#else
++MODULE_PARM(num_slots, "i");
++MODULE_PARM(transparent_mode, "i");
++MODULE_PARM(enable_tx_capture, "i");
++MODULE_PARM(enable_ip_defrag, "i");
++#endif
++
++MODULE_PARM_DESC(num_slots, "Number of ring slots");
++MODULE_PARM_DESC(transparent_mode,
++		 "Set to 1 to enable transparent mode "
++		 "(slower but backwards compatible)");
++MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
++MODULE_PARM_DESC(enable_ip_defrag,
++		 "Set to 1 to enable IP defragmentation "
++		 "(only rx traffic is defragmented)");
++
++/* ********************************** */
++
++#define MIN_QUEUED_PKTS 64
++#define MAX_QUEUE_LOOPS 64
++
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
++#define ring_sk(__sk) ((__sk)->sk_protinfo)
++#else
++#define ring_sk_datatype(a) (a)
++#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
++#endif
++
++#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
++
++/* ***************** Legacy code ************************ */
++
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
++static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
++{
++ return (struct iphdr *)skb->nh.iph;
+}
+
++static inline void skb_set_network_header(struct sk_buff *skb,
++ const int offset)
++{
++  skb->nh.iph = (struct iphdr *)(skb->data + offset);
++}
+
++static inline void skb_reset_network_header(struct sk_buff *skb)
++{
++ ;
++}
+
-+/*
-+ * Search Text or Binary Data for Pattern matches
-+ *
-+ * Sparse Storage Version
-+ */
-+static
-+inline
-+int
-+acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
-+ int (*Match) (void * id, int index, void *data),
-+ void *data)
-+{
-+ acstate_t state;
-+ ACSM_PATTERN2 * mlist;
-+ unsigned char * Tend;
-+ int nfound = 0;
-+ unsigned char * T, *Tc;
-+ int index;
-+ acstate_t ** NextState= acsm->acsmNextState;
-+ acstate_t * FailState= acsm->acsmFailState;
-+ ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
-+ unsigned char Tchar;
-+
-+ Tc = Tx;
-+ T = Tx;
-+ Tend = T + n;
-+
-+ for( state = 0; T < Tend; T++ )
-+ {
-+ acstate_t nstate;
++static inline void skb_reset_transport_header(struct sk_buff *skb)
++{
++ ;
++}
++#endif
+
-+ Tchar = xlatcase[ *T ];
++/* ***** Code taken from other kernel modules ******** */
+
-+ while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
-+ state = FailState[state];
++/**
++ * rvmalloc copied from usbvideo.c
++ */
++static void *rvmalloc(unsigned long size)
++{
++ void *mem;
++ unsigned long adr;
++ unsigned long pages = 0;
+
-+ state = nstate;
++#if defined(RING_DEBUG)
++ printk("[PF_RING] rvmalloc: %lu bytes\n", size);
++#endif
+
-+ for( mlist = MatchList[state];
-+ mlist!= NULL;
-+ mlist = mlist->next )
-+ {
-+ index = T - mlist->n - Tx;
-+ if( mlist->nocase )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
-+ else
-+ {
-+ if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
-+ {
-+ nfound++;
-+ if (Match (mlist->id, index, data))
-+ return nfound;
-+ }
-+ }
-+ }
-+ }
++ size = PAGE_ALIGN(size);
++ mem = vmalloc_32(size);
++ if (!mem)
++ return NULL;
++
++ memset(mem, 0, size); /* Clear the ram out, no junk to the user */
++ adr = (unsigned long) mem;
++ while (size > 0) {
++ SetPageReserved(vmalloc_to_page((void *)adr));
++ pages++;
++ adr += PAGE_SIZE;
++ size -= PAGE_SIZE;
++ }
+
-+ return nfound;
++#if defined(RING_DEBUG)
++ printk("[PF_RING] rvmalloc: %lu pages\n", pages);
++#endif
++ return mem;
+}
+
-+/*
-+ * Search Function
++/* ************************************************** */
++
++/**
++ * rvfree copied from usbvideo.c
+ */
-+int
-+acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
-+ int (*Match) (void * id, int index, void *data),
-+ void *data)
++static void rvfree(void *mem, unsigned long size)
+{
++ unsigned long adr;
++ unsigned long pages = 0;
+
-+ switch( acsm->acsmFSA )
-+ {
-+ case FSA_DFA:
-+
-+ if( acsm->acsmFormat == ACF_FULL )
-+ {
-+ return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
-+ }
-+ else if( acsm->acsmFormat == ACF_BANDED )
-+ {
-+ return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
-+ }
-+ else
-+ {
-+ return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
-+ }
-+
-+ case FSA_NFA:
-+
-+ return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
++#if defined(RING_DEBUG)
++ printk("[PF_RING] rvfree: %lu bytes\n", size);
++#endif
+
-+ case FSA_TRIE:
++ if (!mem)
++ return;
+
-+ return 0;
-+ }
-+ return 0;
++ adr = (unsigned long) mem;
++ while ((long) size > 0) {
++ ClearPageReserved(vmalloc_to_page((void *)adr));
++ pages++;
++ adr += PAGE_SIZE;
++ size -= PAGE_SIZE;
++ }
++#if defined(RING_DEBUG)
++ printk("[PF_RING] rvfree: %lu pages\n", pages);
++ printk("[PF_RING] rvfree: calling vfree....\n");
++#endif
++ vfree(mem);
++#if defined(RING_DEBUG)
++ printk("[PF_RING] rvfree: after vfree....\n");
++#endif
+}
+
++/* ********************************** */
+
-+/*
-+ * Free all memory
-+ */
-+void
-+acsmFree2 (ACSM_STRUCT2 * acsm)
++#define IP_DEFRAG_RING 1234
++
++/* Returns new sk_buff, or NULL */
++static struct sk_buff *ring_gather_frags(struct sk_buff *skb)
+{
-+ int i;
-+ ACSM_PATTERN2 * mlist, *ilist;
-+ for (i = 0; i < acsm->acsmMaxStates; i++)
-+ {
-+ mlist = acsm->acsmMatchList[i];
++#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23))
++ skb = ip_defrag(skb, IP_DEFRAG_RING);
+
-+ while (mlist)
-+ {
-+ ilist = mlist;
-+ mlist = mlist->next;
-+ AC_FREE (ilist);
-+ }
-+ AC_FREE(acsm->acsmNextState[i]);
-+ }
-+ AC_FREE(acsm->acsmFailState);
-+ AC_FREE(acsm->acsmMatchList);
++ if(skb)
++ ip_send_check(ip_hdr(skb));
++#else
++ if(ip_defrag(skb, IP_DEFRAG_RING))
++ skb = NULL;
++ else
++ ip_send_check(ip_hdr(skb));
++#endif
++
++ return(skb);
+}
+
+/* ********************************** */
+
-+static void ring_sock_destruct(struct sock *sk) {
++static void ring_sock_destruct(struct sock *sk)
++{
++ struct ring_opt *pfr;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+ skb_queue_purge(&sk->sk_receive_queue);
+
+ if (!sock_flag(sk, SOCK_DEAD)) {
+#if defined(RING_DEBUG)
-+ printk("Attempt to release alive ring socket: %p\n", sk);
++ printk("[PF_RING] Attempt to release alive ring socket: %p\n", sk);
+#endif
+ return;
+ }
-+
-+ BUG_ON(!atomic_read(&sk->sk_rmem_alloc));
-+ BUG_ON(!atomic_read(&sk->sk_wmem_alloc));
+#else
-+
-+ BUG_ON(atomic_read(&sk->rmem_alloc)==0);
-+ BUG_ON(atomic_read(&sk->wmem_alloc)==0);
-+
+ if (!sk->dead) {
+#if defined(RING_DEBUG)
-+ printk("Attempt to release alive ring socket: %p\n", sk);
++ printk("[PF_RING] Attempt to release alive ring socket: %p\n", sk);
+#endif
+ return;
+ }
+#endif
+
-+ kfree(ring_sk(sk));
++ pfr = ring_sk(sk);
++
++ if(pfr) kfree(pfr);
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
+ MOD_DEC_USE_COUNT;
+
+/* ********************************** */
+
-+static void ring_proc_add(struct ring_opt *pfr) {
++static void ring_proc_add(struct ring_opt *pfr, struct net_device *dev)
++{
+ if(ring_proc_dir != NULL) {
-+ char name[16];
++ char name[64];
++
++ pfr->ring_pid = current->pid;
++ pfr->ring_id = ring_id_serial++;
+
-+ pfr->ring_pid = current->pid;
++ if(NULL != dev)
++ snprintf(name, sizeof(name), "%d-%s.%d", pfr->ring_pid, dev->name, pfr->ring_id);
++ else
++ snprintf(name, sizeof(name), "%d.%d", pfr->ring_pid, pfr->ring_id);
+
-+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
-+ create_proc_read_entry(name, 0, ring_proc_dir,
-+ ring_proc_get_info, pfr);
-+ /* printk("PF_RING: added /proc/net/pf_ring/%s\n", name); */
++ create_proc_read_entry(name, 0, ring_proc_dir, ring_proc_get_info, pfr);
++ /* printk("[PF_RING] added /proc/net/pf_ring/%s\n", name); */
++ /* printk("[PF_RING] %s has index %d\n", dev->name, dev->ifindex); */
+ }
+}
+
+/* ********************************** */
+
-+static void ring_proc_remove(struct ring_opt *pfr) {
++static void ring_proc_remove(struct ring_opt *pfr)
++{
+ if(ring_proc_dir != NULL) {
-+ char name[16];
++ char name[64];
++
++ if (pfr->ring_netdev && pfr->ring_netdev->name)
++ snprintf(name, sizeof(name), "%d-%s.%d",
++	       pfr->ring_pid, pfr->ring_netdev->name, pfr->ring_id);
++ else
++ snprintf(name, sizeof(name), "%d.%d", pfr->ring_pid, pfr->ring_id);
+
-+ snprintf(name, sizeof(name), "%d", pfr->ring_pid);
+ remove_proc_entry(name, ring_proc_dir);
-+ /* printk("PF_RING: removed /proc/net/pf_ring/%s\n", name); */
++ printk("[PF_RING] removed /proc/net/pf_ring/%s\n", name);
+ }
+}
+
+/* ********************************** */
+
++static u_int32_t num_queued_pkts(struct ring_opt *pfr)
++{
++  if(pfr->ring_slots != NULL) {
++    u_int32_t tot_insert = pfr->slots_info->tot_insert, tot_read = pfr->slots_info->tot_read;
++
++#if defined(RING_DEBUG)
++    printk("[PF_RING] -> [tot_insert=%d][tot_read=%d]\n",
++	   tot_insert, tot_read);
++#endif
++
++    if(tot_insert >= tot_read) {
++      return(tot_insert-tot_read);
++    } else {
++      return(((u_int32_t)-1)+tot_insert-tot_read);
++    }
++  } else
++    return(0);
++}
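++
++/*
++ * Wraparound sketch: both counters are free-running u_int32_t values, so
++ * after tot_insert wraps past 2^32 it can be numerically smaller than
++ * tot_read; e.g. tot_insert = 3 with tot_read = 4294967294 means the
++ * writer is logically ahead. (Plain unsigned subtraction,
++ * tot_insert - tot_read, would yield the distance modulo 2^32 directly.)
++ */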
++
++/* ************************************* */
++
++inline u_int get_num_ring_free_slots(struct ring_opt *pfr) {
++ return(pfr->slots_info->tot_slots - num_queued_pkts(pfr));
++}
++
++/* ********************************** */
++
+static int ring_proc_get_info(char *buf, char **start, off_t offset,
+ int len, int *unused, void *data)
+{
+
+ if(data == NULL) {
+ /* /proc/net/pf_ring/info */
-+ rlen = sprintf(buf,"Version : %s\n", RING_VERSION);
-+ rlen += sprintf(buf + rlen,"Bucket length : %d bytes\n", bucket_len);
-+ rlen += sprintf(buf + rlen,"Ring slots : %d\n", num_slots);
-+ rlen += sprintf(buf + rlen,"Sample rate : %d [1=no sampling]\n", sample_rate);
-+
-+ rlen += sprintf(buf + rlen,"Capture TX : %s\n",
++ rlen = sprintf(buf, "Version : %s\n", RING_VERSION);
++ rlen += sprintf(buf + rlen, "Ring slots : %d\n", num_slots);
++ rlen += sprintf(buf + rlen, "Slot version : %d\n", RING_FLOWSLOT_VERSION);
++ rlen += sprintf(buf + rlen, "Capture TX : %s\n",
+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
-+ rlen += sprintf(buf + rlen,"Transparent mode : %s\n",
++ rlen += sprintf(buf + rlen, "IP Defragment : %s\n", enable_ip_defrag ? "Yes" : "No");
++ rlen += sprintf(buf + rlen, "Transparent mode : %s\n",
+ transparent_mode ? "Yes" : "No");
-+ rlen += sprintf(buf + rlen,"Total rings : %d\n", ring_table_size);
++ rlen += sprintf(buf + rlen, "Total rings : %d\n", ring_table_size);
++ rlen += sprintf(buf + rlen, "Total plugins : %d\n", plugin_registration_size);
+ } else {
+ /* detailed statistics about a PF_RING */
+ pfr = (struct ring_opt*)data;
+ if(fsi) {
+ rlen = sprintf(buf, "Bound Device : %s\n",
+ pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
-+ rlen += sprintf(buf + rlen,"Version : %d\n", fsi->version);
-+ rlen += sprintf(buf + rlen,"Sampling Rate : %d\n", pfr->sample_rate);
-+ rlen += sprintf(buf + rlen,"BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
-+ rlen += sprintf(buf + rlen,"Bloom Filters : %s\n", pfr->bitmask_enabled ? "Enabled" : "Disabled");
-+ rlen += sprintf(buf + rlen,"Pattern Search: %s\n", pfr->acsm ? "Enabled" : "Disabled");
-+ rlen += sprintf(buf + rlen,"Cluster Id : %d\n", pfr->cluster_id);
-+ rlen += sprintf(buf + rlen,"Tot Slots : %d\n", fsi->tot_slots);
-+ rlen += sprintf(buf + rlen,"Slot Len : %d\n", fsi->slot_len);
-+ rlen += sprintf(buf + rlen,"Data Len : %d\n", fsi->data_len);
-+ rlen += sprintf(buf + rlen,"Tot Memory : %d\n", fsi->tot_mem);
-+ rlen += sprintf(buf + rlen,"Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
-+ rlen += sprintf(buf + rlen,"Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
-+ rlen += sprintf(buf + rlen,"Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
-+ rlen += sprintf(buf + rlen,"Tot Read : %lu\n", (unsigned long)fsi->tot_read);
-+
++ rlen += sprintf(buf + rlen, "Version : %d\n", fsi->version);
++ rlen += sprintf(buf + rlen, "Sampling Rate : %d\n", pfr->sample_rate);
++ rlen += sprintf(buf + rlen, "Appl. Name : %s\n", pfr->appl_name ? pfr->appl_name : "<unknown>");
++ rlen += sprintf(buf + rlen, "IP Defragment : %s\n", enable_ip_defrag ? "Yes" : "No");
++ rlen += sprintf(buf + rlen, "BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
++ rlen += sprintf(buf + rlen, "# Filt. Rules : %d\n", pfr->num_filtering_rules);
++ rlen += sprintf(buf + rlen, "Cluster Id : %d\n", pfr->cluster_id);
++ rlen += sprintf(buf + rlen, "Channel Id : %d\n", pfr->channel_id);
++ rlen += sprintf(buf + rlen, "Tot Slots : %d\n", fsi->tot_slots);
++ rlen += sprintf(buf + rlen, "Bucket Len : %d\n", fsi->data_len);
++ rlen += sprintf(buf + rlen, "Slot Len : %d [bucket+header]\n", fsi->slot_len);
++ rlen += sprintf(buf + rlen, "Tot Memory : %d\n", fsi->tot_mem);
++ rlen += sprintf(buf + rlen, "Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
++ rlen += sprintf(buf + rlen, "Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
++ rlen += sprintf(buf + rlen, "Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
++ rlen += sprintf(buf + rlen, "Tot Read : %lu\n", (unsigned long)fsi->tot_read);
++ rlen += sprintf(buf + rlen, "Num Free Slots: %u\n", get_num_ring_free_slots(pfr));
+ } else
+ rlen = sprintf(buf, "WARNING fsi == NULL\n");
+ } else
+
+/* ********************************** */
+
-+static void ring_proc_init(void) {
-+ ring_proc_dir = proc_mkdir("pf_ring", init_net.proc_net);
++static int ring_proc_get_plugin_info(char *buf, char **start, off_t offset,
++ int len, int *unused, void *data)
++{
++ int rlen = 0, i = 0;
++ struct pfring_plugin_registration* tmp = NULL;
++
++  /* FIXME: I should know the number of plugins registered */
++ if (!plugin_registration_size) return rlen;
++
++ /* plugins_info */
++
++ rlen += sprintf(buf + rlen , "ID\tPlugin\n");
++
++ for(i = 0; i < MAX_PLUGIN_ID; i++) {
++ tmp = plugin_registration[i];
++ if (tmp) {
++ rlen += sprintf(buf + rlen , "%d\t%s [%s]\n",
++ tmp->plugin_id, tmp->name, tmp->description);
++ }
++ }
++
++ return rlen;
++}
++
++/* ********************************** */
++
++static void ring_proc_init(void)
++{
++ ring_proc_dir = proc_mkdir("pf_ring",
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
++ init_net.
++#endif
++ proc_net);
+
+ if(ring_proc_dir) {
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
+ ring_proc_dir->owner = THIS_MODULE;
-+ ring_proc = create_proc_read_entry("info", 0, ring_proc_dir,
-+ ring_proc_get_info, NULL);
-+ if(!ring_proc)
-+ printk("PF_RING: unable to register proc file\n");
++#endif
++ ring_proc = create_proc_read_entry(PROC_INFO, 0,
++ ring_proc_dir,
++ ring_proc_get_info,
++ NULL);
++ ring_proc_plugins_info = create_proc_read_entry(PROC_PLUGINS_INFO, 0,
++ ring_proc_dir,
++ ring_proc_get_plugin_info,
++ NULL);
++ if(!ring_proc || !ring_proc_plugins_info)
++ printk("[PF_RING] unable to register proc file\n");
+ else {
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
+ ring_proc->owner = THIS_MODULE;
-+ printk("PF_RING: registered /proc/net/pf_ring/\n");
++ ring_proc_plugins_info->owner = THIS_MODULE;
++#endif
++ printk("[PF_RING] registered /proc/net/pf_ring/\n");
+ }
+ } else
-+ printk("PF_RING: unable to create /proc/net/pf_ring\n");
++ printk("[PF_RING] unable to create /proc/net/pf_ring\n");
+}
+
+/* ********************************** */
+
-+static void ring_proc_term(void) {
++static void ring_proc_term(void)
++{
+ if(ring_proc != NULL) {
-+ remove_proc_entry("info", ring_proc_dir);
-+ if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", init_net.proc_net);
++ remove_proc_entry(PROC_INFO, ring_proc_dir);
++ printk("[PF_RING] removed /proc/net/pf_ring/%s\n", PROC_INFO);
++
++ remove_proc_entry(PROC_PLUGINS_INFO, ring_proc_dir);
++ printk("[PF_RING] removed /proc/net/pf_ring/%s\n", PROC_PLUGINS_INFO);
+
-+ printk("PF_RING: deregistered /proc/net/pf_ring\n");
++ if(ring_proc_dir != NULL) {
++ remove_proc_entry("pf_ring",
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
++ init_net.
++#endif
++ proc_net);
++ printk("[PF_RING] deregistered /proc/net/pf_ring\n");
++ }
+ }
+}
+
+ * store the sk in a new element and add it
+ * to the head of the list.
+ */
-+static inline void ring_insert(struct sock *sk) {
++static inline void ring_insert(struct sock *sk)
++{
+ struct ring_element *next;
++ struct ring_opt *pfr;
+
+#if defined(RING_DEBUG)
-+ printk("RING: ring_insert()\n");
++ printk("[PF_RING] ring_insert()\n");
+#endif
+
+ next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
+ if(next != NULL) {
+ next->sk = sk;
-+ write_lock_irq(&ring_mgmt_lock);
++ write_lock_bh(&ring_mgmt_lock);
+ list_add(&next->list, &ring_table);
-+ write_unlock_irq(&ring_mgmt_lock);
++ write_unlock_bh(&ring_mgmt_lock);
+ } else {
+ if(net_ratelimit())
-+ printk("RING: could not kmalloc slot!!\n");
++      printk("[PF_RING] could not kmalloc the ring element\n");
+ }
+
+ ring_table_size++;
-+ ring_proc_add(ring_sk(sk));
++  /* ring_proc_add(ring_sk(sk)); */
++ pfr = (struct ring_opt *)ring_sk(sk);
++ pfr->ring_pid = current->pid;
+}
+
+/* ********************************** */
+ * stop when we find the one we're looking for (break),
+ * or when we reach the end of the list.
+ */
-+static inline void ring_remove(struct sock *sk) {
-+ struct list_head *ptr;
++static inline void ring_remove(struct sock *sk)
++{
++ struct list_head *ptr, *tmp_ptr;
+ struct ring_element *entry;
+
-+ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
++#if defined(RING_DEBUG)
++ printk("[PF_RING] ring_remove()\n");
++#endif
++
++ list_for_each_safe(ptr, tmp_ptr, &ring_table) {
+ entry = list_entry(ptr, struct ring_element, list);
+
+ if(entry->sk == sk) {
+ list_del(ptr);
-+ kfree(ptr);
++ kfree(entry);
+ ring_table_size--;
+ break;
+ }
+ }
-+}
-+
-+/* ********************************** */
-+
-+static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
-+
-+ if(pfr->ring_slots != NULL) {
-+
-+ u_int32_t tot_insert = pfr->slots_info->insert_idx,
-+#if defined(RING_DEBUG)
-+ tot_read = pfr->slots_info->tot_read, tot_pkts;
-+#else
-+ tot_read = pfr->slots_info->tot_read;
-+#endif
-+
-+ if(tot_insert >= tot_read) {
-+#if defined(RING_DEBUG)
-+ tot_pkts = tot_insert-tot_read;
-+#endif
-+ return(tot_insert-tot_read);
-+ } else {
-+#if defined(RING_DEBUG)
-+ tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read;
-+#endif
-+ return(((u_int32_t)-1)+tot_insert-tot_read);
-+ }
+
+#if defined(RING_DEBUG)
-+ printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
-+ tot_pkts, tot_insert, tot_read);
++ printk("[PF_RING] leaving ring_remove()\n");
+#endif
-+
-+ } else
-+ return(0);
+}
+
+/* ********************************** */
+
-+static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
-+#if defined(RING_DEBUG)
-+ printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
-+#endif
-+
++static inline FlowSlot* get_insert_slot(struct ring_opt *pfr)
++{
+ if(pfr->ring_slots != NULL) {
+ FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
+ *pfr->slots_info->slot_len]);
++#if defined(RING_DEBUG)
++ printk("[PF_RING] get_insert_slot(%d): returned slot [slot_state=%d]\n",
++ pfr->slots_info->insert_idx, slot->slot_state);
++#endif
+ return(slot);
-+ } else
++ } else {
++#if defined(RING_DEBUG)
++ printk("[PF_RING] get_insert_slot(%d): NULL slot\n", pfr->slots_info->insert_idx);
++#endif
+ return(NULL);
++ }
+}
+
+/* ********************************** */
+
-+static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
++static inline FlowSlot* get_remove_slot(struct ring_opt *pfr)
++{
+#if defined(RING_DEBUG)
-+ printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
++ printk("[PF_RING] get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
+#endif
+
+ if(pfr->ring_slots != NULL)
+
+/* ******************************************************* */
+
-+static int parse_pkt(struct sk_buff *skb, u_int16_t skb_displ,
-+ u_int8_t *l3_proto, u_int16_t *eth_type,
-+ u_int16_t *l3_offset, u_int16_t *l4_offset,
-+ u_int16_t *vlan_id, u_int32_t *ipv4_src,
-+ u_int32_t *ipv4_dst,
-+ u_int16_t *l4_src_port, u_int16_t *l4_dst_port,
-+ u_int16_t *payload_offset) {
++static int parse_pkt(struct sk_buff *skb,
++ u_int16_t skb_displ,
++ struct pfring_pkthdr *hdr)
++{
+ struct iphdr *ip;
+ struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
+ u_int16_t displ;
+
-+ *l3_offset = *l4_offset = *l3_proto = *payload_offset = 0;
-+ *eth_type = ntohs(eh->h_proto);
++ memset(&hdr->parsed_pkt, 0, sizeof(struct pkt_parsing_info));
++ hdr->parsed_header_len = 9;
+
-+ if(*eth_type == 0x8100 /* 802.1q (VLAN) */) {
-+ (*vlan_id) = (skb->data[14] & 15)*256 + skb->data[15];
-+ *eth_type = (skb->data[16])*256 + skb->data[17];
-+ displ = 4;
-+ } else {
-+ displ = 0;
-+ (*vlan_id) = (u_int16_t)-1;
-+ }
++ hdr->parsed_pkt.eth_type = ntohs(eh->h_proto);
++ hdr->parsed_pkt.pkt_detail.offset.eth_offset = -skb_displ;
+
-+ if(*eth_type == 0x0800 /* IP */) {
-+ *l3_offset = displ+sizeof(struct ethhdr);
-+ ip = (struct iphdr*)(skb->data-skb_displ+(*l3_offset));
++ if(hdr->parsed_pkt.eth_type == 0x8100 /* 802.1q (VLAN) */)
++ {
++ hdr->parsed_pkt.pkt_detail.offset.vlan_offset = hdr->parsed_pkt.pkt_detail.offset.eth_offset + sizeof(struct ethhdr);
++ hdr->parsed_pkt.vlan_id = (skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset] & 15) * 256
++ + skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset + 1];
++ hdr->parsed_pkt.eth_type = (skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset + 2]) * 256
++ + skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset + 3];
++ displ = 4;
++ }
++ else
++ {
++ displ = 0;
++ hdr->parsed_pkt.vlan_id = 0; /* Any VLAN */
++ }
+
-+ *ipv4_src = ntohl(ip->saddr), *ipv4_dst = ntohl(ip->daddr), *l3_proto = ip->protocol;
++ if(hdr->parsed_pkt.eth_type == 0x0800 /* IP */) {
++ hdr->parsed_pkt.pkt_detail.offset.l3_offset = hdr->parsed_pkt.pkt_detail.offset.eth_offset+displ+sizeof(struct ethhdr);
++ ip = (struct iphdr*)(skb->data+hdr->parsed_pkt.pkt_detail.offset.l3_offset);
+
-+ if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP)) {
-+ *l4_offset = (*l3_offset)+(ip->ihl*4);
++ hdr->parsed_pkt.ipv4_src = ntohl(ip->saddr), hdr->parsed_pkt.ipv4_dst = ntohl(ip->daddr), hdr->parsed_pkt.l3_proto = ip->protocol;
++ hdr->parsed_pkt.ipv4_tos = ip->tos;
++ hdr->parsed_pkt.pkt_detail.offset.l4_offset = hdr->parsed_pkt.pkt_detail.offset.l3_offset+ip->ihl*4;
+
-+ if(ip->protocol == IPPROTO_TCP) {
-+ struct tcphdr *tcp = (struct tcphdr*)(skb->data-skb_displ+(*l4_offset));
-+ *l4_src_port = ntohs(tcp->source), *l4_dst_port = ntohs(tcp->dest);
-+ *payload_offset = (*l4_offset)+(tcp->doff * 4);
-+ } else if(ip->protocol == IPPROTO_UDP) {
-+ struct udphdr *udp = (struct udphdr*)(skb->data-skb_displ+(*l4_offset));
-+ *l4_src_port = ntohs(udp->source), *l4_dst_port = ntohs(udp->dest);
-+ *payload_offset = (*l4_offset)+sizeof(struct udphdr);
++ if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP))
++ {
++ if(ip->protocol == IPPROTO_TCP)
++ {
++ struct tcphdr *tcp = (struct tcphdr*)(skb->data+hdr->parsed_pkt.pkt_detail.offset.l4_offset);
++ hdr->parsed_pkt.l4_src_port = ntohs(tcp->source), hdr->parsed_pkt.l4_dst_port = ntohs(tcp->dest);
++ hdr->parsed_pkt.pkt_detail.offset.payload_offset = hdr->parsed_pkt.pkt_detail.offset.l4_offset+(tcp->doff * 4);
++ hdr->parsed_pkt.tcp_flags = (tcp->fin * TH_FIN_MULTIPLIER) + (tcp->syn * TH_SYN_MULTIPLIER) + (tcp->rst * TH_RST_MULTIPLIER) +
++ (tcp->psh * TH_PUSH_MULTIPLIER) + (tcp->ack * TH_ACK_MULTIPLIER) + (tcp->urg * TH_URG_MULTIPLIER);
++ } else if(ip->protocol == IPPROTO_UDP)
++ {
++ struct udphdr *udp = (struct udphdr*)(skb->data+hdr->parsed_pkt.pkt_detail.offset.l4_offset);
++ hdr->parsed_pkt.l4_src_port = ntohs(udp->source), hdr->parsed_pkt.l4_dst_port = ntohs(udp->dest);
++ hdr->parsed_pkt.pkt_detail.offset.payload_offset = hdr->parsed_pkt.pkt_detail.offset.l4_offset+sizeof(struct udphdr);
++ } else
++ hdr->parsed_pkt.pkt_detail.offset.payload_offset = hdr->parsed_pkt.pkt_detail.offset.l4_offset;
+ } else
-+ *payload_offset = (*l4_offset);
-+ } else
-+ *l4_src_port = *l4_dst_port = 0;
++ hdr->parsed_pkt.l4_src_port = hdr->parsed_pkt.l4_dst_port = 0;
++
++ hdr->parsed_pkt.pkt_detail.offset.eth_offset = skb_displ;
+
+ return(1); /* IP */
+ } /* TODO: handle IPv6 */
+ return(0); /* No IP */
+}
+
-+/* **************************************************************** */
++/* ********************************** */
+
-+static void reset_bitmask(bitmask_selector *selector)
++inline u_int32_t hash_pkt(u_int16_t vlan_id, u_int8_t proto,
++ u_int32_t host_peer_a, u_int32_t host_peer_b,
++ u_int16_t port_peer_a, u_int16_t port_peer_b)
+{
-+ memset((char*)selector->bits_memory, 0, selector->num_bits/8);
-+
-+ while(selector->clashes != NULL) {
-+ bitmask_counter_list *next = selector->clashes->next;
-+ kfree(selector->clashes);
-+ selector->clashes = next;
-+ }
++ return(vlan_id+proto+host_peer_a+host_peer_b+port_peer_a+port_peer_b);
+}
+
-+/* **************************************************************** */
++/* ********************************** */
+
-+static void alloc_bitmask(u_int32_t tot_bits, bitmask_selector *selector)
++inline u_int32_t hash_pkt_header(struct pfring_pkthdr *hdr, u_char mask_src, u_char mask_dst)
+{
-+ u_int tot_mem = tot_bits/8;
-+
-+ if(tot_mem <= PAGE_SIZE)
-+ selector->order = 1;
-+ else {
-+ for(selector->order = 0; (PAGE_SIZE << selector->order) < tot_mem; selector->order++)
-+ ;
-+ }
-+
-+ printk("BITMASK: [order=%d][tot_mem=%d]\n", selector->order, tot_mem);
++ return(hash_pkt(hdr->parsed_pkt.vlan_id,
++ hdr->parsed_pkt.l3_proto,
++ mask_src ? 0 : hdr->parsed_pkt.ipv4_src,
++ mask_dst ? 0 : hdr->parsed_pkt.ipv4_dst,
++ mask_src ? 0 : hdr->parsed_pkt.l4_src_port,
++ mask_dst ? 0 : hdr->parsed_pkt.l4_dst_port));
++}
+
-+ while((selector->bits_memory = __get_free_pages(GFP_ATOMIC, selector->order)) == 0)
-+ if(selector->order-- == 0)
-+ break;
++/* ********************************** */
+
-+ if(selector->order == 0) {
-+ printk("BITMASK: ERROR not enough memory for bitmask\n");
-+ selector->num_bits = 0;
-+ return;
-+ }
++static int hash_bucket_match(filtering_hash_bucket *hash_bucket,
++ struct pfring_pkthdr *hdr,
++ u_char mask_src, u_char mask_dst)
++{
++ if((hash_bucket->rule.proto == hdr->parsed_pkt.l3_proto)
++ && (hash_bucket->rule.vlan_id == hdr->parsed_pkt.vlan_id)
++ && (((hash_bucket->rule.host_peer_a == (mask_src ? 0 : hdr->parsed_pkt.ipv4_src))
++ && (hash_bucket->rule.host_peer_b == (mask_dst ? 0 : hdr->parsed_pkt.ipv4_dst))
++ && (hash_bucket->rule.port_peer_a == (mask_src ? 0 : hdr->parsed_pkt.l4_src_port))
++ && (hash_bucket->rule.port_peer_b == (mask_dst ? 0 : hdr->parsed_pkt.l4_dst_port)))
++ ||
++ ((hash_bucket->rule.host_peer_a == (mask_dst ? 0 : hdr->parsed_pkt.ipv4_dst))
++ && (hash_bucket->rule.host_peer_b == (mask_src ? 0 : hdr->parsed_pkt.ipv4_src))
++ && (hash_bucket->rule.port_peer_a == (mask_dst ? 0 : hdr->parsed_pkt.l4_dst_port))
++ && (hash_bucket->rule.port_peer_b == (mask_src ? 0 : hdr->parsed_pkt.l4_src_port))))) {
++ hash_bucket->rule.jiffies_last_match = jiffies;
++ return(1);
++ } else
++ return(0);
++}
+
-+ tot_mem = PAGE_SIZE << selector->order;
-+ printk("BITMASK: succesfully allocated [tot_mem=%d][order=%d]\n",
-+ tot_mem, selector->order);
++/* ********************************** */
+
-+ selector->num_bits = tot_mem*8;
-+ selector->clashes = NULL;
-+ reset_bitmask(selector);
++inline int hash_bucket_match_rule(filtering_hash_bucket *hash_bucket,
++ hash_filtering_rule *rule)
++{
++ int debug = 0;
++
++ if(debug)
++ printk("[PF_RING] (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u) (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u)\n",
++ hash_bucket->rule.vlan_id, hash_bucket->rule.proto,
++ ((hash_bucket->rule.host_peer_a >> 24) & 0xff),
++ ((hash_bucket->rule.host_peer_a >> 16) & 0xff),
++ ((hash_bucket->rule.host_peer_a >> 8) & 0xff),
++ ((hash_bucket->rule.host_peer_a >> 0) & 0xff),
++ hash_bucket->rule.port_peer_a,
++ ((hash_bucket->rule.host_peer_b >> 24) & 0xff),
++ ((hash_bucket->rule.host_peer_b >> 16) & 0xff),
++ ((hash_bucket->rule.host_peer_b >> 8) & 0xff),
++ ((hash_bucket->rule.host_peer_b >> 0) & 0xff),
++ hash_bucket->rule.port_peer_b,
++ rule->vlan_id, rule->proto,
++ ((rule->host_peer_a >> 24) & 0xff),
++ ((rule->host_peer_a >> 16) & 0xff),
++ ((rule->host_peer_a >> 8) & 0xff),
++ ((rule->host_peer_a >> 0) & 0xff),
++ rule->port_peer_a,
++ ((rule->host_peer_b >> 24) & 0xff),
++ ((rule->host_peer_b >> 16) & 0xff),
++ ((rule->host_peer_b >> 8) & 0xff),
++ ((rule->host_peer_b >> 0) & 0xff),
++ rule->port_peer_b);
++
++ if((hash_bucket->rule.proto == rule->proto)
++ && (hash_bucket->rule.vlan_id == rule->vlan_id)
++ && (((hash_bucket->rule.host_peer_a == rule->host_peer_a)
++ && (hash_bucket->rule.host_peer_b == rule->host_peer_b)
++ && (hash_bucket->rule.port_peer_a == rule->port_peer_a)
++ && (hash_bucket->rule.port_peer_b == rule->port_peer_b))
++ ||
++ ((hash_bucket->rule.host_peer_a == rule->host_peer_b)
++ && (hash_bucket->rule.host_peer_b == rule->host_peer_a)
++ && (hash_bucket->rule.port_peer_a == rule->port_peer_b)
++ && (hash_bucket->rule.port_peer_b == rule->port_peer_a)))) {
++ hash_bucket->rule.jiffies_last_match = jiffies;
++ return(1);
++ } else
++ return(0);
+}
+
+/* ********************************** */
+
-+static void free_bitmask(bitmask_selector *selector)
++inline int hash_filtering_rule_match(hash_filtering_rule *a,
++ hash_filtering_rule *b)
+{
-+ if(selector->bits_memory > 0)
-+ free_pages(selector->bits_memory, selector->order);
++ int debug = 0;
++
++ if(debug)
++ printk("[PF_RING] (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u) (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u)\n",
++ a->vlan_id, a->proto,
++ ((a->host_peer_a >> 24) & 0xff),
++ ((a->host_peer_a >> 16) & 0xff),
++ ((a->host_peer_a >> 8) & 0xff),
++ ((a->host_peer_a >> 0) & 0xff),
++ a->port_peer_a,
++ ((a->host_peer_b >> 24) & 0xff),
++ ((a->host_peer_b >> 16) & 0xff),
++ ((a->host_peer_b >> 8) & 0xff),
++ ((a->host_peer_b >> 0) & 0xff),
++ a->port_peer_b,
++
++ b->vlan_id, b->proto,
++ ((b->host_peer_a >> 24) & 0xff),
++ ((b->host_peer_a >> 16) & 0xff),
++ ((b->host_peer_a >> 8) & 0xff),
++ ((b->host_peer_a >> 0) & 0xff),
++ b->port_peer_a,
++ ((b->host_peer_b >> 24) & 0xff),
++ ((b->host_peer_b >> 16) & 0xff),
++ ((b->host_peer_b >> 8) & 0xff),
++ ((b->host_peer_b >> 0) & 0xff),
++ b->port_peer_b);
++
++
++ if((a->proto == b->proto)
++ && (a->vlan_id == b->vlan_id)
++ && (((a->host_peer_a == b->host_peer_a)
++ && (a->host_peer_b == b->host_peer_b)
++ && (a->port_peer_a == b->port_peer_a)
++ && (a->port_peer_b == b->port_peer_b))
++ ||
++ ((a->host_peer_a == b->host_peer_b)
++ && (a->host_peer_b == b->host_peer_a)
++ && (a->port_peer_a == b->port_peer_b)
++ && (a->port_peer_b == b->port_peer_a)))) {
++ return(1);
++ } else
++ return(0);
+}
+
+/* ********************************** */
+
-+static void set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
-+ u_int32_t idx = the_bit % selector->num_bits;
-+
-+ if(BITMASK_ISSET(idx, selector)) {
-+ bitmask_counter_list *head = selector->clashes;
++/* 0 = no match, 1 = match */
++static int match_filtering_rule(struct ring_opt *the_ring,
++ filtering_rule_element *rule,
++ struct pfring_pkthdr *hdr,
++ struct sk_buff *skb,
++ int displ,
++ struct parse_buffer *parse_memory_buffer[],
++ u_int8_t *free_parse_mem,
++ u_int *last_matched_plugin,
++ packet_action_behaviour *behaviour)
++{
++ int debug = 0;
+
-+ printk("BITMASK: bit %u was already set\n", the_bit);
++ /* if(debug) printk("[PF_RING] match_filtering_rule()\n"); */
+
-+ while(head != NULL) {
-+ if(head->bit_id == the_bit) {
-+ head->bit_counter++;
-+ printk("BITMASK: bit %u is now set to %d\n", the_bit, head->bit_counter);
-+ return;
-+ }
++ *behaviour = use_rule_forward_policy; /* Default */
+
-+ head = head->next;
-+ }
++ if((rule->rule.core_fields.vlan_id > 0) && (hdr->parsed_pkt.vlan_id != rule->rule.core_fields.vlan_id)) return(0);
++ if((rule->rule.core_fields.proto > 0) && (hdr->parsed_pkt.l3_proto != rule->rule.core_fields.proto)) return(0);
+
-+ head = kmalloc(sizeof(bitmask_counter_list), GFP_KERNEL);
-+ if(head) {
-+ head->bit_id = the_bit;
-+ head->bit_counter = 1 /* previous value */ + 1 /* the requested set */;
-+ head->next = selector->clashes;
-+ selector->clashes = head;
-+ } else {
-+ printk("BITMASK: not enough memory\n");
-+ return;
-+ }
-+ } else {
-+ BITMASK_SET(idx, selector);
-+ printk("BITMASK: bit %u is now set\n", the_bit);
++ if(rule->rule.core_fields.host_low > 0) {
++ if(((hdr->parsed_pkt.ipv4_src < rule->rule.core_fields.host_low)
++ || (hdr->parsed_pkt.ipv4_src > rule->rule.core_fields.host_high))
++ && ((hdr->parsed_pkt.ipv4_dst < rule->rule.core_fields.host_low)
++ || (hdr->parsed_pkt.ipv4_dst > rule->rule.core_fields.host_high)))
++ return(0);
+ }
-+}
-+
-+/* ********************************** */
-+
-+static u_char is_set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
-+ u_int32_t idx = the_bit % selector->num_bits;
-+ return(BITMASK_ISSET(idx, selector));
-+}
+
-+/* ********************************** */
-+
-+static void clear_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
-+ u_int32_t idx = the_bit % selector->num_bits;
++ if((rule->rule.core_fields.port_high > 0)
++ && (!((hdr->parsed_pkt.l4_src_port >= rule->rule.core_fields.port_low)
++ && (hdr->parsed_pkt.l4_src_port <= rule->rule.core_fields.port_high)))
++ && (!((hdr->parsed_pkt.l4_dst_port >= rule->rule.core_fields.port_low)
++ && (hdr->parsed_pkt.l4_dst_port <= rule->rule.core_fields.port_high))))
++ return(0);
+
-+ if(!BITMASK_ISSET(idx, selector))
-+ printk("BITMASK: bit %u was not set\n", the_bit);
-+ else {
-+ bitmask_counter_list *head = selector->clashes, *prev = NULL;
++ if(rule->rule.balance_pool > 0) {
++ u_int32_t balance_hash = hash_pkt_header(hdr, 0, 0) % rule->rule.balance_pool;
++ if(balance_hash != rule->rule.balance_id) return(0);
++ }
+
-+ while(head != NULL) {
-+ if(head->bit_id == the_bit) {
-+ head->bit_counter--;
++ if(rule->pattern != NULL) {
++ if((hdr->parsed_pkt.pkt_detail.offset.payload_offset > 0)
++ && (hdr->caplen > hdr->parsed_pkt.pkt_detail.offset.payload_offset)) {
++ char *payload = (char*)&(skb->data[hdr->parsed_pkt.pkt_detail.offset.payload_offset /* -displ */]);
++ int i, rc, payload_len = hdr->caplen - hdr->parsed_pkt.pkt_detail.offset.payload_offset - displ;
++
++ if(payload_len > 0) {
++ if(debug) {
++ printk("[PF_RING] Trying to match pattern [caplen=%d][len=%d][displ=%d][payload_offset=%d][",
++ hdr->caplen, payload_len, displ, hdr->parsed_pkt.pkt_detail.offset.payload_offset);
++
++ for(i=0; i<payload_len; i++) printk("[%d/%c]", i, payload[i] & 0xFF);
++ printk("]\n");
++ }
++
++ payload[payload_len] = '\0';
++
++ if(debug) printk("[PF_RING] Attempt to match [%s]\n", payload);
++ rc = regexec(rule->pattern, payload);
++
++ if(debug)
++ printk("[PF_RING] Match returned: %d [payload_len=%d][%s]\n", rc, payload_len, payload);
++
++ if(rc == 0)
++ return(0); /* No match */
++ } else
++ return(0); /* No payload data */
++ } else
++ return(0); /* No payload data */
++ }
+
-+ printk("BITMASK: bit %u is now set to %d\n",
-+ the_bit, head->bit_counter);
++ if((rule->rule.extended_fields.filter_plugin_id > 0)
++ && (rule->rule.extended_fields.filter_plugin_id < MAX_PLUGIN_ID)
++ && (plugin_registration[rule->rule.extended_fields.filter_plugin_id] != NULL)
++ && (plugin_registration[rule->rule.extended_fields.filter_plugin_id]->pfring_plugin_filter_skb != NULL)
++ ) {
++ int rc;
+
-+ if(head->bit_counter == 1) {
-+ /* We can now delete this entry as '1' can be
-+ accommodated into the bitmask */
++ if(debug)
++ printk("[PF_RING] rule->plugin_id [rule_id=%d][filter_plugin_id=%d][plugin_action=%d][ptr=%p]\n",
++ rule->rule.rule_id,
++ rule->rule.extended_fields.filter_plugin_id,
++ rule->rule.plugin_action.plugin_id,
++ plugin_registration[rule->rule.plugin_action.plugin_id]);
+
-+ if(prev == NULL)
-+ selector->clashes = head->next;
-+ else
-+ prev->next = head->next;
++ rc = plugin_registration[rule->rule.extended_fields.filter_plugin_id]
++ ->pfring_plugin_filter_skb(the_ring, rule, hdr, skb,
++ &parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]);
+
-+ kfree(head);
-+ }
-+ return;
-+ }
++ if(parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]) *free_parse_mem = 1;
+
-+ prev = head; head = head->next;
++ if(rc <= 0) {
++ return(0); /* No match */
++ } else {
++ *last_matched_plugin = rule->rule.extended_fields.filter_plugin_id;
++ hdr->parsed_pkt.last_matched_plugin_id = rule->rule.extended_fields.filter_plugin_id;
++
++ if(debug)
++ printk("[PF_RING] [last_matched_plugin = %d][buffer=%p][len=%d]\n",
++ *last_matched_plugin, parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id],
++ parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id] ?
++ parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]->mem_len : 0);
+ }
-+
-+ BITMASK_CLR(idx, selector);
-+ printk("BITMASK: bit %u is now reset\n", the_bit);
+ }
-+}
+
-+/* ********************************** */
++ /* Action to be performed in case of match */
++ if((rule->rule.plugin_action.plugin_id != 0)
++ && (rule->rule.plugin_action.plugin_id < MAX_PLUGIN_ID)
++ && (plugin_registration[rule->rule.plugin_action.plugin_id] != NULL)
++ && (plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_handle_skb != NULL)
++ ) {
++ if(debug) printk("[PF_RING] Calling pfring_plugin_handle_skb()\n");
++
++ plugin_registration[rule->rule.plugin_action.plugin_id]
++ ->pfring_plugin_handle_skb(the_ring, rule, NULL, hdr, skb,
++ rule->rule.extended_fields.filter_plugin_id,
++ &parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id],
++ behaviour);
+
-+/* Hash function */
-+static u_int32_t sdb_hash(u_int32_t value) {
-+ u_int32_t hash = 0, i;
-+ u_int8_t str[sizeof(value)];
++ if(*last_matched_plugin == 0)
++ *last_matched_plugin = rule->rule.plugin_action.plugin_id;
+
-+ memcpy(str, &value, sizeof(value));
++ if(parse_memory_buffer[rule->rule.plugin_action.plugin_id]) *free_parse_mem = 1;
++ } else {
++ if(debug) printk("[PF_RING] Skipping pfring_plugin_handle_skb(plugin_action=%d)\n",
++ rule->rule.plugin_action.plugin_id);
++ }
+
-+ for(i = 0; i < sizeof(value); i++) {
-+ hash = str[i] + (hash << 6) + (hash << 16) - hash;
++ if(debug) {
++ printk("[PF_RING] MATCH: match_filtering_rule(vlan=%u, proto=%u, sip=%u, sport=%u, dip=%u, dport=%u)\n",
++ hdr->parsed_pkt.vlan_id, hdr->parsed_pkt.l3_proto, hdr->parsed_pkt.ipv4_src, hdr->parsed_pkt.l4_src_port,
++ hdr->parsed_pkt.ipv4_dst, hdr->parsed_pkt.l4_dst_port);
++ printk("[PF_RING] [rule(vlan=%u, proto=%u, ip=%u-%u, port=%u-%u)(behaviour=%d)]\n",
++ rule->rule.core_fields.vlan_id, rule->rule.core_fields.proto,
++ rule->rule.core_fields.host_low, rule->rule.core_fields.host_high,
++ rule->rule.core_fields.port_low,
++ rule->rule.core_fields.port_high, *behaviour);
+ }
+
-+ return(hash);
++ rule->rule.jiffies_last_match = jiffies;
++ return(1); /* match */
+}
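++
++/*
++ Evaluation order implemented above, where any failing test
++ returns 0 immediately: VLAN id, protocol, host range, port
++ range, balancing hash, payload regexp, filter plugin. On a
++ match the plugin action may override *behaviour, which defaults
++ to use_rule_forward_policy.
++*/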
+
+/* ********************************** */
+
-+static void handle_bloom_filter_rule(struct ring_opt *pfr, char *buf) {
-+ u_int count;
-+
-+ if(buf == NULL)
-+ return;
-+ else
-+ count = strlen(buf);
-+
-+ printk("PF_RING: -> handle_bloom_filter_rule(%s)\n", buf);
++static void add_pkt_to_ring(struct sk_buff *skb,
++ struct ring_opt *pfr,
++ struct pfring_pkthdr *hdr,
++ int displ, short channel_id,
++ int offset, void* plugin_mem)
++{
++ char *ring_bucket;
++ int idx;
++ FlowSlot *theSlot;
++ int32_t the_bit = 1 << channel_id;
+
-+ if((buf[count-1] == '\n') || (buf[count-1] == '\r')) buf[count-1] = '\0';
++ if(!pfr->ring_active) return;
+
-+ if(count > 1) {
-+ u_int32_t the_bit;
++#if defined(RING_DEBUG)
++ printk("[PF_RING] --> add_pkt_to_ring(len=%d) [pfr->channel_id=%d][channel_id=%d]\n",
++ hdr->len, pfr->channel_id, channel_id);
++#endif
+
-+ if(!strncmp(&buf[1], "vlan=", 5)) {
-+ sscanf(&buf[6], "%d", &the_bit);
++ if((pfr->channel_id != RING_ANY_CHANNEL)
++ && (channel_id != RING_ANY_CHANNEL)
++ && ((pfr->channel_id & the_bit) != the_bit))
++ return; /* Wrong channel */
+
-+ if(buf[0] == '+')
-+ set_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_add++;
-+ else
-+ clear_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_remove++;
-+ } else if(!strncmp(&buf[1], "mac=", 4)) {
-+ int a, b, c, d, e, f;
++ write_lock_bh(&pfr->ring_index_lock);
++ idx = pfr->slots_info->insert_idx;
++ idx++, theSlot = get_insert_slot(pfr);
++ pfr->slots_info->tot_pkts++;
+
-+ if(sscanf(&buf[5], "%02x:%02x:%02x:%02x:%02x:%02x:",
-+ &a, &b, &c, &d, &e, &f) == 6) {
-+ u_int32_t mac_addr = (a & 0xff) + (b & 0xff) + ((c & 0xff) << 24) + ((d & 0xff) << 16) + ((e & 0xff) << 8) + (f & 0xff);
++ if((theSlot == NULL) || (theSlot->slot_state != 0)) {
++ /* No room left */
++ pfr->slots_info->tot_lost++;
++ write_unlock_bh(&pfr->ring_index_lock);
++ return;
++ }
+
-+ /* printk("PF_RING: -> [%u][%u][%u][%u][%u][%u] -> [%u]\n", a, b, c, d, e, f, mac_addr); */
++ ring_bucket = &theSlot->bucket;
++ memcpy(ring_bucket, hdr, sizeof(struct pfring_pkthdr)); /* Copy extended packet header */
+
-+ if(buf[0] == '+')
-+ set_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_add++;
-+ else
-+ clear_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_remove++;
-+ } else
-+ printk("PF_RING: -> Invalid MAC address '%s'\n", &buf[5]);
-+ } else if(!strncmp(&buf[1], "ip=", 3)) {
-+ int a, b, c, d;
++ if((plugin_mem != NULL) && (offset > 0)) {
++ memcpy(&ring_bucket[sizeof(struct pfring_pkthdr)], plugin_mem, offset);
++ }
+
-+ if(sscanf(&buf[4], "%d.%d.%d.%d", &a, &b, &c, &d) == 4) {
-+ u_int32_t ip_addr = ((a & 0xff) << 24) + ((b & 0xff) << 16) + ((c & 0xff) << 8) + (d & 0xff);
++ if(skb != NULL) {
++ hdr->caplen = min(pfr->bucket_len-offset, hdr->caplen);
+
-+ if(buf[0] == '+')
-+ set_bit_bitmask(&pfr->ip_bitmask, ip_addr), set_bit_bitmask(&pfr->ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_add++;
-+ else
-+ clear_bit_bitmask(&pfr->ip_bitmask, ip_addr), clear_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_remove++;
-+ } else
-+ printk("PF_RING: -> Invalid IP address '%s'\n", &buf[4]);
-+ } else if(!strncmp(&buf[1], "port=", 5)) {
-+ sscanf(&buf[6], "%d", &the_bit);
++ if(hdr->caplen > 0) {
++#if defined(RING_DEBUG)
++ printk("[PF_RING] --> [caplen=%d][len=%d][displ=%d][parsed_header_len=%d][bucket_len=%d]\n",
++ hdr->caplen, hdr->len, displ, hdr->parsed_header_len, pfr->bucket_len);
++#endif
++ skb_copy_bits(skb, -displ, &ring_bucket[sizeof(struct pfring_pkthdr)+offset], hdr->caplen);
++ } else {
++ if(hdr->parsed_header_len >= pfr->bucket_len) {
++ static u_char print_once = 0;
+
-+ if(buf[0] == '+')
-+ set_bit_bitmask(&pfr->port_bitmask, the_bit), set_bit_bitmask(&pfr->port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_add++;
-+ else
-+ clear_bit_bitmask(&pfr->port_bitmask, the_bit), clear_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_remove++;
-+ } else if(!strncmp(&buf[1], "proto=", 6)) {
-+ if(!strncmp(&buf[7], "tcp", 3)) the_bit = 6;
-+ else if(!strncmp(&buf[7], "udp", 3)) the_bit = 17;
-+ else if(!strncmp(&buf[7], "icmp", 4)) the_bit = 1;
-+ else sscanf(&buf[7], "%d", &the_bit);
-+
-+ if(buf[0] == '+')
-+ set_bit_bitmask(&pfr->proto_bitmask, the_bit);
-+ else
-+ clear_bit_bitmask(&pfr->proto_bitmask, the_bit);
-+ } else
-+ printk("PF_RING: -> Unknown rule type '%s'\n", buf);
++ if(!print_once) {
++ printk("[PF_RING] WARNING: the bucket len is [%d] shorter than the plugin parsed header [%d]\n",
++ pfr->bucket_len, hdr->parsed_header_len);
++ print_once = 1;
++ }
++ }
++ }
+ }
-+}
+
-+/* ********************************** */
++ if(idx == pfr->slots_info->tot_slots)
++ pfr->slots_info->insert_idx = 0;
++ else
++ pfr->slots_info->insert_idx = idx;
+
-+static void reset_bloom_filters(struct ring_opt *pfr) {
-+ reset_bitmask(&pfr->mac_bitmask);
-+ reset_bitmask(&pfr->vlan_bitmask);
-+ reset_bitmask(&pfr->ip_bitmask); reset_bitmask(&pfr->twin_ip_bitmask);
-+ reset_bitmask(&pfr->port_bitmask); reset_bitmask(&pfr->twin_port_bitmask);
-+ reset_bitmask(&pfr->proto_bitmask);
++#if defined(RING_DEBUG)
++ printk("[PF_RING] ==> insert_idx=%d\n", pfr->slots_info->insert_idx);
++#endif
+
-+ pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
-+ pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
-+ pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
-+ pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
-+ pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
++ pfr->slots_info->tot_insert++;
++ theSlot->slot_state = 1;
++ write_unlock_bh(&pfr->ring_index_lock);
+
-+ printk("PF_RING: rules have been reset\n");
++ /* wakeup in case of poll() */
++ if(waitqueue_active(&pfr->ring_slots_waitqueue))
++ wake_up_interruptible(&pfr->ring_slots_waitqueue);
+}
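++
++/*
++ Resulting slot layout (illustrative):
++
++ | struct pfring_pkthdr | plugin mem (offset bytes) | packet data (caplen) |
++
++ Note that pfr->channel_id acts as a bitmask: a socket bound with
++ channel_id = (1 << 0) | (1 << 2) only accepts packets from
++ channels 0 and 2 (see the 'the_bit' test above).
++*/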
+
+/* ********************************** */
+
-+static void init_blooms(struct ring_opt *pfr) {
-+ alloc_bitmask(4096, &pfr->mac_bitmask);
-+ alloc_bitmask(4096, &pfr->vlan_bitmask);
-+ alloc_bitmask(32768, &pfr->ip_bitmask); alloc_bitmask(32768, &pfr->twin_ip_bitmask);
-+ alloc_bitmask(4096, &pfr->port_bitmask); alloc_bitmask(4096, &pfr->twin_port_bitmask);
-+ alloc_bitmask(4096, &pfr->proto_bitmask);
-+
-+ pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
-+ pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
-+ pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
-+ pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
-+ pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
-+
-+ reset_bloom_filters(pfr);
++static int add_hdr_to_ring(struct ring_opt *pfr,
++ struct pfring_pkthdr *hdr) {
++ read_lock_bh(&ring_mgmt_lock);
++ add_pkt_to_ring(NULL, pfr, hdr, 0, 0, 0, NULL);
++ read_unlock_bh(&ring_mgmt_lock);
++ return(0);
+}
+
+/* ********************************** */
+
-+inline int MatchFound (void* id, int index, void *data) { return(0); }
++/* Free filtering placeholders */
++static void free_parse_memory(struct parse_buffer *parse_memory_buffer[]) {
++ int i;
++
++ for(i=1; i<=max_registered_plugin_id; i++)
++ if(parse_memory_buffer[i]) {
++ if(parse_memory_buffer[i]->mem != NULL) {
++ kfree(parse_memory_buffer[i]->mem);
++ }
++
++ kfree(parse_memory_buffer[i]);
++ }
++}
+
+/* ********************************** */
+
-+static void add_skb_to_ring(struct sk_buff *skb,
-+ struct ring_opt *pfr,
-+ u_char recv_packet,
-+ u_char real_skb /* 1=skb 0=faked skb */) {
-+ FlowSlot *theSlot;
-+ int idx, displ, fwd_pkt = 0;
-+
-+ if(recv_packet) {
-+ /* Hack for identifying a packet received by the e1000 */
-+ if(real_skb) {
-+ displ = SKB_DISPLACEMENT;
-+ } else
-+ displ = 0; /* Received by the e1000 wrapper */
-+ } else
-+ displ = 0;
++static int add_skb_to_ring(struct sk_buff *skb,
++ struct ring_opt *pfr,
++ struct pfring_pkthdr *hdr,
++ int is_ip_pkt,
++ int displ,
++ short channel_id)
++{
++ int fwd_pkt = 0;
++ struct list_head *ptr, *tmp_ptr;
++ u_int8_t free_parse_mem = 0;
++ u_int last_matched_plugin = 0, debug = 0;
++ u_char hash_found = 0;
++ struct parse_buffer *parse_memory_buffer[MAX_PLUGIN_ID] = { NULL };
++ /* Holds the parsed packet information produced by
++ plugins; this memory is freed once the packet
++ has been handled */
+
-+ write_lock(&pfr->ring_index_lock);
-+ pfr->slots_info->tot_pkts++;
-+ write_unlock(&pfr->ring_index_lock);
++ if(!pfr->ring_active) return(-1);
++ atomic_set(&pfr->num_ring_users, 1);
+
-+ /* BPF Filtering (from af_packet.c) */
++ /* [1] BPF Filtering (from af_packet.c) */
+ if(pfr->bpfFilter != NULL) {
+ unsigned res = 1, len;
+
+ len = skb->len-skb->data_len;
+
-+ write_lock(&pfr->ring_index_lock);
+ skb->data -= displ;
+ res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
+ skb->data += displ;
-+ write_unlock(&pfr->ring_index_lock);
+
+ if(res == 0) {
+ /* Filter failed */
-+
+#if defined(RING_DEBUG)
-+ printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
++ printk("[PF_RING] add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
+ "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
+ (int)skb->len, pfr->slots_info->tot_pkts,
+ pfr->slots_info->insert_idx,
+ skb->pkt_type, skb->cloned);
+#endif
-+
-+ return;
++ atomic_set(&pfr->num_ring_users, 0);
++ return(-1);
+ }
+ }
+
-+ /* ************************** */
++#if defined(RING_DEBUG)
++ printk("[PF_RING] add_skb_to_ring: [displ=%d][len=%d][caplen=%d]"
++ "[is_ip_pkt=%d][%d -> %d]\n",
++ displ, hdr->len, hdr->caplen,
++ is_ip_pkt, hdr->parsed_pkt.l4_src_port,
++ hdr->parsed_pkt.l4_dst_port);
++#endif
+
-+ if(pfr->sample_rate > 1) {
-+ if(pfr->pktToSample == 0) {
-+ write_lock(&pfr->ring_index_lock);
-+ pfr->pktToSample = pfr->sample_rate;
-+ write_unlock(&pfr->ring_index_lock);
-+ } else {
-+ write_lock(&pfr->ring_index_lock);
-+ pfr->pktToSample--;
-+ write_unlock(&pfr->ring_index_lock);
++ /* ************************************* */
+
+#if defined(RING_DEBUG)
-+ printk("add_skb_to_ring(skb): sampled packet [len=%d]"
-+ "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
-+ (int)skb->len, pfr->slots_info->tot_pkts,
-+ pfr->slots_info->insert_idx,
-+ skb->pkt_type, skb->cloned);
++ printk("[PF_RING] add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
++ "[pkt_type=%d][cloned=%d]\n",
++ (int)skb->len, pfr->slots_info->tot_pkts,
++ pfr->slots_info->insert_idx,
++ skb->pkt_type, skb->cloned);
+#endif
-+ return;
-+ }
-+ }
+
-+ /* ************************************* */
++ /* Extensions */
++ fwd_pkt = pfr->rules_default_accept_policy;
++ /* printk("[PF_RING] rules_default_accept_policy: [fwd_pkt=%d]\n", fwd_pkt); */
++
++ /* ************************** */
+
-+ if((pfr->reflector_dev != NULL)
-+ && (!netif_queue_stopped(pfr->reflector_dev))) {
-+ int cpu = smp_processor_id();
++ /* [2] Filter packet according to rules */
+
-+ /* increase reference counter so that this skb is not freed */
-+ atomic_inc(&skb->users);
++ if(0)
++ printk("[PF_RING] About to evaluate packet [len=%d][tot=%llu][insertIdx=%d]"
++ "[pkt_type=%d][cloned=%d]\n",
++ (int)skb->len, pfr->slots_info->tot_pkts,
++ pfr->slots_info->insert_idx,
++ skb->pkt_type, skb->cloned);
+
-+ skb->data -= displ;
++ /* [2.1] Search the hash */
++ if(pfr->filtering_hash != NULL) {
++ u_int hash_idx;
++ filtering_hash_bucket *hash_bucket;
+
-+ /* send it */
-+ if (netdev_get_tx_queue(pfr->reflector_dev, 0)->xmit_lock_owner != cpu) {
-+ /* Patch below courtesy of Matthew J. Roth <mroth@imminc.com> */
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
-+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
-+ pfr->reflector_dev->xmit_lock_owner = cpu;
-+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
-+#else
-+ netif_tx_lock_bh(pfr->reflector_dev);
-+#endif
-+ if (pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev) == 0) {
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
-+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
-+ pfr->reflector_dev->xmit_lock_owner = -1;
-+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
-+#else
-+ netif_tx_unlock_bh(pfr->reflector_dev);
-+#endif
-+ skb->data += displ;
-+#if defined(RING_DEBUG)
-+ printk("++ hard_start_xmit succeeded\n");
-+#endif
-+ return; /* OK */
++ hash_idx = hash_pkt_header(hdr, 0, 0) % DEFAULT_RING_HASH_SIZE;
++ hash_bucket = pfr->filtering_hash[hash_idx];
++
++ while(hash_bucket != NULL) {
++ if(hash_bucket_match(hash_bucket, hdr, 0, 0)) {
++ hash_found = 1;
++ break;
++ } else
++ hash_bucket = hash_bucket->next;
++ } /* while */
++
++ if(hash_found) {
++ packet_action_behaviour behaviour = forward_packet_and_stop_rule_evaluation;
++
++ if((hash_bucket->rule.plugin_action.plugin_id != 0)
++ && (hash_bucket->rule.plugin_action.plugin_id < MAX_PLUGIN_ID)
++ && (plugin_registration[hash_bucket->rule.plugin_action.plugin_id] != NULL)
++ && (plugin_registration[hash_bucket->rule.plugin_action.plugin_id]->pfring_plugin_handle_skb != NULL)
++ ) {
++ plugin_registration[hash_bucket->rule.plugin_action.plugin_id]
++ ->pfring_plugin_handle_skb(pfr, NULL, hash_bucket, hdr, skb,
++ 0 /* no plugin */,
++ &parse_memory_buffer[hash_bucket->rule.plugin_action.plugin_id],
++ &behaviour);
++
++ if(parse_memory_buffer[hash_bucket->rule.plugin_action.plugin_id]) free_parse_mem = 1;
++ last_matched_plugin = hash_bucket->rule.plugin_action.plugin_id;
++ hdr->parsed_pkt.last_matched_plugin_id = hash_bucket->rule.plugin_action.plugin_id;
+ }
+
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
-+ spin_lock_bh(&pfr->reflector_dev->xmit_lock);
-+ pfr->reflector_dev->xmit_lock_owner = -1;
-+ spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
-+#else
-+ netif_tx_unlock_bh(pfr->reflector_dev);
-+#endif
++ if((behaviour == forward_packet_and_stop_rule_evaluation)
++ || (behaviour == forward_packet_add_rule_and_stop_rule_evaluation)
++ )
++ fwd_pkt = 1;
++ else if(behaviour == dont_forward_packet_and_stop_rule_evaluation)
++ fwd_pkt = 0;
++ else {
++ if(hash_bucket->rule.rule_action == forward_packet_and_stop_rule_evaluation) {
++ fwd_pkt = 1;
++ } else if(hash_bucket->rule.rule_action == dont_forward_packet_and_stop_rule_evaluation) {
++ fwd_pkt = 0;
++ } else if(hash_bucket->rule.rule_action == execute_action_and_continue_rule_evaluation) {
++ hash_found = 0; /* This way we also evaluate the list of rules */
++ }
++ }
++ } else {
++ /* printk("[PF_RING] Packet not found\n"); */
+ }
-+
-+#if defined(RING_DEBUG)
-+ printk("++ hard_start_xmit failed\n");
-+#endif
-+ skb->data += displ;
-+ return; /* -ENETDOWN */
+ }
+
-+ /* ************************************* */
-+
-+#if defined(RING_DEBUG)
-+ printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
-+ "[pkt_type=%d][cloned=%d]\n",
-+ (int)skb->len, pfr->slots_info->tot_pkts,
-+ pfr->slots_info->insert_idx,
-+ skb->pkt_type, skb->cloned);
-+#endif
++ /* [2.2] Search rules list */
++ if((!hash_found) && (pfr->num_filtering_rules > 0)) {
++ list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
++ {
++ filtering_rule_element *entry;
++ packet_action_behaviour behaviour = forward_packet_and_stop_rule_evaluation;
+
-+ idx = pfr->slots_info->insert_idx;
-+ theSlot = get_insert_slot(pfr);
++ entry = list_entry(ptr, filtering_rule_element, list);
+
-+ if((theSlot != NULL) && (theSlot->slot_state == 0)) {
-+ struct pcap_pkthdr *hdr;
-+ char *bucket;
-+ int is_ip_pkt, debug = 0;
++ if(match_filtering_rule(pfr, entry, hdr, skb, displ,
++ parse_memory_buffer, &free_parse_mem,
++ &last_matched_plugin, &behaviour))
++ {
++
++ if(behaviour == use_rule_forward_policy)
++ behaviour = entry->rule.rule_action;
++
++ if(debug) printk("[PF_RING] behaviour=%d\n", behaviour);
++
++ if(behaviour == forward_packet_and_stop_rule_evaluation) {
++ fwd_pkt = 1;
++ break;
++ } else if(behaviour == forward_packet_add_rule_and_stop_rule_evaluation) {
++ filtering_hash_bucket *hash_bucket;
++
++ fwd_pkt = 1;
++
++ hash_bucket = (filtering_hash_bucket*)kcalloc(1, sizeof(filtering_hash_bucket), GFP_KERNEL);
++
++ if(hash_bucket) {
++ int rc;
++
++ hash_bucket->rule.vlan_id = hdr->parsed_pkt.vlan_id;
++ hash_bucket->rule.proto = hdr->parsed_pkt.l3_proto;
++ hash_bucket->rule.host_peer_a = hdr->parsed_pkt.ipv4_src;
++ hash_bucket->rule.host_peer_b = hdr->parsed_pkt.ipv4_dst;
++ hash_bucket->rule.port_peer_a = hdr->parsed_pkt.l4_src_port;
++ hash_bucket->rule.port_peer_b = hdr->parsed_pkt.l4_dst_port;
++ hash_bucket->rule.rule_action = forward_packet_and_stop_rule_evaluation;
++ hash_bucket->rule.jiffies_last_match = jiffies; /* Avoid immediate rule purging */
++
++ //write_lock_bh(&pfr->ring_rules_lock);
++ rc = pfr->handle_hash_rule(pfr, hash_bucket, 1 /* add_rule_from_plugin */);
++ pfr->num_filtering_rules++;
++ // write_unlock_bh(&pfr->ring_rules_lock);
++
++ if(rc != 0) {
++ kfree(hash_bucket);
++ return(-1);
++ } else {
++ if(debug) printk("[PF_RING] Added rule: [%d.%d.%d.%d:%d <-> %d.%d.%d.%d:%d][tot_rules=%d]\n",
++ ((hash_bucket->rule.host_peer_a >> 24) & 0xff),
++ ((hash_bucket->rule.host_peer_a >> 16) & 0xff),
++ ((hash_bucket->rule.host_peer_a >> 8) & 0xff),
++ ((hash_bucket->rule.host_peer_a >> 0) & 0xff),
++ hash_bucket->rule.port_peer_a,
++ ((hash_bucket->rule.host_peer_b >> 24) & 0xff),
++ ((hash_bucket->rule.host_peer_b >> 16) & 0xff),
++ ((hash_bucket->rule.host_peer_b >> 8) & 0xff),
++ ((hash_bucket->rule.host_peer_b >> 0) & 0xff),
++ hash_bucket->rule.port_peer_b,
++ pfr->num_filtering_rules);
++ }
++ }
+
-+ /* Update Index */
-+ idx++;
++ break;
++ } else if(behaviour == dont_forward_packet_and_stop_rule_evaluation) {
++ fwd_pkt = 0;
++ break;
++ } else {
++ if(entry->rule.rule_action == forward_packet_and_stop_rule_evaluation) {
++ fwd_pkt = 1;
++ break;
++ } else if(entry->rule.rule_action == dont_forward_packet_and_stop_rule_evaluation) {
++ fwd_pkt = 0;
++ break;
++ } else if(entry->rule.rule_action == execute_action_and_continue_rule_evaluation) {
++ /* The action has already been performed inside match_filtering_rule()
++ hence instead of stopping rule evaluation, the next rule
++ will be evaluated */
++ }
++ }
++ }
++ } /* for */
++ }
+
-+ bucket = &theSlot->bucket;
-+ hdr = (struct pcap_pkthdr*)bucket;
++ if(fwd_pkt) {
++ /* We accept the packet: it needs to be queued */
+
-+ /* BD - API changed for time keeping */
-+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
-+ if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
++ /* [3] Packet sampling */
++ if(pfr->sample_rate > 1) {
++ write_lock_bh(&pfr->ring_index_lock);
++ pfr->slots_info->tot_pkts++;
+
-+ hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
-+#else
-+ if(skb->tstamp.tv64 == 0) __net_timestamp(skb);
++ if(pfr->pktToSample == 0) {
++ pfr->pktToSample = pfr->sample_rate;
++ } else {
++ pfr->pktToSample--;
+
-+ struct timeval tv = ktime_to_timeval(skb->tstamp);
-+ hdr->ts.tv_sec = tv.tv_sec, hdr->ts.tv_usec = tv.tv_usec;
++#if defined(RING_DEBUG)
++ printk("[PF_RING] add_skb_to_ring(skb): sampled packet [len=%d]"
++ "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
++ (int)skb->len, pfr->slots_info->tot_pkts,
++ pfr->slots_info->insert_idx,
++ skb->pkt_type, skb->cloned);
+#endif
-+ hdr->caplen = skb->len+displ;
+
-+ if(hdr->caplen > pfr->slots_info->data_len)
-+ hdr->caplen = pfr->slots_info->data_len;
++ write_unlock_bh(&pfr->ring_index_lock);
++ if(free_parse_mem) free_parse_memory(parse_memory_buffer);
++ atomic_set(&pfr->num_ring_users, 0);
++ return(-1);
++ }
+
-+ hdr->len = skb->len+displ;
++ write_unlock_bh(&pfr->ring_index_lock);
++ }
+
-+ /* Extensions */
-+ is_ip_pkt = parse_pkt(skb, displ,
-+ &hdr->l3_proto,
-+ &hdr->eth_type,
-+ &hdr->l3_offset,
-+ &hdr->l4_offset,
-+ &hdr->vlan_id,
-+ &hdr->ipv4_src,
-+ &hdr->ipv4_dst,
-+ &hdr->l4_src_port,
-+ &hdr->l4_dst_port,
-+ &hdr->payload_offset);
++ /* [4] Check if there is a reflector device defined */
++ if((pfr->reflector_dev != NULL)
++ && (!netif_queue_stopped(pfr->reflector_dev) /* TX is in good shape */)
++ )
++ {
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
++ struct netdev_queue *txq = netdev_get_tx_queue(pfr->reflector_dev, 0 /* TX queue 0 */);
++#endif
++ int ret;
+
-+ if(is_ip_pkt && pfr->bitmask_enabled) {
-+ int vlan_match = 0;
++ atomic_inc(&skb->users); /* Prevent others from freeing the skb while we use it */
+
-+ fwd_pkt = 0;
++ HARD_TX_LOCK(pfr->reflector_dev,
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
++ txq,
++#endif
++ smp_processor_id());
++ skb->data -= displ, skb->len += displ;
++ ret = pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev);
++ skb->data += displ, skb->len -= displ;
++ HARD_TX_UNLOCK(pfr->reflector_dev
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
++ , txq
++#endif
++ );
++
++#if defined(RING_DEBUG)
++ printk("[PF_RING] reflect(len=%d, displ=%d): %d\n", skb->len, displ, ret);
++#endif
+
-+ if(debug) {
-+ if(is_ip_pkt)
-+ printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d][sport=%d][dport=%d][src=%u][dst=%u]\n",
-+ hdr->l3_proto, hdr->vlan_id, hdr->l4_src_port, hdr->l4_dst_port, hdr->ipv4_src, hdr->ipv4_dst);
-+ else
-+ printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d]\n", hdr->l3_proto, hdr->vlan_id);
++ atomic_set(&pfr->num_ring_users, 0); /* Done */
++ if(free_parse_mem) free_parse_memory(parse_memory_buffer);
++ return(ret == NETDEV_TX_OK ? 0 : -ENETDOWN); /* -ENETDOWN */
+ }
+
-+ if(hdr->vlan_id != (u_int16_t)-1) {
-+ vlan_match = is_set_bit_bitmask(&pfr->vlan_bitmask, hdr->vlan_id);
-+ } else
-+ vlan_match = 1;
-+
-+ if(vlan_match) {
-+ struct ethhdr *eh = (struct ethhdr*)(skb->data);
-+ u_int32_t src_mac = (eh->h_source[0] & 0xff) + (eh->h_source[1] & 0xff) + ((eh->h_source[2] & 0xff) << 24)
-+ + ((eh->h_source[3] & 0xff) << 16) + ((eh->h_source[4] & 0xff) << 8) + (eh->h_source[5] & 0xff);
-+
-+ if(debug) printk(KERN_INFO "PF_RING: [src_mac=%u]\n", src_mac);
++ /* No reflector device: the packet needs to be queued */
++ if(hdr->caplen > 0) {
++ /* Copy the packet into the bucket */
++ int offset;
++ void *mem;
+
-+ fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, src_mac);
++ if((last_matched_plugin > 0)
++ && (parse_memory_buffer[last_matched_plugin] != NULL)) {
++ offset = hdr->parsed_header_len = parse_memory_buffer[last_matched_plugin]->mem_len;
+
-+ if(!fwd_pkt) {
-+ u_int32_t dst_mac = (eh->h_dest[0] & 0xff) + (eh->h_dest[1] & 0xff) + ((eh->h_dest[2] & 0xff) << 24)
-+ + ((eh->h_dest[3] & 0xff) << 16) + ((eh->h_dest[4] & 0xff) << 8) + (eh->h_dest[5] & 0xff);
++ hdr->parsed_pkt.last_matched_plugin_id = last_matched_plugin;
+
-+ if(debug) printk(KERN_INFO "PF_RING: [dst_mac=%u]\n", dst_mac);
++#if defined(RING_DEBUG)
++ printk("[PF_RING] --> [last_matched_plugin = %d][parsed_header_len=%d]\n",
++ last_matched_plugin, hdr->parsed_header_len);
++#endif
+
-+ fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, dst_mac);
++ if(offset > pfr->bucket_len) offset = hdr->parsed_header_len = pfr->bucket_len;
+
-+ if(is_ip_pkt && (!fwd_pkt)) {
-+ fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_src);
++ mem = parse_memory_buffer[last_matched_plugin]->mem;
++ } else
++ offset = 0, hdr->parsed_header_len = 0, mem = NULL;
+
-+ if(!fwd_pkt) {
-+ fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_dst);
++ add_pkt_to_ring(skb, pfr, hdr, displ, channel_id, offset, mem);
++ }
++ }
+
-+ if((!fwd_pkt) && ((hdr->l3_proto == IPPROTO_TCP)
-+ || (hdr->l3_proto == IPPROTO_UDP))) {
-+ fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_src_port);
-+ if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_dst_port);
-+ }
++#if defined(RING_DEBUG)
++ printk("[PF_RING] [pfr->slots_info->insert_idx=%d]\n", pfr->slots_info->insert_idx);
++#endif
+
-+ if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->proto_bitmask, hdr->l3_proto);
-+ }
-+ }
-+ }
-+ }
-+ } else
-+ fwd_pkt = 1;
++ if(free_parse_mem) free_parse_memory(parse_memory_buffer);
++ atomic_set(&pfr->num_ring_users, 0);
+
-+ if(fwd_pkt && (pfr->acsm != NULL)) {
-+ if((hdr->payload_offset > 0) && ((skb->len+skb->mac_len) > hdr->payload_offset)) {
-+ char *payload = (skb->data-displ+hdr->payload_offset);
-+ int payload_len = skb->len /* + skb->mac_len */ - hdr->payload_offset;
++ return(0);
++}
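++
++/*
++ Processing order implemented above: [1] BPF filter, [2] filtering
++ rules (exact-match hash first, then the wildcard rules list),
++ [3] sampling, [4] optional reflection to another device; only
++ then is the packet copied into the ring via add_pkt_to_ring().
++*/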
+
-+ if((payload_len > 0)
-+ && ((hdr->l4_src_port == 80) || (hdr->l4_dst_port == 80))) {
-+ int rc;
-+
-+ if(0) {
-+ char buf[1500];
-+
-+ memcpy(buf, payload, payload_len);
-+ buf[payload_len] = '\0';
-+ printk("[%s]\n", payload);
-+ }
++/* ********************************** */
+
-+ /* printk("Tring to match pattern [len=%d][%s]\n", payload_len, payload); */
-+ rc = acsmSearch2(pfr->acsm, payload, payload_len, MatchFound, (void *)0) ? 1 : 0;
++static u_int hash_skb(ring_cluster_element *cluster_ptr,
++ struct sk_buff *skb,
++ int displ)
++{
++ u_int idx;
++ struct iphdr *ip;
+
-+ // printk("Match result: %d\n", fwd_pkt);
-+ if(rc) {
-+ printk("Pattern matched!\n");
-+ } else {
-+ fwd_pkt = 0;
-+ }
-+ } else
-+ fwd_pkt = 0;
-+ } else
-+ fwd_pkt = 0;
++ if(cluster_ptr->cluster.hashing_mode == cluster_round_robin)
++ {
++ idx = cluster_ptr->cluster.hashing_id++;
+ }
++ else
++ {
++ /* Per-flow clustering */
++ if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr))
++ {
++ /*
++ skb->data+displ
+
-+ if(fwd_pkt) {
-+ memcpy(&bucket[sizeof(struct pcap_pkthdr)], skb->data-displ, hdr->caplen);
++ Always points to the IP part of the packet
++ */
++ ip = (struct iphdr*)(skb->data+displ);
++ idx = ip->saddr+ip->daddr+ip->protocol;
+
-+#if defined(RING_DEBUG)
-+ {
-+ static unsigned int lastLoss = 0;
-+
-+ if(pfr->slots_info->tot_lost
-+ && (lastLoss != pfr->slots_info->tot_lost)) {
-+ printk("add_skb_to_ring(%d): [data_len=%d]"
-+ "[hdr.caplen=%d][skb->len=%d]"
-+ "[pcap_pkthdr=%d][removeIdx=%d]"
-+ "[loss=%lu][page=%u][slot=%u]\n",
-+ idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len,
-+ sizeof(struct pcap_pkthdr),
-+ pfr->slots_info->remove_idx,
-+ (long unsigned int)pfr->slots_info->tot_lost,
-+ pfr->insert_page_id, pfr->insert_slot_id);
-+
-+ lastLoss = pfr->slots_info->tot_lost;
++ if(ip->protocol == IPPROTO_TCP)
++ {
++ struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
++ +sizeof(struct iphdr));
++ idx += tcp->source+tcp->dest;
++ }
++ else if(ip->protocol == IPPROTO_UDP)
++ {
++ struct udphdr *udp = (struct udphdr*)(skb->data+displ
++ +sizeof(struct iphdr));
++ idx += udp->source+udp->dest;
++ }
+ }
-+ }
-+#endif
-+
-+ write_lock(&pfr->ring_index_lock);
-+ if(idx == pfr->slots_info->tot_slots)
-+ pfr->slots_info->insert_idx = 0;
+ else
-+ pfr->slots_info->insert_idx = idx;
-+
-+ pfr->slots_info->tot_insert++;
-+ theSlot->slot_state = 1;
-+ write_unlock(&pfr->ring_index_lock);
++ idx = skb->len;
+ }
-+ } else {
-+ write_lock(&pfr->ring_index_lock);
-+ pfr->slots_info->tot_lost++;
-+ write_unlock(&pfr->ring_index_lock);
+
-+#if defined(RING_DEBUG)
-+ printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
-+ "[removeIdx=%u][insertIdx=%u]\n",
-+ (long unsigned int)pfr->slots_info->tot_lost,
-+ pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
++ return(idx % cluster_ptr->cluster.num_cluster_elements);
++}
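++
++/*
++ Note (illustrative): the additive hash is symmetric in source
++ and destination, so both directions of a flow, e.g.
++ 10.0.0.1:1234 <-> 10.0.0.2:80, yield the same idx and are
++ delivered to the same cluster element.
++*/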
++
++/* ********************************** */
++
++static int register_plugin(struct pfring_plugin_registration *reg)
++{
++ if(reg == NULL) return(-1);
++
++#ifdef RING_DEBUG
++ printk("[PF_RING] --> register_plugin(%d)\n", reg->plugin_id);
+#endif
-+ }
+
-+ if(fwd_pkt) {
++ if((reg->plugin_id >= MAX_PLUGIN_ID) || (reg->plugin_id == 0))
++ return(-EINVAL);
+
-+ /* wakeup in case of poll() */
-+ if(waitqueue_active(&pfr->ring_slots_waitqueue))
-+ wake_up_interruptible(&pfr->ring_slots_waitqueue);
++ if(plugin_registration[reg->plugin_id] != NULL)
++ return(-EINVAL); /* plugin already registered */
++ else {
++ plugin_registration[reg->plugin_id] = reg;
++ plugin_registration_size++;
++
++ max_registered_plugin_id = max(max_registered_plugin_id, reg->plugin_id);
++
++ printk("[PF_RING] registered plugin [id=%d][max=%d][%p]\n",
++ reg->plugin_id, max_registered_plugin_id, plugin_registration[reg->plugin_id]);
++ try_module_get(THIS_MODULE); /* Increment usage count */
++ return(0);
+ }
+}
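++
++/*
++ Registration sketch for a hypothetical plugin module (only the
++ fields used elsewhere in this file are shown):
++
++ static struct pfring_plugin_registration my_reg = {
++ .plugin_id = 7, // hypothetical id, 0 < id < MAX_PLUGIN_ID
++ .pfring_plugin_filter_skb = my_filter_skb, // hypothetical callback
++ };
++ register_plugin(&my_reg); // paired with unregister_plugin() below
++*/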
+
+/* ********************************** */
+
-+static u_int hash_skb(struct ring_cluster *cluster_ptr,
-+ struct sk_buff *skb, u_char recv_packet) {
-+ u_int idx;
-+ int displ;
-+ struct iphdr *ip;
++int unregister_plugin(u_int16_t pfring_plugin_id)
++{
++ int i;
+
-+ if(cluster_ptr->hashing_mode == cluster_round_robin) {
-+ idx = cluster_ptr->hashing_id++;
-+ } else {
-+ /* Per-flow clustering */
-+ if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
-+ if(recv_packet)
-+ displ = 0;
-+ else
-+ displ = SKB_DISPLACEMENT;
++ if(pfring_plugin_id >= MAX_PLUGIN_ID)
++ return(-EINVAL);
+
-+ /*
-+ skb->data+displ
++ if(plugin_registration[pfring_plugin_id] == NULL)
++ return(-EINVAL); /* plugin not registered */
++ else {
++ struct list_head *ptr, *tmp_ptr, *ring_ptr, *ring_tmp_ptr;
+
-+ Always points to to the IP part of the packet
-+ */
++ plugin_registration[pfring_plugin_id] = NULL;
++ plugin_registration_size--;
++
++ read_lock_bh(&ring_mgmt_lock);
++ list_for_each_safe(ring_ptr, ring_tmp_ptr, &ring_table) {
++ struct ring_element *entry = list_entry(ring_ptr, struct ring_element, list);
++ struct ring_opt *pfr = ring_sk(entry->sk);
+
-+ ip = (struct iphdr*)(skb->data+displ);
++ list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
++ {
++ filtering_rule_element *rule;
++
++ rule = list_entry(ptr, filtering_rule_element, list);
++
++ if(rule->rule.plugin_action.plugin_id == pfring_plugin_id) {
++ if(plugin_registration[pfring_plugin_id]
++ && plugin_registration[pfring_plugin_id]->pfring_plugin_free_ring_mem) {
++ /* Custom free function */
++ plugin_registration[pfring_plugin_id]->pfring_plugin_free_ring_mem(rule);
++ } else {
++ if(rule->plugin_data_ptr != NULL) {
++ kfree(rule->plugin_data_ptr);
++ rule->plugin_data_ptr = NULL;
++ }
++ }
+
-+ idx = ip->saddr+ip->daddr+ip->protocol;
++ rule->rule.plugin_action.plugin_id = 0;
++ }
++ }
++ }
++ read_unlock_bh(&ring_mgmt_lock);
+
-+ if(ip->protocol == IPPROTO_TCP) {
-+ struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
-+ +sizeof(struct iphdr));
-+ idx += tcp->source+tcp->dest;
-+ } else if(ip->protocol == IPPROTO_UDP) {
-+ struct udphdr *udp = (struct udphdr*)(skb->data+displ
-+ +sizeof(struct iphdr));
-+ idx += udp->source+udp->dest;
++ for(i=MAX_PLUGIN_ID-1; i>0; i--) {
++ if(plugin_registration[i] != NULL) {
++ max_registered_plugin_id = i;
++ break;
+ }
-+ } else
-+ idx = skb->len;
-+ }
++ }
+
-+ return(idx % cluster_ptr->num_cluster_elements);
++ printk("[PF_RING] unregistered plugin [id=%d][max=%d]\n",
++ pfring_plugin_id, max_registered_plugin_id);
++ module_put(THIS_MODULE); /* Decrement usage count */
++ return(0);
++ }
+}
+
+/* ********************************** */
+
+static int skb_ring_handler(struct sk_buff *skb,
+ u_char recv_packet,
-+ u_char real_skb /* 1=skb 0=faked skb */) {
++ u_char real_skb /* 1=real skb, 0=faked skb */,
++ short channel_id)
++{
+ struct sock *skElement;
-+ int rc = 0;
++ int rc = 0, is_ip_pkt;
+ struct list_head *ptr;
-+ struct ring_cluster *cluster_ptr;
++ struct pfring_pkthdr hdr;
++ int displ;
++ struct sk_buff *skk = NULL;
++ struct sk_buff *orig_skb = skb;
++
++#ifdef PROFILING
++ uint64_t rdt = _rdtsc(), rdt1, rdt2;
++#endif
++
++ if((!skb) /* Invalid skb */
++ || ((!enable_tx_capture) && (!recv_packet)))
++ {
++ /*
++ An outgoing packet is about to be sent,
++ but we have chosen not to handle
++ transmitted packets.
++ */
++ return(0);
++ }
++
++#if defined(RING_DEBUG)
++ if(1) {
++ struct timeval tv;
++
++ skb_get_timestamp(skb, &tv);
++ printk("[PF_RING] skb_ring_handler() [skb=%p][%u.%u][len=%d][dev=%s][csum=%u]\n",
++ skb, (unsigned int)tv.tv_sec, (unsigned int)tv.tv_usec, skb->len,
++ skb->dev->name == NULL ? "<NULL>" : skb->dev->name, skb->csum);
++ }
++#endif
++
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21))
++ if(channel_id == RING_ANY_CHANNEL /* Unknown channel */)
++ channel_id = skb->iif; /* Might have been set by the driver */
++#endif
++
++#if defined (RING_DEBUG)
++ /* printk("[PF_RING] channel_id=%d\n", channel_id); */
++#endif
++
++#ifdef PROFILING
++ rdt1 = _rdtsc();
++#endif
++
++ if(recv_packet) {
++ /* Hack for identifying a packet received by the e1000 */
++ if(real_skb)
++ displ = SKB_DISPLACEMENT;
++ else
++ displ = 0; /* Received by the e1000 wrapper */
++ } else
++ displ = 0;
++
++ is_ip_pkt = parse_pkt(skb, displ, &hdr);
++
++ /* (de)Fragmentation <fusco@ntop.org> */
++ if (enable_ip_defrag
++ && real_skb
++ && is_ip_pkt
++ && recv_packet
++ && (ring_table_size > 0))
++ {
++ struct sk_buff *cloned = NULL;
++ struct iphdr* iphdr = NULL;
++
++ skb_reset_network_header(skb);
++ skb_reset_transport_header(skb);
++ skb_set_network_header(skb, ETH_HLEN-displ);
++
++ iphdr = ip_hdr(skb);
++
++ if(iphdr) {
++#if defined (RING_DEBUG)
++ printk("[PF_RING] [version=%d] %X -> %X\n", iphdr->version, iphdr->saddr, iphdr->daddr);
++#endif
++ if (iphdr->frag_off & htons(IP_MF | IP_OFFSET))
++ {
++ if((cloned = skb_clone(skb, GFP_ATOMIC)) != NULL)
++ {
++#if defined (RING_DEBUG)
++ int offset = ntohs(iphdr->frag_off);
++ offset &= IP_OFFSET;
++ offset <<= 3;
++
++ printk("[PF_RING] There is a fragment to handle [proto=%d][frag_off=%u]"
++ "[ip_id=%u][network_header=%d][displ=%d]\n",
++ iphdr->protocol, offset, ntohs(iphdr->id),
++ hdr.parsed_pkt.pkt_detail.offset.l3_offset-displ, displ);
++#endif
++ skk = ring_gather_frags(cloned);
++
++ if(skk != NULL)
++ {
++#if defined (RING_DEBUG)
++ printk("[PF_RING] IP reasm on new skb [skb_len=%d][head_len=%d][nr_frags=%d][frag_list=%p]\n",
++ (int)skk->len, skb_headlen(skk),
++ skb_shinfo(skk)->nr_frags, skb_shinfo(skk)->frag_list);
++#endif
++ skb = skk;
++ parse_pkt(skb, displ, &hdr);
++ hdr.len = hdr.caplen = skb->len+displ;
++ } else {
++ //printk("[PF_RING] Fragment queued \n");
++ return(0); /* mask received fragments */
++ }
++ }
++ }
++ else
++ {
++#if defined (RING_DEBUG)
++ printk("[PF_RING] Do not seems to be a fragmented ip_pkt[iphdr=%p]\n", iphdr);
++#endif
++ }
++ }
++ }
+
-+#ifdef PROFILING
-+ uint64_t rdt = _rdtsc(), rdt1, rdt2;
++ /* BD - API changed for time keeping */
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
++ if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
++ hdr.ts.tv_sec = skb->stamp.tv_sec, hdr.ts.tv_usec = skb->stamp.tv_usec;
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
++ if(skb->tstamp.off_sec == 0) __net_timestamp(skb);
++ hdr.ts.tv_sec = skb->tstamp.off_sec, hdr.ts.tv_usec = skb->tstamp.off_usec;
++#else /* 2.6.22 and above */
++ if(skb->tstamp.tv64 == 0) __net_timestamp(skb);
++ hdr.ts = ktime_to_timeval(skb->tstamp);
+#endif
+
-+ if((!skb) /* Invalid skb */
-+ || ((!enable_tx_capture) && (!recv_packet))) {
-+ /*
-+ An outgoing packet is about to be sent out
-+ but we decided not to handle transmitted
-+ packets.
-+ */
-+ return(0);
-+ }
++ hdr.len = hdr.caplen = skb->len+displ;
+
-+#if defined(RING_DEBUG)
-+ if(0) {
-+ printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
-+ skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
-+ }
-+#endif
++ /* Prevent the ring from being manipulated while we work on it */
++ read_lock_bh(&ring_mgmt_lock);
+
-+#ifdef PROFILING
-+ rdt1 = _rdtsc();
++#if 0
++ printk("[PF_RING] -----------------------------------\n");
+#endif
+
+ /* [1] Check unclustered sockets */
-+ for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
++ list_for_each(ptr, &ring_table) {
+ struct ring_opt *pfr;
+ struct ring_element *entry;
+
+ entry = list_entry(ptr, struct ring_element, list);
+
-+ read_lock(&ring_mgmt_lock);
+ skElement = entry->sk;
+ pfr = ring_sk(skElement);
-+ read_unlock(&ring_mgmt_lock);
++
++#if 0
++ if(pfr && (pfr->ring_slots != NULL)) {
++ /* if(pfr->ring_netdev && pfr->ring_netdev->name && strcmp(pfr->ring_netdev->name, "eth0")) */
++ printk("[PF_RING] Received packet [device=%s][socket=%s][%p]\n",
++ skb->dev->name ? skb->dev->name : "<unknown>",
++ pfr->ring_netdev->name ? pfr->ring_netdev->name : "<unknown>", pfr);
++ }
++#endif
+
+ if((pfr != NULL)
+ && (pfr->cluster_id == 0 /* No cluster */)
+ && (pfr->ring_slots != NULL)
-+ && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
++ && ((pfr->ring_netdev == skb->dev)
++ || ((skb->dev->flags & IFF_SLAVE)
++ && (pfr->ring_netdev == skb->dev->master)))) {
+ /* We've found the ring where the packet can be stored */
-+ read_lock(&ring_mgmt_lock);
-+ add_skb_to_ring(skb, pfr, recv_packet, real_skb);
-+ read_unlock(&ring_mgmt_lock);
++ int old_caplen = hdr.caplen; /* Keep the old length */
++ hdr.caplen = min(hdr.caplen, pfr->bucket_len);
++#if 0
++ printk("[PF_RING] MATCH received packet [device=%s][socket=%s][%p]\n",
++ skb->dev->name ? skb->dev->name : "<unknown>",
++ pfr->ring_netdev->name ? pfr->ring_netdev->name : "<unknown>", pfr);
++#endif
+
++ add_skb_to_ring(skb, pfr, &hdr, is_ip_pkt, displ, channel_id);
++ hdr.caplen = old_caplen;
+ rc = 1; /* Ring found: we've done our job */
+ }
+ }
+
+ /* [2] Check socket clusters */
-+ cluster_ptr = ring_cluster_list;
-+
-+ while(cluster_ptr != NULL) {
++ list_for_each(ptr, &ring_cluster_list) {
++ ring_cluster_element *cluster_ptr;
+ struct ring_opt *pfr;
+
-+ if(cluster_ptr->num_cluster_elements > 0) {
-+ u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
++ cluster_ptr = list_entry(ptr, ring_cluster_element, list);
++
++ if(cluster_ptr->cluster.num_cluster_elements > 0) {
++ u_int skb_hash = hash_skb(cluster_ptr, skb, displ);
+
-+ read_lock(&ring_mgmt_lock);
-+ skElement = cluster_ptr->sk[skb_hash];
-+ read_unlock(&ring_mgmt_lock);
++ skElement = cluster_ptr->cluster.sk[skb_hash];
+
+ if(skElement != NULL) {
+ pfr = ring_sk(skElement);
+
+ if((pfr != NULL)
+ && (pfr->ring_slots != NULL)
-+ && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
++ && ((pfr->ring_netdev == skb->dev)
++ || ((skb->dev->flags & IFF_SLAVE)
++ && (pfr->ring_netdev == skb->dev->master)))) {
+ /* We've found the ring where the packet can be stored */
-+ read_lock(&ring_mgmt_lock);
-+ add_skb_to_ring(skb, pfr, recv_packet, real_skb);
-+ read_unlock(&ring_mgmt_lock);
-+
++ add_skb_to_ring(skb, pfr, &hdr, is_ip_pkt, displ, channel_id);
+ rc = 1; /* Ring found: we've done our job */
+ }
+ }
+ }
-+
-+ cluster_ptr = cluster_ptr->next;
+ }
+
++ read_unlock_bh(&ring_mgmt_lock);
++
+#ifdef PROFILING
+ rdt1 = _rdtsc()-rdt1;
+#endif
+
+#ifdef PROFILING
+ rdt2 = _rdtsc();
+#endif
+
-+ if(transparent_mode) rc = 0;
++ /* Fragment handling */
++ if(skk != NULL)
++ kfree_skb(skk);
++
++ if(rc == 1) {
++ if(transparent_mode) {
++ rc = 0;
++ } else {
++ if(recv_packet && real_skb) {
++#if defined(RING_DEBUG)
++ printk("[PF_RING] kfree_skb()\n");
++#endif
+
-+ if((rc != 0) && real_skb)
-+ dev_kfree_skb(skb); /* Free the skb */
++ kfree_skb(orig_skb);
++ }
++ }
++ }
+
+#ifdef PROFILING
+ rdt2 = _rdtsc()-rdt2;
+ rdt = _rdtsc()-rdt;
+
+#if defined(RING_DEBUG)
-+ printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
++ printk("[PF_RING] # cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
+ (int)rdt, rdt-rdt1,
+ (int)((float)((rdt-rdt1)*100)/(float)rdt),
+ rdt2,
+#endif
+#endif
+
++ //printk("[PF_RING] Returned %d\n", rc);
+ return(rc); /* 0 = packet not handled */
+}
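++
++/*
++ Return value contract (as coded above): 1 means a ring consumed
++ the packet and the skb was freed here. In transparent_mode the
++ function returns 0 even on a match, so the kernel stack still
++ sees the packet; 0 otherwise means the packet was not handled.
++*/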
+
+struct sk_buff skb;
+
+static int buffer_ring_handler(struct net_device *dev,
-+ char *data, int len) {
-+
++ char *data, int len)
++{
+#if defined(RING_DEBUG)
-+ printk("buffer_ring_handler: [dev=%s][len=%d]\n",
++ printk("[PF_RING] buffer_ring_handler: [dev=%s][len=%d]\n",
+ dev->name == NULL ? "<NULL>" : dev->name, len);
+#endif
+
++ skb.dev = dev, skb.len = len, skb.data = data, skb.data_len = len;
++
+ /* BD - API changed for time keeping */
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
-+ skb.dev = dev, skb.len = len, skb.data = data,
-+ skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */
++ skb.stamp.tv_sec = 0;
++#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
++ skb.tstamp.off_sec = 0;
+#else
-+ skb.dev = dev, skb.len = len, skb.data = data,
-+ skb.data_len = len, skb.tstamp.tv64 = 0; /* Calculate the time */
++ skb.tstamp.tv64 = 0;
+#endif
+
-+ skb_ring_handler(&skb, 1, 0 /* fake skb */);
++ return(skb_ring_handler(&skb, 1, 0 /* fake skb */, -1 /* Unknown channel */));
++}
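++
++/*
++ Note (illustrative): this entry point serves drivers that hand
++ over a raw buffer rather than an skb; it fakes a minimal skb on
++ the file-scope 'skb' above (shared, so callers are presumably
++ serialized) and feeds it to skb_ring_handler() as a non-real skb.
++*/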
++
++/* ************************************* */
+
-+ return(0);
++static int handle_filtering_hash_bucket(struct ring_opt *pfr,
++ filtering_hash_bucket* rule,
++ u_char add_rule)
++{
++ u_int32_t hash_value = hash_pkt(rule->rule.vlan_id, rule->rule.proto,
++ rule->rule.host_peer_a, rule->rule.host_peer_b,
++ rule->rule.port_peer_a, rule->rule.port_peer_b) % DEFAULT_RING_HASH_SIZE;
++ int rc = -1, debug = 0;
++
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket(vlan=%u, proto=%u, "
++ "sip=%d.%d.%d.%d, sport=%u, dip=%d.%d.%d.%d, dport=%u, "
++ "hash_value=%u, add_rule=%d) called\n",
++ rule->rule.vlan_id, rule->rule.proto,
++ ((rule->rule.host_peer_a >> 24) & 0xff),
++ ((rule->rule.host_peer_a >> 16) & 0xff),
++ ((rule->rule.host_peer_a >> 8) & 0xff),
++ ((rule->rule.host_peer_a >> 0) & 0xff),
++ rule->rule.port_peer_a,
++ ((rule->rule.host_peer_b >> 24) & 0xff),
++ ((rule->rule.host_peer_b >> 16) & 0xff),
++ ((rule->rule.host_peer_b >> 8) & 0xff),
++ ((rule->rule.host_peer_b >> 0) & 0xff),
++ rule->rule.port_peer_b,
++ hash_value, add_rule);
++
++ if(add_rule) {
++ if(pfr->filtering_hash == NULL)
++ pfr->filtering_hash = (filtering_hash_bucket**)kcalloc(DEFAULT_RING_HASH_SIZE,
++ sizeof(filtering_hash_bucket*),
++ GFP_ATOMIC);
++ if(pfr->filtering_hash == NULL) {
++ /* kfree(rule); */
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [0]\n", -EFAULT);
++ return(-EFAULT);
++ }
++ }
++
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket() allocated memory\n");
++
++ if(pfr->filtering_hash == NULL) {
++ /* We're trying to delete a hash rule from an empty hash */
++ return(-EFAULT);
++ }
++
++ if(pfr->filtering_hash[hash_value] == NULL) {
++ if(add_rule)
++ pfr->filtering_hash[hash_value] = rule, rule->next = NULL, rc = 0;
++ else {
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [1]\n", -1);
++ return(-1); /* Unable to find the specified rule */
++ }
++ } else {
++ filtering_hash_bucket *prev = NULL, *bucket = pfr->filtering_hash[hash_value];
++
++ while(bucket != NULL) {
++ if(hash_filtering_rule_match(&bucket->rule, &rule->rule)) {
++ if(add_rule) {
++ if(debug) printk("[PF_RING] Duplicate found while adding rule: discarded\n");
++ /* kfree(rule); */
++ return(-EFAULT);
++ } else {
++ /* We've found the bucket to delete */
++
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket() found a bucket to delete: removing it\n");
++ if(prev == NULL)
++ pfr->filtering_hash[hash_value] = bucket->next;
++ else
++ prev->next = bucket->next;
++
++ /* Free the bucket */
++ if(bucket->plugin_data_ptr) kfree(bucket->plugin_data_ptr);
++ kfree(bucket);
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [2]\n", 0);
++ return(0);
++ }
++ } else {
++ prev = bucket;
++ bucket = bucket->next;
++ }
++ }
++
++ if(add_rule) {
++ /* If control reaches this point, the rule is unique */
++
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket() no duplicate rule found: adding the rule\n");
++ rule->next = pfr->filtering_hash[hash_value];
++ pfr->filtering_hash[hash_value] = rule;
++ rc = 0;
++ } else {
++ /* The rule we searched for has not been found */
++ rc = -1;
++ }
++ }
++
++ if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [3]\n", rc);
++
++ return(rc);
+}
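++
++/*
++ Kernel-side usage sketch (mirrors the add-rule path in
++ add_skb_to_ring() above):
++
++ filtering_hash_bucket *b = kcalloc(1, sizeof(*b), GFP_KERNEL);
++ // fill b->rule: vlan_id, proto, host/port peers, rule_action
++ if(pfr->handle_hash_rule(pfr, b, 1) != 0) // 1 = add rule
++ kfree(b); // duplicate rule or no memory: bucket not linked
++*/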
+
+/* ********************************** */
+
-+static int ring_create(struct net *net, struct socket *sock, int protocol) {
++static int ring_create(
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
++ struct net *net,
++#endif
++ struct socket *sock, int protocol)
++{
+ struct sock *sk;
+ struct ring_opt *pfr;
+ int err;
+
+#if defined(RING_DEBUG)
-+ printk("RING: ring_create()\n");
++ printk("[PF_RING] ring_create()\n");
+#endif
+
+ /* Are you root, superuser or so ? */
+
+ // BD: -- broke this out to keep it more simple and clear as to what the
+ // options are.
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
++ sk = sk_alloc(PF_RING, GFP_KERNEL, 1); /* Kernel 2.4 */
++#else
++ /* 2.6.X */
+#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
+ sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
+#else
++#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
+ // BD: API changed in 2.6.12, ref:
+ // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
-+ sk = sk_alloc(net, PF_RING, GFP_ATOMIC, &ring_proto);
-+#endif
++ sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
+#else
-+ /* Kernel 2.4 */
-+ sk = sk_alloc(PF_RING, GFP_KERNEL, 1);
++ sk = sk_alloc(net, PF_INET, GFP_KERNEL, &ring_proto);
++#endif
++#endif
+#endif
+
+ if (sk == NULL)
+ goto out;
+ }
+ memset(pfr, 0, sizeof(*pfr));
++ pfr->ring_active = 0; /* The ring is activated as soon as somebody waits for packets */
++ pfr->channel_id = RING_ANY_CHANNEL;
++ pfr->bucket_len = DEFAULT_BUCKET_LEN;
++ pfr->handle_hash_rule = handle_filtering_hash_bucket;
+ init_waitqueue_head(&pfr->ring_slots_waitqueue);
-+ pfr->ring_index_lock = RW_LOCK_UNLOCKED;
-+ atomic_set(&pfr->num_ring_slots_waiters, 0);
-+ init_blooms(pfr);
-+ pfr->acsm = NULL;
++ rwlock_init(&pfr->ring_index_lock);
++ rwlock_init(&pfr->ring_rules_lock);
++ atomic_set(&pfr->num_ring_users, 0);
++ INIT_LIST_HEAD(&pfr->rules);
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+ sk->sk_family = PF_RING;
+ ring_insert(sk);
+
+#if defined(RING_DEBUG)
-+ printk("RING: ring_create() - created\n");
++ printk("[PF_RING] ring_create() - created\n");
+#endif
+
+ return(0);
+{
+ struct sock *sk = sock->sk;
+ struct ring_opt *pfr = ring_sk(sk);
++ struct list_head *ptr, *tmp_ptr;
++ void * ring_memory_ptr;
+
-+ if(!sk) return 0;
-+
-+#if defined(RING_DEBUG)
-+ printk("RING: called ring_release\n");
-+#endif
++ if(!sk)
++ return 0;
++ else
++ pfr->ring_active = 0;
+
++ while(atomic_read(&pfr->num_ring_users) > 0) {
++ schedule();
++ }
++
+#if defined(RING_DEBUG)
-+ printk("RING: ring_release entered\n");
++ printk("[PF_RING] called ring_release\n");
+#endif
+
+ /*
+ The calls below must be placed outside the
-+ write_lock_irq...write_unlock_irq block.
++ write_lock_bh...write_unlock_bh block.
+ */
+ sock_orphan(sk);
+ ring_proc_remove(ring_sk(sk));
+
-+ write_lock_irq(&ring_mgmt_lock);
++ if(pfr->ring_netdev && (pfr->ring_netdev->ifindex < MAX_NUM_DEVICES)) {
++ struct list_head *ptr, *tmp_ptr;
++ device_ring_list_element *entry;
++
++ list_for_each_safe(ptr, tmp_ptr, &device_ring_list[pfr->ring_netdev->ifindex]) {
++ entry = list_entry(ptr, device_ring_list_element, list);
++
++ if(entry->the_ring == pfr) {
++ list_del(ptr);
++ kfree(entry);
++ break;
++ }
++ }
++ }
++
++ write_lock_bh(&ring_mgmt_lock);
+ ring_remove(sk);
+ sock->sk = NULL;
+
-+ /* Free the ring buffer */
-+ if(pfr->ring_memory) {
-+ struct page *page, *page_end;
++ /* Free rules */
++ list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
++ {
++ filtering_rule_element *rule;
+
-+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
-+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
-+ ClearPageReserved(page);
++ rule = list_entry(ptr, filtering_rule_element, list);
+
-+ free_pages(pfr->ring_memory, pfr->order);
-+ }
++ if(plugin_registration[rule->rule.plugin_action.plugin_id]
++ && plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_free_ring_mem) {
++ /* Custom free function */
++ plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_free_ring_mem(rule);
++ } else {
++#ifdef DEBUG
++ printk("[PF_RING] --> default_free [rule->rule.plugin_action.plugin_id=%d]\n",
++ rule->rule.plugin_action.plugin_id);
++#endif
++ if(rule->plugin_data_ptr != NULL) {
++ kfree(rule->plugin_data_ptr);
++ rule->plugin_data_ptr = NULL;
++ }
++ }
++
++ if(rule->pattern) kfree(rule->pattern);
++
++ list_del(ptr);
++ kfree(rule);
++ }
+
-+ free_bitmask(&pfr->mac_bitmask);
-+ free_bitmask(&pfr->vlan_bitmask);
-+ free_bitmask(&pfr->ip_bitmask); free_bitmask(&pfr->twin_ip_bitmask);
-+ free_bitmask(&pfr->port_bitmask); free_bitmask(&pfr->twin_port_bitmask);
-+ free_bitmask(&pfr->proto_bitmask);
++ /* Filtering hash rules */
++ if(pfr->filtering_hash) {
++ int i;
+
-+ if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
++ for(i=0; i<DEFAULT_RING_HASH_SIZE; i++) {
++ if(pfr->filtering_hash[i] != NULL) {
++ filtering_hash_bucket *scan = pfr->filtering_hash[i], *next;
+
-+ kfree(pfr);
++ while(scan != NULL) {
++ next = scan->next;
++ if(scan->plugin_data_ptr != NULL) kfree(scan->plugin_data_ptr);
++ kfree(scan);
++ scan = next;
++ }
++ }
++ }
++
++ kfree(pfr->filtering_hash);
++ }
++
++ if(pfr->reflector_dev != NULL)
++ dev_put(pfr->reflector_dev); /* Release device */
++
++ /* Free the ring buffer later, vfree needs interrupts enabled */
++ ring_memory_ptr = pfr->ring_memory;
+ ring_sk(sk) = NULL;
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+#endif
+
+ sock_put(sk);
-+ write_unlock_irq(&ring_mgmt_lock);
++ write_unlock_bh(&ring_mgmt_lock);
++ if(pfr->appl_name != NULL) kfree(pfr->appl_name);
++
++ if(ring_memory_ptr != NULL) {
++#if defined(RING_DEBUG)
++ printk("[PF_RING] ring_release: rvfree\n");
++#endif
++ rvfree(ring_memory_ptr, pfr->slots_info->tot_mem);
++ }
++
++ kfree(pfr);
++
++#if defined(RING_DEBUG)
++ printk("[PF_RING] ring_release: rvfree done\n");
++#endif
+
+#if defined(RING_DEBUG)
-+ printk("RING: ring_release leaving\n");
++ printk("[PF_RING] ring_release: done\n");
+#endif
+
+ return 0;
+}
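++
++/*
++  NOTE on the teardown order above: ring_active is cleared first so that
++  no new readers enter, the loop on num_ring_users then lets in-flight
++  users drain via schedule(), and only afterwards are the rules and the
++  filtering hash freed. The ring buffer itself is released last through
++  rvfree(), since vfree() needs interrupts enabled and must therefore
++  run outside the write_lock_bh()...write_unlock_bh() section.
++*/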
+
+/* ********************************** */
++
+/*
+ * We create a ring for this socket and bind it to the specified device
+ */
+ u_int the_slot_len;
+ u_int32_t tot_mem;
+ struct ring_opt *pfr = ring_sk(sk);
-+ struct page *page, *page_end;
++ // struct page *page, *page_end;
+
+ if(!dev) return(-1);
+
+#if defined(RING_DEBUG)
-+ printk("RING: packet_ring_bind(%s) called\n", dev->name);
++ printk("[PF_RING] packet_ring_bind(%s) called\n", dev->name);
+#endif
+
+ /* **********************************************
+#ifdef RING_MAGIC
+ + sizeof(u_char)
+#endif
-+ + sizeof(struct pcap_pkthdr)
-+ + bucket_len /* flowSlot.bucket */;
++ + sizeof(struct pfring_pkthdr)
++ + pfr->bucket_len /* flowSlot.bucket */;
+
+ tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
++ if (tot_mem % PAGE_SIZE)
++ tot_mem += PAGE_SIZE - (tot_mem % PAGE_SIZE);
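++  /*
++    Example: with PAGE_SIZE = 4096, tot_mem = 10000 leaves
++    10000 % 4096 = 1808, so 4096 - 1808 = 2288 bytes are added and
++    tot_mem becomes 12288 (exactly 3 pages). ring_mmap() later rejects
++    any mapping whose length is not a multiple of PAGE_SIZE.
++  */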
+
-+ /*
-+ Calculate the value of the order parameter used later.
-+ See http://www.linuxjournal.com/article.php?sid=1133
-+ */
-+ for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ;
-+
-+ /*
-+ We now try to allocate the memory as required. If we fail
-+ we try to allocate a smaller amount or memory (hence a
-+ smaller ring).
-+ */
-+ while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0)
-+ if(pfr->order-- == 0)
-+ break;
++ pfr->ring_memory = rvmalloc(tot_mem);
+
-+ if(pfr->order == 0) {
-+ printk("RING: ERROR not enough memory for ring\n");
-+ return(-1);
++ if (pfr->ring_memory != NULL) {
++ printk("[PF_RING] successfully allocated %lu bytes at 0x%08lx\n",
++ (unsigned long) tot_mem, (unsigned long) pfr->ring_memory);
+ } else {
-+ printk("RING: succesfully allocated %lu KB [tot_mem=%d][order=%ld]\n",
-+ PAGE_SIZE >> (10 - pfr->order), tot_mem, pfr->order);
++ printk("[PF_RING] ERROR: not enough memory for ring\n");
++ return(-1);
+ }
+
-+ tot_mem = PAGE_SIZE << pfr->order;
-+ memset((char*)pfr->ring_memory, 0, tot_mem);
-+
-+ /* Now we need to reserve the pages */
-+ page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
-+ for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
-+ SetPageReserved(page);
++ // memset(pfr->ring_memory, 0, tot_mem); // rvmalloc does the memset already
+
+ pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
+ pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
+
+ pfr->slots_info->version = RING_FLOWSLOT_VERSION;
+ pfr->slots_info->slot_len = the_slot_len;
-+ pfr->slots_info->data_len = bucket_len;
++ pfr->slots_info->data_len = pfr->bucket_len;
+ pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
+ pfr->slots_info->tot_mem = tot_mem;
-+ pfr->slots_info->sample_rate = sample_rate;
++ pfr->slots_info->sample_rate = 1;
+
-+ printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
++ printk("[PF_RING] allocated %d slots [slot_len=%d][tot_mem=%u]\n",
+ pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
+ pfr->slots_info->tot_mem);
+
+ }
+#endif
+
++ pfr->sample_rate = 1; /* No sampling */
+ pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
++ pfr->rules_default_accept_policy = 1, pfr->num_filtering_rules = 0;
++ ring_proc_add(ring_sk(sk), dev);
++
++ if(dev->ifindex < MAX_NUM_DEVICES) {
++ device_ring_list_element *elem;
++
++ /* printk("[PF_RING] Adding ring to device index %d\n", dev->ifindex); */
++
++ elem = kmalloc(sizeof(device_ring_list_element), GFP_ATOMIC);
++ if(elem != NULL) {
++ elem->the_ring = pfr;
++ INIT_LIST_HEAD(&elem->list);
++ list_add(&elem->list, &device_ring_list[dev->ifindex]);
++ /* printk("[PF_RING] Added ring to device index %d\n", dev->ifindex); */
++ }
++ }
+
+ /*
+ IMPORTANT
+ struct net_device *dev = NULL;
+
+#if defined(RING_DEBUG)
-+ printk("RING: ring_bind() called\n");
++ printk("[PF_RING] ring_bind() called\n");
+#endif
+
+ /*
+ * Check legality
+ */
-+ if (addr_len != sizeof(struct sockaddr))
++ if(addr_len != sizeof(struct sockaddr))
++ return -EINVAL;
++ if(sa->sa_family != PF_RING)
+ return -EINVAL;
-+ if (sa->sa_family != PF_RING)
++ if(sa->sa_data == NULL)
+ return -EINVAL;
+
+ /* Safety check: add trailing zero if missing */
+ sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
+
+#if defined(RING_DEBUG)
-+ printk("RING: searching device %s\n", sa->sa_data);
++ printk("[PF_RING] searching device %s\n", sa->sa_data);
+#endif
+
-+ if((dev = __dev_get_by_name(&init_net, sa->sa_data)) == NULL) {
++ if((dev = __dev_get_by_name(
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
++ &init_net,
++#endif
++ sa->sa_data)) == NULL) {
+#if defined(RING_DEBUG)
-+ printk("RING: search failed\n");
++ printk("[PF_RING] search failed\n");
+#endif
+ return(-EINVAL);
+ } else
+
+/* ************************************* */
+
-+static int ring_mmap(struct file *file,
-+ struct socket *sock,
-+ struct vm_area_struct *vma)
++/*
++ * rvmalloc / rvfree / kvirt_to_pa copied from usbvideo.c
++ */
++unsigned long kvirt_to_pa(unsigned long adr)
+{
-+ struct sock *sk = sock->sk;
-+ struct ring_opt *pfr = ring_sk(sk);
-+ unsigned long size, start;
-+ u_int pagesToMap;
-+ char *ptr;
++ unsigned long kva, ret;
+
-+#if defined(RING_DEBUG)
-+ printk("RING: ring_mmap() called\n");
++ kva = (unsigned long) page_address(vmalloc_to_page((void *)adr));
++ kva |= adr & (PAGE_SIZE-1); /* restore the offset */
++ ret = __pa(kva);
++ return ret;
++}
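++
++/*
++  NOTE: kvirt_to_pa() resolves a vmalloc()ed address in two steps:
++  vmalloc_to_page() finds the backing page, page_address() yields its
++  kernel virtual address, and __pa() turns that into a physical address;
++  'adr & (PAGE_SIZE-1)' re-applies the in-page offset that the page
++  lookup discards.
++*/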
++
++/* ************************************* */
++
++static int do_memory_mmap(struct vm_area_struct *vma,
++ unsigned long size, char *ptr,
++ u_int flags, int mode) {
++ unsigned long start;
++ unsigned long page;
++
++ /* we do not want to have this area swapped out, lock it */
++ vma->vm_flags |= flags;
++ start = vma->vm_start;
++
++ while(size > 0)
++ {
++ int rc;
++
++ if(mode == 0) {
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
++ page = vmalloc_to_pfn(ptr);
++ rc = remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED);
++#else
++      page = kvirt_to_pa((unsigned long)ptr); /* physical address for remap_page_range() */
++ rc = remap_page_range(vma, start, page, PAGE_SIZE, PAGE_SHARED);
+#endif
++ } else if(mode == 1) {
++ rc = remap_pfn_range(vma, start,
++ __pa(ptr) >> PAGE_SHIFT,
++ PAGE_SIZE, PAGE_SHARED);
++ } else {
++ rc = remap_pfn_range(vma, start,
++ ((unsigned long)ptr) >> PAGE_SHIFT,
++ PAGE_SIZE, PAGE_SHARED);
++ }
+
-+ if(pfr->ring_memory == 0) {
++ if(rc) {
+#if defined(RING_DEBUG)
-+ printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
++ printk("[PF_RING] remap_pfn_range() failed\n");
+#endif
-+ return -EINVAL;
-+ }
++ return(-EAGAIN);
++ }
++
++ start += PAGE_SIZE;
++ ptr += PAGE_SIZE;
++ if (size > PAGE_SIZE) {
++ size -= PAGE_SIZE;
++ } else {
++ size = 0;
++ }
++ }
++
++ return(0);
++}
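++
++/*
++  NOTE: the 'mode' parameter mirrors the three callers in ring_mmap()
++  below:
++    mode 0 - vmalloc()ed ring memory, resolved page by page with
++             vmalloc_to_pfn() (kvirt_to_pa() on older kernels);
++    mode 1 - kernel-contiguous DNA packet/descriptor memory, whose pfn
++             comes from __pa(ptr) >> PAGE_SHIFT;
++    mode 2 - physical card memory: 'ptr' already holds a physical
++             address and is only shifted by PAGE_SHIFT.
++*/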
++
++/* ************************************* */
+
-+ size = (unsigned long)(vma->vm_end-vma->vm_start);
++static int ring_mmap(struct file *file,
++ struct socket *sock,
++ struct vm_area_struct *vma)
++{
++ struct sock *sk = sock->sk;
++ struct ring_opt *pfr = ring_sk(sk);
++ int rc;
++ unsigned long size = (unsigned long)(vma->vm_end - vma->vm_start);
+
+ if(size % PAGE_SIZE) {
+#if defined(RING_DEBUG)
-+ printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
++ printk("[PF_RING] ring_mmap() failed: "
++ "len is not multiple of PAGE_SIZE\n");
+#endif
+ return(-EINVAL);
+ }
+
-+ /* if userspace tries to mmap beyond end of our buffer, fail */
-+ if(size > pfr->slots_info->tot_mem) {
+#if defined(RING_DEBUG)
-+ printk("proc_mmap() failed: area too large [%ld > %d]\n", size, pfr->slots_info->tot_mem);
++ printk("[PF_RING] ring_mmap() called, size: %ld bytes\n", size);
+#endif
-+ return(-EINVAL);
-+ }
+
-+ pagesToMap = size/PAGE_SIZE;
++ if((pfr->dna_device == NULL) && (pfr->ring_memory == NULL)) {
++#if defined(RING_DEBUG)
++ printk("[PF_RING] ring_mmap() failed: "
++ "mapping area to an unbound socket\n");
++#endif
++ return -EINVAL;
++ }
+
++ if(pfr->dna_device == NULL) {
++ /* if userspace tries to mmap beyond end of our buffer, fail */
++ if(size > pfr->slots_info->tot_mem) {
+#if defined(RING_DEBUG)
-+ printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
++ printk("[PF_RING] ring_mmap() failed: "
++ "area too large [%ld > %d]\n",
++ size, pfr->slots_info->tot_mem);
+#endif
++ return(-EINVAL);
++ }
+
+#if defined(RING_DEBUG)
-+ printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
-+ pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
-+ pfr->ring_netdev->name);
++ printk("[PF_RING] mmap [slot_len=%d]"
++ "[tot_slots=%d] for ring on device %s\n",
++ pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
++ pfr->ring_netdev->name);
+#endif
+
-+ /* we do not want to have this area swapped out, lock it */
-+ vma->vm_flags |= VM_LOCKED;
-+ start = vma->vm_start;
++ if((rc = do_memory_mmap(vma, size, pfr->ring_memory, VM_LOCKED, 0)) < 0)
++ return(rc);
++ } else {
++ /* DNA Device */
++
++ switch(pfr->mmap_count) {
++ case 0:
++ if((rc = do_memory_mmap(vma, size,
++ (void*)pfr->dna_device->packet_memory,
++ VM_LOCKED, 1)) < 0)
++ return(rc);
++ break;
+
-+ /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */
-+ ptr = (char*)(start+PAGE_SIZE);
++ case 1:
++ if((rc = do_memory_mmap(vma, size,
++ (void*)pfr->dna_device->descr_packet_memory,
++ VM_LOCKED, 1)) < 0)
++ return(rc);
++ break;
+
-+ if(remap_page_range(
-+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
-+ vma,
-+#endif
-+ start,
-+ __pa(pfr->ring_memory),
-+ PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
-+#if defined(RING_DEBUG)
-+ printk("remap_page_range() failed\n");
-+#endif
-+ return(-EAGAIN);
++ case 2:
++ if((rc = do_memory_mmap(vma, size,
++ (void*)pfr->dna_device->phys_card_memory,
++ (VM_RESERVED | VM_IO), 2)) < 0)
++ return(rc);
++ break;
++
++ default:
++ return(-EAGAIN);
++ }
++
++ pfr->mmap_count++;
+ }
+
+#if defined(RING_DEBUG)
-+ printk("proc_mmap(pagesToMap=%d): success.\n", pagesToMap);
++ printk("[PF_RING] ring_mmap succeeded\n");
+#endif
+
+ return 0;
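++
++/*
++  NOTE: a minimal userspace sketch of the three-stage DNA mapping driven
++  by mmap_count above (illustrative only; the lengths would in practice
++  be taken from SO_GET_MAPPED_DNA_DEVICE):
++
++    // 1st call: packet memory
++    void *pkt_mem   = mmap(NULL, pkt_len,   PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
++    // 2nd call: descriptor memory
++    void *descr_mem = mmap(NULL, descr_len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
++    // 3rd call: physical card memory
++    void *card_mem  = mmap(NULL, card_len,  PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
++
++  Each successful call increments pfr->mmap_count, so the three regions
++  must be mapped in exactly this order; a fourth call fails with EAGAIN.
++*/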
+static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
+ struct msghdr *msg, size_t len, int flags)
+#else
-+ static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
-+ int flags, struct scm_cookie *scm)
++ static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
++ int flags, struct scm_cookie *scm)
+#endif
+{
+ FlowSlot* slot;
+ u_int32_t queued_pkts, num_loops = 0;
+
+#if defined(RING_DEBUG)
-+ printk("ring_recvmsg called\n");
++ printk("[PF_RING] ring_recvmsg called\n");
+#endif
+
++ pfr->ring_active = 1;
+ slot = get_remove_slot(pfr);
+
+ while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
+ wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
+
+#if defined(RING_DEBUG)
-+ printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
++ printk("[PF_RING] -> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
+ slot->slot_state, queued_pkts, num_loops);
+#endif
+
+
+#if defined(RING_DEBUG)
+ if(slot != NULL)
-+ printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
++ printk("[PF_RING] ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
+ queued_pkts, num_loops);
+#endif
+
+{
+ FlowSlot* slot;
+ struct ring_opt *pfr = ring_sk(sock->sk);
++ int rc;
++
++ /* printk("[PF_RING] -- poll called\n"); */
++
++ if(pfr->dna_device == NULL) {
++ /* PF_RING mode */
+
+#if defined(RING_DEBUG)
-+ printk("poll called\n");
++ printk("[PF_RING] poll called (non DNA device)\n");
+#endif
+
-+ slot = get_remove_slot(pfr);
++ pfr->ring_active = 1;
++ slot = get_remove_slot(pfr);
+
-+ if((slot != NULL) && (slot->slot_state == 0))
-+ poll_wait(file, &pfr->ring_slots_waitqueue, wait);
++ if((slot != NULL) && (slot->slot_state == 0))
++ poll_wait(file, &pfr->ring_slots_waitqueue, wait);
+
+#if defined(RING_DEBUG)
-+ printk("poll returning %d\n", slot->slot_state);
++ printk("[PF_RING] poll returning %d\n", slot->slot_state);
+#endif
+
-+ if((slot != NULL) && (slot->slot_state == 1))
-+ return(POLLIN | POLLRDNORM);
-+ else
-+ return(0);
++ if((slot != NULL) && (slot->slot_state == 1))
++ return(POLLIN | POLLRDNORM);
++ else
++ return(0);
++ } else {
++ /* DNA mode */
++
++#if defined(RING_DEBUG)
++ printk("[PF_RING] poll called on DNA device [%d]\n",
++ *pfr->dna_device->interrupt_received);
++#endif
++
++ if(pfr->dna_device->wait_packet_function_ptr == NULL)
++ return(0);
++
++ rc = pfr->dna_device->wait_packet_function_ptr(pfr->dna_device->adapter_ptr, 1);
++ if(rc == 0) /* No packet arrived yet */ {
++ /* poll_wait(file, pfr->dna_device->packet_waitqueue, wait); */
++ } else
++ rc = pfr->dna_device->wait_packet_function_ptr(pfr->dna_device->adapter_ptr, 0);
++
++ //*pfr->dna_device->interrupt_received = rc;
++ if(rc == 0) rc = *pfr->dna_device->interrupt_received;
++
++#if defined(RING_DEBUG)
++ printk("[PF_RING] poll %s return [%d]\n",
++ pfr->ring_netdev->name,
++ *pfr->dna_device->interrupt_received);
++#endif
++
++ if(rc) {
++ return(POLLIN | POLLRDNORM);
++ } else {
++ return(0);
++ }
++ }
+}
+
+/* ************************************* */
+
-+int add_to_cluster_list(struct ring_cluster *el,
-+ struct sock *sock) {
-+
-+ if(el->num_cluster_elements == CLUSTER_LEN)
++int add_to_cluster_list(ring_cluster_element *el,
++ struct sock *sock)
++{
++ if(el->cluster.num_cluster_elements == CLUSTER_LEN)
+ return(-1); /* Cluster full */
+
-+ ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
-+ el->sk[el->num_cluster_elements] = sock;
-+ el->num_cluster_elements++;
++ ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster.cluster_id;
++ el->cluster.sk[el->cluster.num_cluster_elements] = sock;
++ el->cluster.num_cluster_elements++;
+ return(0);
+}
+
+/* ************************************* */
+
+int remove_from_cluster_list(struct ring_cluster *el,
-+ struct sock *sock) {
++ struct sock *sock)
++{
+ int i, j;
+
+ for(i=0; i<CLUSTER_LEN; i++)
+static int remove_from_cluster(struct sock *sock,
+ struct ring_opt *pfr)
+{
-+ struct ring_cluster *el;
++ struct list_head *ptr, *tmp_ptr;
+
+#if defined(RING_DEBUG)
-+ printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
++ printk("[PF_RING] --> remove_from_cluster(%d)\n", pfr->cluster_id);
+#endif
+
+ if(pfr->cluster_id == 0 /* 0 = No Cluster */)
+    return(0); /* Nothing to do */
+
-+ el = ring_cluster_list;
++ list_for_each_safe(ptr, tmp_ptr, &ring_cluster_list) {
++ ring_cluster_element *cluster_ptr;
+
-+ while(el != NULL) {
-+ if(el->cluster_id == pfr->cluster_id) {
-+ return(remove_from_cluster_list(el, sock));
-+ } else
-+ el = el->next;
++ cluster_ptr = list_entry(ptr, ring_cluster_element, list);
++
++ if(cluster_ptr->cluster.cluster_id == pfr->cluster_id) {
++ return(remove_from_cluster_list(&cluster_ptr->cluster, sock));
++ }
+ }
+
+ return(-EINVAL); /* Not found */
+ struct ring_opt *pfr,
+ u_short cluster_id)
+{
-+ struct ring_cluster *el;
++ struct list_head *ptr, *tmp_ptr;
++ ring_cluster_element *cluster_ptr;
+
+#if defined(RING_DEBUG)
-+ printk("--> add_to_cluster(%d)\n", cluster_id);
++ printk("[PF_RING] --> add_to_cluster(%d)\n", cluster_id);
+#endif
+
+ if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
+ if(pfr->cluster_id != 0)
+ remove_from_cluster(sock, pfr);
+
-+ el = ring_cluster_list;
++ list_for_each_safe(ptr, tmp_ptr, &ring_cluster_list) {
++ cluster_ptr = list_entry(ptr, ring_cluster_element, list);
+
-+ while(el != NULL) {
-+ if(el->cluster_id == cluster_id) {
-+ return(add_to_cluster_list(el, sock));
-+ } else
-+ el = el->next;
++ if(cluster_ptr->cluster.cluster_id == cluster_id) {
++ return(add_to_cluster_list(cluster_ptr, sock));
++ }
++ }
++
++ /* There's no existing cluster. We need to create one */
++ if((cluster_ptr = kmalloc(sizeof(ring_cluster_element),
++ GFP_KERNEL)) == NULL)
++ return(-ENOMEM);
++
++ INIT_LIST_HEAD(&cluster_ptr->list);
++
++ cluster_ptr->cluster.cluster_id = cluster_id;
++ cluster_ptr->cluster.num_cluster_elements = 1;
++ cluster_ptr->cluster.hashing_mode = cluster_per_flow; /* Default */
++ cluster_ptr->cluster.hashing_id = 0;
++
++ memset(cluster_ptr->cluster.sk, 0, sizeof(cluster_ptr->cluster.sk));
++ cluster_ptr->cluster.sk[0] = sock;
++ pfr->cluster_id = cluster_id;
++
++ list_add(&cluster_ptr->list, &ring_cluster_list); /* Add as first entry */
++
++ return(0); /* 0 = OK */
++}
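++
++/*
++  NOTE: a minimal userspace sketch (the level argument is an assumption;
++  the module dispatches on optname only):
++
++    u_short cluster_id = 5;
++    setsockopt(fd, 0, SO_ADD_TO_CLUSTER, &cluster_id, sizeof(cluster_id));
++
++  The first joiner creates the cluster with cluster_per_flow hashing as
++  the default; later sockets are appended until CLUSTER_LEN is reached,
++  after which add_to_cluster_list() reports the cluster as full.
++*/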
++
++/* ************************************* */
++
++static int ring_map_dna_device(struct ring_opt *pfr,
++ dna_device_mapping *mapping) {
++ int debug = 0;
++
++ if(mapping->operation == remove_device_mapping) {
++ pfr->dna_device = NULL;
++ if(debug)
++ printk("[PF_RING] ring_map_dna_device(%s): removed mapping\n",
++ mapping->device_name);
++ return(0);
++ } else {
++ struct list_head *ptr, *tmp_ptr;
++ dna_device_list *entry;
++
++ list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
++ entry = list_entry(ptr, dna_device_list, list);
++
++ if((!strcmp(entry->dev.netdev->name, mapping->device_name))
++ && (entry->dev.channel_id == mapping->channel_id)) {
++ pfr->dna_device = &entry->dev, pfr->ring_netdev = entry->dev.netdev;
++
++ if(debug)
++ printk("[PF_RING] ring_map_dna_device(%s): added mapping\n",
++ mapping->device_name);
++
++ return(0);
++ }
++ }
++ }
++
++ printk("[PF_RING] ring_map_dna_device(%s): mapping failed\n",
++ mapping->device_name);
++
++ return(-1);
++}
++
++/* ************************************* */
++
++static void purge_idle_hash_rules(struct ring_opt *pfr, uint16_t rule_inactivity)
++{
++ int i, num_purged_rules = 0, debug = 0;
++ unsigned long expire_jiffies = jiffies - msecs_to_jiffies(1000*rule_inactivity);
++
++ if(debug)
++ printk("[PF_RING] purge_idle_hash_rules(rule_inactivity=%d)\n", rule_inactivity);
++
++ /* Free filtering hash rules inactive for more than rule_inactivity seconds */
++ if(pfr->filtering_hash != NULL) {
++ for(i=0; i<DEFAULT_RING_HASH_SIZE; i++) {
++ if(pfr->filtering_hash[i] != NULL) {
++ filtering_hash_bucket *scan = pfr->filtering_hash[i], *next, *prev = NULL;
++
++ while(scan != NULL) {
++ next = scan->next;
++
++ if(scan->rule.jiffies_last_match < expire_jiffies) {
++ /* Expired rule: free it */
++
++ if(debug)
++ printk("[PF_RING] Purging hash rule "
++ /* "[last_match=%u][expire_jiffies=%u]" */
++ "[%d.%d.%d.%d:%d <-> %d.%d.%d.%d:%d][purged=%d][tot_rules=%d]\n",
++ /*
++ (unsigned int)scan->rule.jiffies_last_match,
++ (unsigned int)expire_jiffies,
++ */
++ ((scan->rule.host_peer_a >> 24) & 0xff),
++ ((scan->rule.host_peer_a >> 16) & 0xff),
++ ((scan->rule.host_peer_a >> 8) & 0xff),
++ ((scan->rule.host_peer_a >> 0) & 0xff),
++ scan->rule.port_peer_a,
++ ((scan->rule.host_peer_b >> 24) & 0xff),
++ ((scan->rule.host_peer_b >> 16) & 0xff),
++ ((scan->rule.host_peer_b >> 8) & 0xff),
++ ((scan->rule.host_peer_b >> 0) & 0xff),
++ scan->rule.port_peer_b,
++ num_purged_rules, pfr->num_filtering_rules);
++
++ if(scan->plugin_data_ptr != NULL) kfree(scan->plugin_data_ptr);
++ kfree(scan);
++
++ if(prev == NULL)
++ pfr->filtering_hash[i] = next;
++ else
++ prev->next = next;
++
++ pfr->num_filtering_rules--, num_purged_rules++;
++ } else
++ prev = scan;
++
++ scan = next;
++ }
++ }
++ }
+ }
-+
-+ /* There's no existing cluster. We need to create one */
-+ if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL)
-+ return(-ENOMEM);
-+
-+ el->cluster_id = cluster_id;
-+ el->num_cluster_elements = 1;
-+ el->hashing_mode = cluster_per_flow; /* Default */
-+ el->hashing_id = 0;
-+
-+ memset(el->sk, 0, sizeof(el->sk));
-+ el->sk[0] = sock;
-+ el->next = ring_cluster_list;
-+ ring_cluster_list = el;
-+ pfr->cluster_id = cluster_id;
-+
-+ return(0); /* 0 = OK */
++
++ if(debug)
++ printk("[PF_RING] Purged %d hash rules [tot_rules=%d]\n",
++ num_purged_rules, pfr->num_filtering_rules);
+}
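++
++/*
++  NOTE: the expiry test works in jiffies. With rule_inactivity = 60, for
++  instance, the threshold is jiffies - msecs_to_jiffies(60 * 1000), and
++  every bucket whose rule.jiffies_last_match is older is unlinked and
++  freed. Userspace triggers this via SO_PURGE_IDLE_HASH_RULES with a
++  u_int16_t value expressed in seconds.
++*/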
+
+/* ************************************* */
+/* Code taken/inspired from core/sock.c */
+static int ring_setsockopt(struct socket *sock,
+ int level, int optname,
-+ char *optval, int optlen)
++ char __user *optval, int optlen)
+{
+ struct ring_opt *pfr = ring_sk(sock->sk);
-+ int val, found, ret = 0;
-+ u_int cluster_id, do_enable;
-+ char devName[8], bloom_filter[256], aho_pattern[256];
-+
-+ if(pfr == NULL) return(-EINVAL);
++ int val, found, ret = 0 /* OK */;
++ u_int cluster_id, debug = 0;
++ int32_t channel_id;
++ char devName[8], applName[32+1];
++ struct list_head *prev = NULL;
++ filtering_rule_element *entry, *rule;
++ u_int16_t rule_id, rule_inactivity;
++
++ if(pfr == NULL)
++ return(-EINVAL);
+
+ if (get_user(val, (int *)optval))
+ return -EFAULT;
+ {
+ case SO_ATTACH_FILTER:
+ ret = -EINVAL;
-+ if (optlen == sizeof(struct sock_fprog)) {
-+ unsigned int fsize;
-+ struct sock_fprog fprog;
-+ struct sk_filter *filter;
++ if (optlen == sizeof(struct sock_fprog))
++ {
++ unsigned int fsize;
++ struct sock_fprog fprog;
++ struct sk_filter *filter;
+
-+ ret = -EFAULT;
++ ret = -EFAULT;
+
-+ /*
-+ NOTE
++ /*
++ NOTE
+
-+ Do not call copy_from_user within a held
-+ splinlock (e.g. ring_mgmt_lock) as this caused
-+ problems when certain debugging was enabled under
-+ 2.6.5 -- including hard lockups of the machine.
-+ */
-+ if(copy_from_user(&fprog, optval, sizeof(fprog)))
-+ break;
++ Do not call copy_from_user within a held
++ splinlock (e.g. ring_mgmt_lock) as this caused
++ problems when certain debugging was enabled under
++ 2.6.5 -- including hard lockups of the machine.
++ */
++ if(copy_from_user(&fprog, optval, sizeof(fprog)))
++ break;
+
-+ fsize = sizeof(struct sock_filter) * fprog.len;
-+ filter = kmalloc(fsize, GFP_KERNEL);
++ /* Fix below courtesy of Noam Dev <noamdev@gmail.com> */
++ fsize = sizeof(struct sock_filter) * fprog.len;
++ filter = kmalloc(fsize + sizeof(struct sk_filter), GFP_KERNEL);
+
-+ if(filter == NULL) {
-+ ret = -ENOMEM;
-+ break;
-+ }
++ if(filter == NULL)
++ {
++ ret = -ENOMEM;
++ break;
++ }
+
-+ if(copy_from_user(filter->insns, fprog.filter, fsize))
-+ break;
++ if(copy_from_user(filter->insns, fprog.filter, fsize))
++ break;
+
-+ filter->len = fprog.len;
++ filter->len = fprog.len;
+
-+ if(sk_chk_filter(filter->insns, filter->len) != 0) {
-+ /* Bad filter specified */
-+ kfree(filter);
-+ pfr->bpfFilter = NULL;
-+ break;
-+ }
++ if(sk_chk_filter(filter->insns, filter->len) != 0)
++ {
++ /* Bad filter specified */
++ kfree(filter);
++ pfr->bpfFilter = NULL;
++ break;
++ }
+
-+ /* get the lock, set the filter, release the lock */
-+ write_lock(&ring_mgmt_lock);
-+ pfr->bpfFilter = filter;
-+ write_unlock(&ring_mgmt_lock);
-+ ret = 0;
-+ }
++ /* get the lock, set the filter, release the lock */
++ write_lock(&pfr->ring_rules_lock);
++ pfr->bpfFilter = filter;
++ write_unlock(&pfr->ring_rules_lock);
++ ret = 0;
++ }
+ break;
+
+ case SO_DETACH_FILTER:
-+ write_lock(&ring_mgmt_lock);
++ write_lock(&pfr->ring_rules_lock);
+ found = 1;
-+ if(pfr->bpfFilter != NULL) {
-+ kfree(pfr->bpfFilter);
-+ pfr->bpfFilter = NULL;
-+ write_unlock(&ring_mgmt_lock);
-+ break;
-+ }
-+ ret = -ENONET;
++ if(pfr->bpfFilter != NULL)
++ {
++ kfree(pfr->bpfFilter);
++ pfr->bpfFilter = NULL;
++ } else
++ ret = -ENONET;
++ write_unlock(&pfr->ring_rules_lock);
+ break;
+
+ case SO_ADD_TO_CLUSTER:
+ if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
+ return -EFAULT;
+
-+ write_lock(&ring_mgmt_lock);
++ write_lock(&pfr->ring_rules_lock);
+ ret = add_to_cluster(sock->sk, pfr, cluster_id);
-+ write_unlock(&ring_mgmt_lock);
++ write_unlock(&pfr->ring_rules_lock);
+ break;
+
+ case SO_REMOVE_FROM_CLUSTER:
-+ write_lock(&ring_mgmt_lock);
++ write_lock(&pfr->ring_rules_lock);
+ ret = remove_from_cluster(sock->sk, pfr);
-+ write_unlock(&ring_mgmt_lock);
++ write_unlock(&pfr->ring_rules_lock);
++ break;
++
++ case SO_SET_CHANNEL_ID:
++ if(optlen != sizeof(channel_id))
++ return -EINVAL;
++
++ if(copy_from_user(&channel_id, optval, sizeof(channel_id)))
++ return -EFAULT;
++
++ pfr->channel_id = channel_id;
++#if defined(RING_DEBUG)
++ printk("[PF_RING] [pfr->channel_id=%d][channel_id=%d]\n",
++ pfr->channel_id, channel_id);
++#endif
++ ret = 0;
++ break;
++
++ case SO_SET_APPL_NAME:
++ if(optlen > sizeof(applName) /* Names should not be too long */)
++ return -EINVAL;
++
++ if(copy_from_user(&applName, optval, optlen))
++ return -EFAULT;
++
++ if(pfr->appl_name != NULL) kfree(pfr->appl_name);
++ pfr->appl_name = (char*)kmalloc(optlen+1, GFP_ATOMIC);
++ if(pfr->appl_name != NULL) {
++ memcpy(pfr->appl_name, applName, optlen);
++ pfr->appl_name[optlen] = '\0';
++ }
++
++ ret = 0;
++ break;
++
++ case SO_PURGE_IDLE_HASH_RULES:
++ if(optlen != sizeof(rule_inactivity))
++ return -EINVAL;
++
++ if(copy_from_user(&rule_inactivity, optval, sizeof(rule_inactivity)))
++ return -EFAULT;
++ else {
++ if(rule_inactivity > 0) {
++ write_lock(&pfr->ring_rules_lock);
++ purge_idle_hash_rules(pfr, rule_inactivity);
++ write_unlock(&pfr->ring_rules_lock);
++ }
++ ret = 0;
++ }
+ break;
+
+ case SO_SET_REFLECTOR:
+ if(optlen >= (sizeof(devName)-1))
+ return -EINVAL;
+
-+ if(optlen > 0) {
-+ if(copy_from_user(devName, optval, optlen))
-+ return -EFAULT;
-+ }
++ if(optlen > 0)
++ {
++ if(copy_from_user(devName, optval, optlen))
++ return -EFAULT;
++ }
+
+ devName[optlen] = '\0';
+
+#if defined(RING_DEBUG)
-+ printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
++ printk("[PF_RING] +++ SO_SET_REFLECTOR(%s)\n", devName);
+#endif
+
-+ write_lock(&ring_mgmt_lock);
-+ pfr->reflector_dev = dev_get_by_name(&init_net, devName);
-+ write_unlock(&ring_mgmt_lock);
++ write_lock(&pfr->ring_rules_lock);
++ pfr->reflector_dev = dev_get_by_name(
++#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
++ &init_net,
++#endif
++ devName);
++ write_unlock(&pfr->ring_rules_lock);
+
+#if defined(RING_DEBUG)
+ if(pfr->reflector_dev != NULL)
-+ printk("SO_SET_REFLECTOR(%s): succeded\n", devName);
++      printk("[PF_RING] SO_SET_REFLECTOR(%s): succeeded\n", devName);
+ else
-+ printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
++ printk("[PF_RING] SO_SET_REFLECTOR(%s): device unknown\n", devName);
+#endif
+ break;
+
-+ case SO_SET_BLOOM:
-+ if(optlen >= (sizeof(bloom_filter)-1))
++ case SO_TOGGLE_FILTER_POLICY:
++ if(optlen != sizeof(u_int8_t))
+ return -EINVAL;
++ else {
++ u_int8_t new_policy;
+
-+ if(optlen > 0) {
-+ if(copy_from_user(bloom_filter, optval, optlen))
++ if(copy_from_user(&new_policy, optval, optlen))
+ return -EFAULT;
++
++ write_lock(&pfr->ring_rules_lock);
++ pfr->rules_default_accept_policy = new_policy;
++ write_unlock(&pfr->ring_rules_lock);
++ /*
++ if(debug) printk("[PF_RING] SO_TOGGLE_FILTER_POLICY: default policy is %s\n",
++ pfr->rules_default_accept_policy ? "accept" : "drop");
++ */
+ }
++ break;
+
-+ bloom_filter[optlen] = '\0';
++ case SO_ADD_FILTERING_RULE:
++ if(debug) printk("[PF_RING] +++ SO_ADD_FILTERING_RULE(len=%d)\n", optlen);
+
-+ write_lock(&ring_mgmt_lock);
-+ handle_bloom_filter_rule(pfr, bloom_filter);
-+ write_unlock(&ring_mgmt_lock);
-+ break;
++ if(optlen == sizeof(filtering_rule)) {
++ struct list_head *ptr, *tmp_ptr;
+
-+ case SO_SET_STRING:
-+ if(optlen >= (sizeof(aho_pattern)-1))
-+ return -EINVAL;
++ if(debug) printk("[PF_RING] Allocating memory\n");
++
++ rule = (filtering_rule_element*)kcalloc(1, sizeof(filtering_rule_element), GFP_KERNEL);
+
-+ if(optlen > 0) {
-+ if(copy_from_user(aho_pattern, optval, optlen))
++ if(rule == NULL)
+ return -EFAULT;
-+ }
+
-+ aho_pattern[optlen] = '\0';
++	if(copy_from_user(&rule->rule, optval, optlen)) {
++	  kfree(rule); /* don't leak the freshly allocated rule on a failed copy */
++	  return -EFAULT;
++	}
++
++ INIT_LIST_HEAD(&rule->list);
++
++ if(rule->rule.extended_fields.filter_plugin_id > 0) {
++ int ret = 0;
+
-+ write_lock(&ring_mgmt_lock);
-+ if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
-+ if(optlen > 0) {
-+#if 1
-+ if((pfr->acsm = acsmNew2()) != NULL) {
-+ int nc=1 /* case sensitive */, i = 0;
++ if(rule->rule.extended_fields.filter_plugin_id >= MAX_PLUGIN_ID)
++ ret = -EFAULT;
++ else if(plugin_registration[rule->rule.extended_fields.filter_plugin_id] == NULL)
++ ret = -EFAULT;
+
-+ pfr->acsm->acsmFormat = ACF_BANDED;
-+ acsmAddPattern2(pfr->acsm, (unsigned char*)aho_pattern,
-+ (int)strlen(aho_pattern), nc, 0, 0,(void*)aho_pattern, i);
-+ acsmCompile2(pfr->acsm);
++ if(ret != 0) {
++ kfree(rule);
++ return(ret);
++ }
+ }
-+#else
-+ pfr->acsm = kmalloc (10, GFP_KERNEL); /* TEST */
-+#endif
++
++ if(rule->rule.plugin_action.plugin_id > 0) {
++ int ret = 0;
++
++ if(rule->rule.plugin_action.plugin_id >= MAX_PLUGIN_ID)
++ ret = -EFAULT;
++ else if(plugin_registration[rule->rule.plugin_action.plugin_id] == NULL)
++ ret = -EFAULT;
++
++ if(ret != 0) {
++ kfree(rule);
++ return(ret);
++ }
++ }
++
++ /* Compile pattern if present */
++ if(strlen(rule->rule.extended_fields.payload_pattern) > 0)
++ {
++ int patternsize;
++
++ rule->pattern = regcomp(rule->rule.extended_fields.payload_pattern,
++ &patternsize);
++
++ if(rule->pattern == NULL) {
++ printk("[PF_RING] Unable to compile pattern '%s'\n",
++ rule->rule.extended_fields.payload_pattern);
++ rule->pattern = NULL;
++ } else
++ printk("[PF_RING] Compiled pattern '%s'\n", rule->rule.extended_fields.payload_pattern);
++ } else
++ rule->pattern = NULL;
++
++ write_lock(&pfr->ring_rules_lock);
++ if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: About to add rule %d\n", rule->rule.rule_id);
++
++ /* Implement an ordered add */
++ list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
++ {
++ entry = list_entry(ptr, filtering_rule_element, list);
++
++ if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: [current rule %d][rule to add %d]\n",
++ entry->rule.rule_id, rule->rule.rule_id);
++
++ if(entry->rule.rule_id == rule->rule.rule_id)
++ {
++ memcpy(&entry->rule, &rule->rule, sizeof(filtering_rule));
++ if(entry->pattern != NULL) kfree(entry->pattern);
++ entry->pattern = rule->pattern;
++ kfree(rule);
++ rule = NULL;
++ if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: overwritten rule_id %d\n", entry->rule.rule_id);
++ break;
++ } else if(entry->rule.rule_id > rule->rule.rule_id) {
++ if(prev == NULL) {
++ list_add(&rule->list, &pfr->rules); /* Add as first entry */
++ pfr->num_filtering_rules++;
++ if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d as head rule\n", rule->rule.rule_id);
++ } else {
++ list_add(&rule->list, prev);
++ pfr->num_filtering_rules++;
++ if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d\n", rule->rule.rule_id);
++ }
++
++ rule = NULL;
++ break;
++ } else
++ prev = ptr;
++ } /* for */
++
++ if(rule != NULL)
++ {
++ if(prev == NULL)
++ {
++ list_add(&rule->list, &pfr->rules); /* Add as first entry */
++ pfr->num_filtering_rules++;
++ if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d as first rule\n", rule->rule.rule_id);
++ }
++ else
++ {
++ list_add_tail(&rule->list, &pfr->rules); /* Add as first entry */
++ pfr->num_filtering_rules++;
++ if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d as last rule\n", rule->rule.rule_id);
++ }
++ }
++
++ write_unlock(&pfr->ring_rules_lock);
++ } else if(optlen == sizeof(hash_filtering_rule)) {
++ /* This is a hash rule */
++ filtering_hash_bucket *rule = (filtering_hash_bucket*)kcalloc(1, sizeof(filtering_hash_bucket), GFP_KERNEL);
++ int rc;
++
++ if(rule == NULL)
++ return -EFAULT;
++
++	  if(copy_from_user(&rule->rule, optval, optlen)) {
++	    kfree(rule); /* don't leak the bucket on a failed copy */
++	    return -EFAULT;
++	  }
++
++ write_lock(&pfr->ring_rules_lock);
++	  rc = handle_filtering_hash_bucket(pfr, rule, 1 /* add */);
++	  if(rc == 0) pfr->num_filtering_rules++; /* count only rules actually added */
++ write_unlock(&pfr->ring_rules_lock);
++
++ if(rc != 0) {
++ kfree(rule);
++ return(rc);
++ }
++ } else {
++ printk("[PF_RING] Bad rule length (%d): discarded\n", optlen);
++ return -EFAULT;
+ }
-+ write_unlock(&ring_mgmt_lock);
+ break;
+
-+ case SO_TOGGLE_BLOOM_STATE:
-+ if(optlen >= (sizeof(bloom_filter)-1))
++ case SO_REMOVE_FILTERING_RULE:
++ if(optlen == sizeof(u_int16_t /* rule _id */))
++ {
++ /* This is a list rule */
++ u_int8_t rule_found = 0;
++ struct list_head *ptr, *tmp_ptr;
++
++ if(copy_from_user(&rule_id, optval, optlen))
++ return -EFAULT;
++
++ write_lock(&pfr->ring_rules_lock);
++
++ list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
++ {
++ entry = list_entry(ptr, filtering_rule_element, list);
++
++ if(entry->rule.rule_id == rule_id)
++ {
++ if(entry->pattern) kfree(entry->pattern);
++ list_del(ptr);
++ pfr->num_filtering_rules--;
++ if(entry->plugin_data_ptr != NULL) kfree(entry->plugin_data_ptr);
++ kfree(entry);
++ if(debug) printk("[PF_RING] SO_REMOVE_FILTERING_RULE: rule %d has been removed\n", rule_id);
++ rule_found = 1;
++ break;
++ }
++ } /* for */
++
++ write_unlock(&pfr->ring_rules_lock);
++ if(!rule_found) {
++ if(debug) printk("[PF_RING] SO_REMOVE_FILTERING_RULE: rule %d does not exist\n", rule_id);
++ return -EFAULT; /* Rule not found */
++ }
++ } else if(optlen == sizeof(hash_filtering_rule)) {
++ /* This is a hash rule */
++ filtering_hash_bucket rule;
++ int rc;
++
++ if(copy_from_user(&rule.rule, optval, optlen))
++ return -EFAULT;
++
++ write_lock(&pfr->ring_rules_lock);
++	  rc = handle_filtering_hash_bucket(pfr, &rule, 0 /* delete */);
++	  if(rc == 0) pfr->num_filtering_rules--; /* only when a rule was really removed */
++ write_unlock(&pfr->ring_rules_lock);
++ if(rc != 0) return(rc);
++ } else
++ return -EFAULT;
++ break;
++
++ case SO_SET_SAMPLING_RATE:
++ if(optlen != sizeof(pfr->sample_rate))
+ return -EINVAL;
+
-+ if(optlen > 0) {
-+ if(copy_from_user(&do_enable, optval, optlen))
++ if(copy_from_user(&pfr->sample_rate, optval, sizeof(pfr->sample_rate)))
++ return -EFAULT;
++ break;
++
++ case SO_ACTIVATE_RING:
++ if(debug) printk("[PF_RING] * SO_ACTIVATE_RING *\n");
++ found = 1, pfr->ring_active = 1;
++ break;
++
++ case SO_RING_BUCKET_LEN:
++ if(optlen != sizeof(u_int32_t))
++ return -EINVAL;
++ else {
++ if(copy_from_user(&pfr->bucket_len, optval, optlen))
+ return -EFAULT;
+ }
-+
-+ write_lock(&ring_mgmt_lock);
-+ if(do_enable)
-+ pfr->bitmask_enabled = 1;
-+ else
-+ pfr->bitmask_enabled = 0;
-+ write_unlock(&ring_mgmt_lock);
-+ printk("SO_TOGGLE_BLOOM_STATE: bloom bitmask %s\n",
-+ pfr->bitmask_enabled ? "enabled" : "disabled");
+ break;
+
-+ case SO_RESET_BLOOM_FILTERS:
-+ if(optlen >= (sizeof(bloom_filter)-1))
++ case SO_MAP_DNA_DEVICE:
++ if(optlen != sizeof(dna_device_mapping))
+ return -EINVAL;
++ else {
++ dna_device_mapping mapping;
+
-+ if(optlen > 0) {
-+ if(copy_from_user(&do_enable, optval, optlen))
++ if(copy_from_user(&mapping, optval, optlen))
+ return -EFAULT;
++ else {
++ ret = ring_map_dna_device(pfr, &mapping), found = 1;
++ }
+ }
-+
-+ write_lock(&ring_mgmt_lock);
-+ reset_bloom_filters(pfr);
-+ write_unlock(&ring_mgmt_lock);
+ break;
+
+ default:
+
+/* ************************************* */
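++
++/*
++  NOTE: a minimal userspace sketch for SO_ADD_FILTERING_RULE (the
++  filtering_rule layout comes from the PF_RING headers; the level
++  argument is an assumption):
++
++    filtering_rule rule;
++    memset(&rule, 0, sizeof(rule));
++    rule.rule_id = 10;  // position within the ordered rule list
++    setsockopt(fd, 0, SO_ADD_FILTERING_RULE, &rule, sizeof(rule));
++
++  optlen selects the rule type: sizeof(filtering_rule) installs an
++  ordered list rule, sizeof(hash_filtering_rule) an exact-match hash
++  rule, and any other length is rejected as a bad rule length.
++*/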
+
++static int ring_getsockopt(struct socket *sock,
++ int level, int optname,
++ char __user *optval,
++ int __user *optlen)
++{
++ int len, debug = 0;
++ struct ring_opt *pfr = ring_sk(sock->sk);
++
++ if(pfr == NULL)
++ return(-EINVAL);
++
++ if(get_user(len, optlen))
++ return -EFAULT;
++
++ if(len < 0)
++ return -EINVAL;
++
++ switch(optname)
++ {
++ case SO_GET_RING_VERSION:
++ {
++ u_int32_t version = RING_VERSION_NUM;
++
++ if(copy_to_user(optval, &version, sizeof(version)))
++ return -EFAULT;
++ }
++ break;
++
++ case PACKET_STATISTICS:
++ {
++ struct tpacket_stats st;
++
++ if (len > sizeof(struct tpacket_stats))
++ len = sizeof(struct tpacket_stats);
++
++ st.tp_packets = pfr->slots_info->tot_insert;
++ st.tp_drops = pfr->slots_info->tot_lost;
++
++ if (copy_to_user(optval, &st, len))
++ return -EFAULT;
++ break;
++ }
++
++ case SO_GET_HASH_FILTERING_RULE_STATS:
++ {
++ int rc = -EFAULT;
++
++ if(len >= sizeof(hash_filtering_rule)) {
++ hash_filtering_rule rule;
++ u_int hash_idx;
++
++ if(pfr->filtering_hash == NULL) {
++	      printk("[PF_RING] so_get_hash_filtering_rule_stats(): no filtering hash allocated\n");
++ return -EFAULT;
++ }
++
++ if(copy_from_user(&rule, optval, sizeof(rule))) {
++ printk("[PF_RING] so_get_hash_filtering_rule_stats: copy_from_user() failure\n");
++ return -EFAULT;
++ }
++
++ if(debug)
++ printk("[PF_RING] so_get_hash_filtering_rule_stats"
++ "(vlan=%u, proto=%u, sip=%u, sport=%u, dip=%u, dport=%u)\n",
++ rule.vlan_id, rule.proto,
++ rule.host_peer_a, rule.port_peer_a,
++ rule.host_peer_b, rule.port_peer_b);
++
++ hash_idx = hash_pkt(rule.vlan_id, rule.proto,
++ rule.host_peer_a, rule.host_peer_b,
++ rule.port_peer_a, rule.port_peer_b) % DEFAULT_RING_HASH_SIZE;
++
++ if(pfr->filtering_hash[hash_idx] != NULL) {
++ filtering_hash_bucket *bucket;
++
++ read_lock(&pfr->ring_rules_lock);
++ bucket = pfr->filtering_hash[hash_idx];
++
++ if(debug) printk("[PF_RING] so_get_hash_filtering_rule_stats(): bucket=%p\n", bucket);
++
++ while(bucket != NULL) {
++ if(hash_bucket_match_rule(bucket, &rule)) {
++ char *buffer = kmalloc(len, GFP_ATOMIC);
++
++ if(buffer == NULL) {
++		    printk("[PF_RING] so_get_hash_filtering_rule_stats(): out of memory\n");
++ rc = -EFAULT;
++ } else {
++ if((plugin_registration[rule.plugin_action.plugin_id] == NULL)
++ || (plugin_registration[rule.plugin_action.plugin_id]->pfring_plugin_get_stats == NULL)) {
++ printk("[PF_RING] Found rule but pluginId %d is not registered\n",
++ rule.plugin_action.plugin_id);
++ rc = -EFAULT;
++ } else
++ rc = plugin_registration[rule.plugin_action.plugin_id]->
++ pfring_plugin_get_stats(pfr, NULL, bucket, buffer, len);
++
++ if(rc > 0) {
++ if(copy_to_user(optval, buffer, rc)) {
++ printk("[PF_RING] copy_to_user() failure\n");
++ rc = -EFAULT;
++ }
++ }
++	        }
++	        kfree(buffer); /* kfree(NULL) is a no-op, so this is safe in both branches */
++ break;
++ } else
++ bucket = bucket->next;
++ } /* while */
++
++ read_unlock(&pfr->ring_rules_lock);
++ } else {
++ if(debug)
++ printk("[PF_RING] so_get_hash_filtering_rule_stats(): entry not found [hash_idx=%d]\n",
++ hash_idx);
++ }
++ }
++
++ return(rc);
++ break;
++ }
++
++ case SO_GET_FILTERING_RULE_STATS:
++ {
++ char *buffer = NULL;
++ int rc = -EFAULT;
++ struct list_head *ptr, *tmp_ptr;
++ u_int16_t rule_id;
++
++ if(len < sizeof(rule_id))
++ return -EINVAL;
++
++ if(copy_from_user(&rule_id, optval, sizeof(rule_id)))
++ return -EFAULT;
++
++ if(debug)
++ printk("[PF_RING] SO_GET_FILTERING_RULE_STATS: rule_id=%d\n", rule_id);
++
++ read_lock(&pfr->ring_rules_lock);
++ list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
++ {
++ filtering_rule_element *rule;
++
++ rule = list_entry(ptr, filtering_rule_element, list);
++ if(rule->rule.rule_id == rule_id)
++ {
++ buffer = kmalloc(len, GFP_ATOMIC);
++
++ if(buffer == NULL)
++ rc = -EFAULT;
++ else {
++ if((plugin_registration[rule->rule.plugin_action.plugin_id] == NULL)
++ || (plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_get_stats == NULL)) {
++ printk("[PF_RING] Found rule %d but pluginId %d is not registered\n",
++ rule_id, rule->rule.plugin_action.plugin_id);
++ rc = -EFAULT;
++ } else
++ rc = plugin_registration[rule->rule.plugin_action.plugin_id]
++ ->pfring_plugin_get_stats(pfr, rule, NULL, buffer, len);
++
++ if(rc > 0) {
++ if(copy_to_user(optval, buffer, rc)) {
++ rc = -EFAULT;
++ }
++ }
++ }
++ break;
++ }
++ }
++
++ read_unlock(&pfr->ring_rules_lock);
++ if(buffer != NULL) kfree(buffer);
++
++ /* printk("[PF_RING] SO_GET_FILTERING_RULE_STATS *END*\n"); */
++ return(rc);
++ break;
++ }
++
++ case SO_GET_MAPPED_DNA_DEVICE:
++ {
++ if(pfr->dna_device == NULL)
++ return -EFAULT;
++
++ if (len > sizeof(dna_device))
++ len = sizeof(dna_device);
++
++ if (copy_to_user(optval, pfr->dna_device, len))
++ return -EFAULT;
++
++ break;
++ }
++
++ default:
++ return -ENOPROTOOPT;
++ }
++
++ if(put_user(len, optlen))
++ return -EFAULT;
++ else
++ return(0);
++}
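++
++/*
++  NOTE: a minimal userspace sketch for the getsockopt() path above (the
++  level argument is an assumption; only optname is examined):
++
++    u_int32_t version;
++    socklen_t len = sizeof(version);
++    getsockopt(fd, 0, SO_GET_RING_VERSION, &version, &len);
++    // version now holds the module's RING_VERSION_NUM
++*/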
++
++/* ************************************* */
++
++u_int get_num_device_free_slots(int ifindex) {
++ int num = 0;
++
++ if((ifindex >= 0) && (ifindex < MAX_NUM_DEVICES)) {
++ struct list_head *ptr, *tmp_ptr;
++ device_ring_list_element *entry;
++
++ list_for_each_safe(ptr, tmp_ptr, &device_ring_list[ifindex]) {
++ int num_free_slots;
++
++ entry = list_entry(ptr, device_ring_list_element, list);
++
++ num_free_slots = get_num_ring_free_slots(entry->the_ring);
++
++ if(num_free_slots == 0)
++ return(0);
++ else {
++ if(num == 0)
++ num = num_free_slots;
++ else if(num > num_free_slots)
++ num = num_free_slots;
++ }
++ }
++ }
++
++ return(num);
++}
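++
++/*
++  NOTE: the loop above returns the minimum number of free slots across
++  all rings bound to the interface (zero short-circuits), so the most
++  congested ring governs the value exported through
++  set_read_device_pfring_free_slots().
++*/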
++
++/* ************************************* */
++
++void dna_device_handler(dna_device_operation operation,
++ unsigned long packet_memory,
++ u_int packet_memory_num_slots,
++ u_int packet_memory_slot_len,
++ u_int packet_memory_tot_len,
++ void *descr_packet_memory,
++ u_int descr_packet_memory_num_slots,
++ u_int descr_packet_memory_slot_len,
++ u_int descr_packet_memory_tot_len,
++ u_int channel_id,
++ void *phys_card_memory,
++ u_int phys_card_memory_len,
++ struct net_device *netdev,
++ dna_device_model device_model,
++ wait_queue_head_t *packet_waitqueue,
++ u_int8_t *interrupt_received,
++ void *adapter_ptr,
++ dna_wait_packet wait_packet_function_ptr) {
++ int debug = 0;
++
++ if(debug)
++ printk("[PF_RING] dna_device_handler(%s)\n", netdev->name);
++
++ if(operation == add_device_mapping) {
++ dna_device_list *next;
++
++ next = kmalloc(sizeof(dna_device_list), GFP_ATOMIC);
++ if(next != NULL) {
++ next->dev.packet_memory = packet_memory;
++ next->dev.packet_memory_num_slots = packet_memory_num_slots;
++ next->dev.packet_memory_slot_len = packet_memory_slot_len;
++ next->dev.packet_memory_tot_len = packet_memory_tot_len;
++ next->dev.descr_packet_memory = descr_packet_memory;
++ next->dev.descr_packet_memory_num_slots = descr_packet_memory_num_slots;
++ next->dev.descr_packet_memory_slot_len = descr_packet_memory_slot_len;
++ next->dev.descr_packet_memory_tot_len = descr_packet_memory_tot_len;
++ next->dev.phys_card_memory = phys_card_memory;
++ next->dev.phys_card_memory_len = phys_card_memory_len;
++ next->dev.channel_id = channel_id;
++ next->dev.netdev = netdev;
++ next->dev.device_model = device_model;
++ next->dev.packet_waitqueue = packet_waitqueue;
++ next->dev.interrupt_received = interrupt_received;
++ next->dev.adapter_ptr = adapter_ptr;
++ next->dev.wait_packet_function_ptr = wait_packet_function_ptr;
++ list_add(&next->list, &ring_dna_devices_list);
++ dna_devices_list_size++;
++ } else {
++ printk("[PF_RING] Could not kmalloc slot!!\n");
++ }
++ } else {
++ struct list_head *ptr, *tmp_ptr;
++ dna_device_list *entry;
++
++ list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
++ entry = list_entry(ptr, dna_device_list, list);
++
++ if((entry->dev.netdev == netdev)
++ && (entry->dev.channel_id == channel_id)) {
++ list_del(ptr);
++ kfree(entry);
++ dna_devices_list_size--;
++ break;
++ }
++ }
++ }
++
++ if(debug)
++ printk("[PF_RING] dna_device_handler(%s): [dna_devices_list_size=%d]\n",
++ netdev->name, dna_devices_list_size);
++}
++
++/* ************************************* */
++
+static int ring_ioctl(struct socket *sock,
+ unsigned int cmd, unsigned long arg)
+{
-+ switch(cmd)
-+ {
++ switch(cmd) {
+#ifdef CONFIG_INET
-+ case SIOCGIFFLAGS:
-+ case SIOCSIFFLAGS:
-+ case SIOCGIFCONF:
-+ case SIOCGIFMETRIC:
-+ case SIOCSIFMETRIC:
-+ case SIOCGIFMEM:
-+ case SIOCSIFMEM:
-+ case SIOCGIFMTU:
-+ case SIOCSIFMTU:
-+ case SIOCSIFLINK:
-+ case SIOCGIFHWADDR:
-+ case SIOCSIFHWADDR:
-+ case SIOCSIFMAP:
-+ case SIOCGIFMAP:
-+ case SIOCSIFSLAVE:
-+ case SIOCGIFSLAVE:
-+ case SIOCGIFINDEX:
-+ case SIOCGIFNAME:
-+ case SIOCGIFCOUNT:
-+ case SIOCSIFHWBROADCAST:
-+ return(inet_dgram_ops.ioctl(sock, cmd, arg));
++ case SIOCGIFFLAGS:
++ case SIOCSIFFLAGS:
++ case SIOCGIFCONF:
++ case SIOCGIFMETRIC:
++ case SIOCSIFMETRIC:
++ case SIOCGIFMEM:
++ case SIOCSIFMEM:
++ case SIOCGIFMTU:
++ case SIOCSIFMTU:
++ case SIOCSIFLINK:
++ case SIOCGIFHWADDR:
++ case SIOCSIFHWADDR:
++ case SIOCSIFMAP:
++ case SIOCGIFMAP:
++ case SIOCSIFSLAVE:
++ case SIOCGIFSLAVE:
++ case SIOCGIFINDEX:
++ case SIOCGIFNAME:
++ case SIOCGIFCOUNT:
++ case SIOCSIFHWBROADCAST:
++ return(inet_dgram_ops.ioctl(sock, cmd, arg));
+#endif
+
-+ default:
-+ return -ENOIOCTLCMD;
-+ }
++ default:
++ return -ENOIOCTLCMD;
++ }
+
+ return 0;
+}
+ .shutdown = sock_no_shutdown,
+ .sendpage = sock_no_sendpage,
+ .sendmsg = sock_no_sendmsg,
-+ .getsockopt = sock_no_getsockopt,
+
+ /* Now the operations that really occur. */
+ .release = ring_release,
+ .mmap = ring_mmap,
+ .poll = ring_poll,
+ .setsockopt = ring_setsockopt,
++ .getsockopt = ring_getsockopt,
+ .ioctl = ring_ioctl,
+ .recvmsg = ring_recvmsg,
+};
+
+static void __exit ring_exit(void)
+{
-+ struct list_head *ptr;
++ struct list_head *ptr, *tmp_ptr;
+ struct ring_element *entry;
+
-+ for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
++ list_for_each_safe(ptr, tmp_ptr, &ring_table) {
+ entry = list_entry(ptr, struct ring_element, list);
++ list_del(ptr);
+ kfree(entry);
+ }
+
-+ while(ring_cluster_list != NULL) {
-+ struct ring_cluster *next = ring_cluster_list->next;
-+ kfree(ring_cluster_list);
-+ ring_cluster_list = next;
++ list_for_each_safe(ptr, tmp_ptr, &ring_cluster_list) {
++ ring_cluster_element *cluster_ptr;
++
++ cluster_ptr = list_entry(ptr, ring_cluster_element, list);
++
++ list_del(ptr);
++ kfree(cluster_ptr);
++ }
++
++ list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
++ dna_device_list *elem;
++
++ elem = list_entry(ptr, dna_device_list, list);
++
++ list_del(ptr);
++ kfree(elem);
+ }
+
++ set_register_pfring_plugin(NULL);
++ set_unregister_pfring_plugin(NULL);
+ set_skb_ring_handler(NULL);
++ set_add_hdr_to_ring(NULL);
+ set_buffer_ring_handler(NULL);
++ set_read_device_pfring_free_slots(NULL);
++ set_ring_dna_device_handler(NULL);
+ sock_unregister(PF_RING);
+ ring_proc_term();
-+ printk("PF_RING shut down.\n");
++ printk("[PF_RING] unloaded\n");
+}
+
+/* ************************************ */
+
+static int __init ring_init(void)
+{
-+ printk("Welcome to PF_RING %s\n(C) 2004-07 L.Deri <deri@ntop.org>\n",
++ int i;
++
++ printk("[PF_RING] Welcome to PF_RING %s\n"
++ "(C) 2004-09 L.Deri <deri@ntop.org>\n",
+ RING_VERSION);
+
+ INIT_LIST_HEAD(&ring_table);
-+ ring_cluster_list = NULL;
++ INIT_LIST_HEAD(&ring_cluster_list);
++ INIT_LIST_HEAD(&ring_dna_devices_list);
++
++ for(i=0; i<MAX_NUM_DEVICES; i++)
++ INIT_LIST_HEAD(&device_ring_list[i]);
+
+ sock_register(&ring_family_ops);
+
+ set_skb_ring_handler(skb_ring_handler);
++ set_add_hdr_to_ring(add_hdr_to_ring);
+ set_buffer_ring_handler(buffer_ring_handler);
++ set_register_pfring_plugin(register_plugin);
++ set_unregister_pfring_plugin(unregister_plugin);
++ set_read_device_pfring_free_slots(get_num_device_free_slots);
++ set_ring_dna_device_handler(dna_device_handler);
+
+ if(get_buffer_ring_handler() != buffer_ring_handler) {
-+ printk("PF_RING: set_buffer_ring_handler FAILED\n");
++ printk("[PF_RING] set_buffer_ring_handler FAILED\n");
+
+ set_skb_ring_handler(NULL);
+ set_buffer_ring_handler(NULL);
+ sock_unregister(PF_RING);
+ return -1;
+ } else {
-+ printk("PF_RING: bucket length %d bytes\n", bucket_len);
-+ printk("PF_RING: ring slots %d\n", num_slots);
-+ printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate);
-+ printk("PF_RING: capture TX %s\n",
++ printk("[PF_RING] Ring slots %d\n", num_slots);
++ printk("[PF_RING] Slot version %d\n", RING_FLOWSLOT_VERSION);
++ printk("[PF_RING] Capture TX %s\n",
+ enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
-+ printk("PF_RING: transparent mode %s\n",
-+ transparent_mode ? "Yes" : "No");
-+
-+ printk("PF_RING initialized correctly.\n");
++ printk("[PF_RING] IP Defragment %s\n", enable_ip_defrag ? "Yes" : "No");
++ printk("[PF_RING] Initialized correctly\n");
+
+ ring_proc_init();
+ return 0;
+
+module_init(ring_init);
+module_exit(ring_exit);
++
+MODULE_LICENSE("GPL");
++MODULE_AUTHOR("Luca Deri <deri@ntop.org>");
++MODULE_DESCRIPTION("Packet capture acceleration by means of a ring buffer");
+
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+MODULE_ALIAS_NETPROTO(PF_RING);