1 diff --unified --recursive --new-file linux-2.6.21.4/include/linux/ring.h linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h
2 --- linux-2.6.21.4/include/linux/ring.h 1970-01-01 00:00:00.000000000 +0000
3 +++ linux-2.6.21.4-1-686-smp-ring3/include/linux/ring.h 2007-06-10 16:43:04.346421348 +0000
6 + * Definitions for packet ring
8 + * 2004-07 Luca Deri <deri@ntop.org>
13 +#define INCLUDE_MAC_INFO
15 +#ifdef INCLUDE_MAC_INFO
16 +#define SKB_DISPLACEMENT 14 /* Include MAC address information */
18 +#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */
22 +#define RING_MAGIC_VALUE 0x88
23 +#define RING_FLOWSLOT_VERSION 6
24 +#define RING_VERSION "3.4.1"
26 +#define SO_ADD_TO_CLUSTER 99
27 +#define SO_REMOVE_FROM_CLUSTER 100
28 +#define SO_SET_REFLECTOR 101
29 +#define SO_SET_BLOOM 102
30 +#define SO_SET_STRING 103
31 +#define SO_TOGGLE_BLOOM_STATE 104
32 +#define SO_RESET_BLOOM_FILTERS 105
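In userland these codes drive a ring socket through setsockopt(). A minimal sketch of joining and leaving a cluster, assuming the PF_RING library convention of socket level 0 and a u_int cluster id; fd is an already-open ring socket (see the socket example further down) and the id value 5 is hypothetical:

    #include <sys/socket.h>

    u_int cluster_id = 5;                    /* hypothetical cluster id */
    if (setsockopt(fd, 0, SO_ADD_TO_CLUSTER,
                   &cluster_id, sizeof(cluster_id)) < 0)
      perror("SO_ADD_TO_CLUSTER");
    /* ... capture; sockets in cluster 5 share the packet load ... */
    setsockopt(fd, 0, SO_REMOVE_FROM_CLUSTER, NULL, 0);

How a cluster spreads packets across its members is chosen by the cluster type defined further down (per-flow or round-robin).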
34 +#define BITMASK_SET(n, p) (((char*)p->bits_memory)[n/8] |= (1<<(n % 8)))
35 +#define BITMASK_CLR(n, p) (((char*)p->bits_memory)[n/8] &= ~(1<<(n % 8)))
36 +#define BITMASK_ISSET(n, p) (((char*)p->bits_memory)[n/8] & (1<<(n % 8)))
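These macros treat p->bits_memory as a flat bit array indexed from 0, so the caller must point it at least (num_bits + 7) / 8 zeroed bytes. A minimal sketch, assuming sel points at the bloom-filter selector structure defined below:

    BITMASK_SET(42, sel);                 /* mark bit 42 */
    if (BITMASK_ISSET(42, sel))           /* non-zero while the bit is on */
      BITMASK_CLR(42, sel);               /* clear it again */

Note that p is expanded unparenthesized, so sel must be a plain pointer expression.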
38 +/* *********************************** */
41 + Aho-Corasick code taken from Snort
45 + * DEFINES and Typedef's
47 +#define MAX_ALPHABET_SIZE 256
50 + FAIL STATE for 1, 2, or 4 bytes for state transitions
52 + Uncomment this define to use 32 bit state values
56 +typedef unsigned short acstate_t;
57 +#define ACSM_FAIL_STATE2 0xffff
63 +struct _acsm_pattern2
65 + struct _acsm_pattern2 *next;
67 + unsigned char *patrn;
68 + unsigned char *casepatrn;
79 + * transition nodes - either 8 or 12 bytes
82 +struct trans_node_s {
84 + acstate_t key; /* The character that got us here - sized to keep structure aligned on 4 bytes */
85 + /* to improve caching opportunities. A value that crosses the cache line */
86 + /* forces an expensive reconstruction; typing it as acstate_t prevents that. */
87 + acstate_t next_state; /* */
88 + struct trans_node_s * next; /* next transition for this state */
94 + * User specified final storage type for the state transitions
104 + * User specified machine types
106 + * TRIE : Keyword trie
117 + * Aho-Corasick State Machine Struct - one per group of patterns
123 + ACSM_PATTERN2 * acsmPatterns;
124 + acstate_t * acsmFailState;
125 + ACSM_PATTERN2 ** acsmMatchList;
127 + /* list of transitions in each state; this is used to build the nfa & dfa. */
128 + /* After construction we convert to a sparse or full format matrix and free */
129 + /* the transition lists */
130 + trans_node_t ** acsmTransTable;
132 + acstate_t ** acsmNextState;
134 + int acsmSparseMaxRowNodes;
135 + int acsmSparseMaxZcnt;
138 + int acsmAlphabetSize;
143 +/* *********************************** */
146 +struct pcap_pkthdr {
147 + struct timeval ts; /* time stamp */
148 + u_int32_t caplen; /* length of portion present */
149 + u_int32_t len; /* length this packet (off wire) */
150 + /* packet parsing info */
151 + u_int16_t eth_type; /* Ethernet type */
152 + u_int16_t vlan_id; /* VLAN Id, or 0xFFFF (-1) if untagged */
153 + u_int8_t l3_proto; /* Layer 3 protocol */
154 + u_int16_t l3_offset, l4_offset, payload_offset; /* Offsets of L3/L4/payload elements */
155 + u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
156 + u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
160 +/* *********************************** */
162 +typedef struct _counter_list {
164 + u_int32_t bit_counter;
165 + struct _counter_list *next;
166 +} bitmask_counter_list;
169 + u_int32_t num_bits, order, num_pages;
170 + unsigned long bits_memory;
171 + bitmask_counter_list *clashes;
174 +/* *********************************** */
177 + cluster_per_flow = 0,
178 + cluster_round_robin
181 +/* *********************************** */
183 +#define RING_MIN_SLOT_SIZE (60+sizeof(struct pcap_pkthdr))
184 +#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pcap_pkthdr))
186 +/* *********************************** */
188 +typedef struct flowSlotInfo {
189 + u_int16_t version, sample_rate;
190 + u_int32_t tot_slots, slot_len, data_len, tot_mem;
192 + u_int64_t tot_pkts, tot_lost;
193 + u_int64_t tot_insert, tot_read;
194 + u_int32_t insert_idx, remove_idx;
197 +/* *********************************** */
199 +typedef struct flowSlot {
201 + u_char magic; /* It must always be zero */
203 + u_char slot_state; /* 0=empty, 1=full */
204 + u_char bucket; /* bucket[bucketLen] */
207 +/* *********************************** */
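Together these two structures define the shared-memory layout: one FlowSlotInfo header followed by tot_slots slots of slot_len bytes each, the kernel advancing insert_idx as producer and userland advancing remove_idx as consumer. A minimal consumer sketch, assuming the slot array begins immediately after the header, the flowSlot typedef is named FlowSlot, and ring is the mmap()ed buffer:

    FlowSlotInfo *info  = (FlowSlotInfo *) ring;
    char         *slots = ring + sizeof(FlowSlotInfo);

    for (;;) {
      FlowSlot *slot = (FlowSlot *) &slots[info->remove_idx * info->slot_len];

      if (slot->slot_state != 1)          /* 0=empty: wait (or block in poll()) */
        continue;

      struct pcap_pkthdr *hdr = (struct pcap_pkthdr *) &slot->bucket;
      /* hdr->caplen bytes of packet data follow the pcap_pkthdr in the bucket */

      slot->slot_state = 0;               /* hand the slot back to the kernel */
      info->remove_idx = (info->remove_idx + 1) % info->tot_slots;
    }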
211 +FlowSlotInfo* getRingPtr(void);
212 +int allocateRing(char *deviceName, u_int numSlots,
213 + u_int bucketLen, u_int sampleRate);
214 +unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
215 +void deallocateRing(void);
217 +/* ************************* */
219 +typedef int (*handle_ring_skb)(struct sk_buff *skb,
220 + u_char recv_packet, u_char real_skb);
221 +extern handle_ring_skb get_skb_ring_handler(void);
222 +extern void set_skb_ring_handler(handle_ring_skb the_handler);
223 +extern void do_skb_ring_handler(struct sk_buff *skb,
224 + u_char recv_packet, u_char real_skb);
226 +typedef int (*handle_ring_buffer)(struct net_device *dev,
227 + char *data, int len);
228 +extern handle_ring_buffer get_buffer_ring_handler(void);
229 +extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
230 +extern int do_buffer_ring_handler(struct net_device *dev,
231 + char *data, int len);
232 +#endif /* __KERNEL__ */
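The registration API above is deliberately small: the ring module installs a single function pointer and net/core/dev.c invokes it on every packet (see the hooks in the dev.c hunks below). A minimal sketch of the module side, assuming one registrant at a time; the my_* names are hypothetical:

    #include <linux/module.h>
    #include <linux/netdevice.h>
    #include <linux/ring.h>

    static int my_skb_handler(struct sk_buff *skb,
                              u_char recv_packet, u_char real_skb)
    {
      /* return non-zero once the packet has been copied into a ring, so
         that netif_rx()/netif_receive_skb() short-circuit the stack */
      return 0;
    }

    static int __init my_ring_init(void)
    {
      if (get_skb_ring_handler() != NULL)
        return -EBUSY;                    /* another handler already installed */
      set_skb_ring_handler(my_skb_handler);
      return 0;
    }

    static void __exit my_ring_exit(void)
    {
      set_skb_ring_handler(NULL);         /* unhook before unloading */
    }

    module_init(my_ring_init);
    module_exit(my_ring_exit);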
234 +/* *********************************** */
236 +#define PF_RING 27 /* Packet Ring */
237 +#define SOCK_RING PF_RING
240 +#define SIORINGPOLL 0x8888
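From userland the ring is reached as a new socket family. A minimal open sketch, assuming (as in the PF_RING userland library) that the device name travels in sa_data on bind() and that the full ring size is published in FlowSlotInfo.tot_mem, so the buffer is mapped in two steps:

    #include <sys/socket.h>
    #include <sys/mman.h>
    #include <arpa/inet.h>
    #include <linux/if_ether.h>
    #include <linux/ring.h>

    int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));

    struct sockaddr sa;
    sa.sa_family = PF_RING;
    snprintf(sa.sa_data, sizeof(sa.sa_data), "%s", "eth0");
    bind(fd, &sa, sizeof(sa));

    /* map one page to read the header, then remap the whole ring */
    char *ring = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
    u_int tot_mem = ((FlowSlotInfo *) ring)->tot_mem;
    munmap(ring, 4096);
    ring = mmap(NULL, tot_mem, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);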
242 +/* *********************************** */
244 +#endif /* __RING_H */
245 diff --unified --recursive --new-file linux-2.6.21.4/net/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/Kconfig
246 --- linux-2.6.21.4/net/Kconfig 2007-06-07 21:27:31.000000000 +0000
247 +++ linux-2.6.21.4-1-686-smp-ring3/net/Kconfig 2007-06-10 16:43:04.402423771 +0000
249 source "net/xfrm/Kconfig"
250 source "net/iucv/Kconfig"
252 +source "net/ring/Kconfig"
254 bool "TCP/IP networking"
256 diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile linux-2.6.21.4-1-686-smp-ring3/net/Makefile
257 --- linux-2.6.21.4/net/Makefile 2007-06-07 21:27:31.000000000 +0000
258 +++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile 2007-06-10 16:43:04.394423425 +0000
260 obj-$(CONFIG_DECNET) += decnet/
261 obj-$(CONFIG_ECONET) += econet/
262 obj-$(CONFIG_VLAN_8021Q) += 8021q/
263 +obj-$(CONFIG_RING) += ring/
264 obj-$(CONFIG_IP_DCCP) += dccp/
265 obj-$(CONFIG_IP_SCTP) += sctp/
266 obj-$(CONFIG_IEEE80211) += ieee80211/
267 diff --unified --recursive --new-file linux-2.6.21.4/net/Makefile.ORG linux-2.6.21.4-1-686-smp-ring3/net/Makefile.ORG
268 --- linux-2.6.21.4/net/Makefile.ORG 1970-01-01 00:00:00.000000000 +0000
269 +++ linux-2.6.21.4-1-686-smp-ring3/net/Makefile.ORG 2007-06-10 16:43:04.386423079 +0000
272 +# Makefile for the linux networking.
274 +# 2 Sep 2000, Christoph Hellwig <hch@infradead.org>
275 +# Rewritten to use lists instead of if-statements.
280 +obj-$(CONFIG_NET) := socket.o core/
282 +tmp-$(CONFIG_COMPAT) := compat.o
283 +obj-$(CONFIG_NET) += $(tmp-y)
285 +# LLC has to be linked before the files in net/802/
286 +obj-$(CONFIG_LLC) += llc/
287 +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
288 +obj-$(CONFIG_NETFILTER) += netfilter/
289 +obj-$(CONFIG_INET) += ipv4/
290 +obj-$(CONFIG_XFRM) += xfrm/
291 +obj-$(CONFIG_UNIX) += unix/
292 +ifneq ($(CONFIG_IPV6),)
295 +obj-$(CONFIG_PACKET) += packet/
296 +obj-$(CONFIG_NET_KEY) += key/
297 +obj-$(CONFIG_NET_SCHED) += sched/
298 +obj-$(CONFIG_BRIDGE) += bridge/
299 +obj-$(CONFIG_IPX) += ipx/
300 +obj-$(CONFIG_ATALK) += appletalk/
301 +obj-$(CONFIG_WAN_ROUTER) += wanrouter/
302 +obj-$(CONFIG_X25) += x25/
303 +obj-$(CONFIG_LAPB) += lapb/
304 +obj-$(CONFIG_NETROM) += netrom/
305 +obj-$(CONFIG_ROSE) += rose/
306 +obj-$(CONFIG_AX25) += ax25/
307 +obj-$(CONFIG_IRDA) += irda/
308 +obj-$(CONFIG_BT) += bluetooth/
309 +obj-$(CONFIG_SUNRPC) += sunrpc/
310 +obj-$(CONFIG_RXRPC) += rxrpc/
311 +obj-$(CONFIG_ATM) += atm/
312 +obj-$(CONFIG_DECNET) += decnet/
313 +obj-$(CONFIG_ECONET) += econet/
314 +obj-$(CONFIG_VLAN_8021Q) += 8021q/
315 +obj-$(CONFIG_IP_DCCP) += dccp/
316 +obj-$(CONFIG_IP_SCTP) += sctp/
317 +obj-$(CONFIG_IEEE80211) += ieee80211/
318 +obj-$(CONFIG_TIPC) += tipc/
319 +obj-$(CONFIG_NETLABEL) += netlabel/
320 +obj-$(CONFIG_IUCV) += iucv/
322 +ifeq ($(CONFIG_NET),y)
323 +obj-$(CONFIG_SYSCTL) += sysctl_net.o
325 diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c
326 --- linux-2.6.21.4/net/core/dev.c 2007-06-07 21:27:31.000000000 +0000
327 +++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c 2007-06-10 16:43:04.382422906 +0000
329 #include <linux/err.h>
330 #include <linux/ctype.h>
332 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
334 +/* #define RING_DEBUG */
336 +#include <linux/ring.h>
337 +#include <linux/version.h>
339 +static handle_ring_skb ring_handler = NULL;
341 +handle_ring_skb get_skb_ring_handler() { return(ring_handler); }
343 +void set_skb_ring_handler(handle_ring_skb the_handler) {
344 + ring_handler = the_handler;
347 +void do_skb_ring_handler(struct sk_buff *skb,
348 + u_char recv_packet, u_char real_skb) {
350 + ring_handler(skb, recv_packet, real_skb);
353 +/* ******************* */
355 +static handle_ring_buffer buffer_ring_handler = NULL;
357 +handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); }
359 +void set_buffer_ring_handler(handle_ring_buffer the_handler) {
360 + buffer_ring_handler = the_handler;
363 +int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
364 + if(buffer_ring_handler) {
365 + buffer_ring_handler(dev, data, len);
371 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
372 +EXPORT_SYMBOL(get_skb_ring_handler);
373 +EXPORT_SYMBOL(set_skb_ring_handler);
374 +EXPORT_SYMBOL(do_skb_ring_handler);
376 +EXPORT_SYMBOL(get_buffer_ring_handler);
377 +EXPORT_SYMBOL(set_buffer_ring_handler);
378 +EXPORT_SYMBOL(do_buffer_ring_handler);
383 * The list of packet types we will receive (as opposed to discard)
384 * and the routines to invoke.
385 @@ -1474,6 +1524,10 @@
386 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
389 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
390 + if(ring_handler) ring_handler(skb, 0, 1);
391 +#endif /* CONFIG_RING */
393 /* Grab device queue */
394 spin_lock(&dev->queue_lock);
396 @@ -1574,6 +1628,13 @@
399 /* if netpoll wants it, pretend we never saw it */
400 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
401 + if(ring_handler && ring_handler(skb, 1, 1)) {
402 + /* The packet has been copied into a ring */
403 + return(NET_RX_SUCCESS);
405 +#endif /* CONFIG_RING */
410 @@ -1764,6 +1825,13 @@
411 struct net_device *orig_dev;
412 int ret = NET_RX_DROP;
414 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
415 + if(ring_handler && ring_handler(skb, 1, 1)) {
416 + /* The packet has been copied into a ring */
417 + return(NET_RX_SUCCESS);
419 +#endif /* CONFIG_RING */
422 /* if we've gotten here through NAPI, check netpoll */
423 if (skb->dev->poll && netpoll_rx(skb))
424 diff --unified --recursive --new-file linux-2.6.21.4/net/core/dev.c.ORG linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c.ORG
425 --- linux-2.6.21.4/net/core/dev.c.ORG 1970-01-01 00:00:00.000000000 +0000
426 +++ linux-2.6.21.4-1-686-smp-ring3/net/core/dev.c.ORG 2007-06-10 16:43:04.354421694 +0000
429 + * NET3 Protocol independent device support routines.
431 + * This program is free software; you can redistribute it and/or
432 + * modify it under the terms of the GNU General Public License
433 + * as published by the Free Software Foundation; either version
434 + * 2 of the License, or (at your option) any later version.
436 + * Derived from the non IP parts of dev.c 1.0.19
437 + * Authors: Ross Biro
438 + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
439 + * Mark Evans, <evansmp@uhura.aston.ac.uk>
441 + * Additional Authors:
442 + * Florian la Roche <rzsfl@rz.uni-sb.de>
443 + * Alan Cox <gw4pts@gw4pts.ampr.org>
444 + * David Hinds <dahinds@users.sourceforge.net>
445 + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
446 + * Adam Sulmicki <adam@cfar.umd.edu>
447 + * Pekka Riikonen <priikone@poesidon.pspt.fi>
450 + * D.J. Barrow : Fixed bug where dev->refcnt gets set
451 + * to 2 if register_netdev gets called
452 + * before net_dev_init & also removed a
453 + * few lines of code in the process.
454 + * Alan Cox : device private ioctl copies fields back.
455 + * Alan Cox : Transmit queue code does relevant
456 + * stunts to keep the queue safe.
457 + * Alan Cox : Fixed double lock.
458 + * Alan Cox : Fixed promisc NULL pointer trap
459 + * ???????? : Support the full private ioctl range
460 + * Alan Cox : Moved ioctl permission check into
462 + * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
463 + * Alan Cox : 100 backlog just doesn't cut it when
464 + * you start doing multicast video 8)
465 + * Alan Cox : Rewrote net_bh and list manager.
466 + * Alan Cox : Fix ETH_P_ALL echoback lengths.
467 + * Alan Cox : Took out transmit every packet pass
468 + * Saved a few bytes in the ioctl handler
469 + * Alan Cox : Network driver sets packet type before
470 + * calling netif_rx. Saves a function
472 + * Alan Cox : Hashed net_bh()
473 + * Richard Kooijman: Timestamp fixes.
474 + * Alan Cox : Wrong field in SIOCGIFDSTADDR
475 + * Alan Cox : Device lock protection.
476 + * Alan Cox : Fixed nasty side effect of device close
478 + * Rudi Cilibrasi : Pass the right thing to
479 + * set_mac_address()
480 + * Dave Miller : 32bit quantity for the device lock to
481 + * make it work out on a Sparc.
482 + * Bjorn Ekwall : Added KERNELD hack.
483 + * Alan Cox : Cleaned up the backlog initialise.
484 + * Craig Metz : SIOCGIFCONF fix if space for under
486 + * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
487 + * is no device open function.
488 + * Andi Kleen : Fix error reporting for SIOCGIFCONF
489 + * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
490 + * Cyrus Durgin : Cleaned for KMOD
491 + * Adam Sulmicki : Bug Fix : Network Device Unload
492 + * A network device unload needs to purge
493 + * the backlog queue.
494 + * Paul Rusty Russell : SIOCSIFNAME
495 + * Pekka Riikonen : Netdev boot-time settings code
496 + * Andrew Morton : Make unregister_netdevice wait
497 + * indefinitely on dev->refcnt
498 + * J Hadi Salim : - Backlog queue sampling
499 + * - netif_rx() feedback
502 +#include <asm/uaccess.h>
503 +#include <asm/system.h>
504 +#include <linux/bitops.h>
505 +#include <linux/capability.h>
506 +#include <linux/cpu.h>
507 +#include <linux/types.h>
508 +#include <linux/kernel.h>
509 +#include <linux/sched.h>
510 +#include <linux/mutex.h>
511 +#include <linux/string.h>
512 +#include <linux/mm.h>
513 +#include <linux/socket.h>
514 +#include <linux/sockios.h>
515 +#include <linux/errno.h>
516 +#include <linux/interrupt.h>
517 +#include <linux/if_ether.h>
518 +#include <linux/netdevice.h>
519 +#include <linux/etherdevice.h>
520 +#include <linux/notifier.h>
521 +#include <linux/skbuff.h>
522 +#include <net/sock.h>
523 +#include <linux/rtnetlink.h>
524 +#include <linux/proc_fs.h>
525 +#include <linux/seq_file.h>
526 +#include <linux/stat.h>
527 +#include <linux/if_bridge.h>
528 +#include <net/dst.h>
529 +#include <net/pkt_sched.h>
530 +#include <net/checksum.h>
531 +#include <linux/highmem.h>
532 +#include <linux/init.h>
533 +#include <linux/kmod.h>
534 +#include <linux/module.h>
535 +#include <linux/kallsyms.h>
536 +#include <linux/netpoll.h>
537 +#include <linux/rcupdate.h>
538 +#include <linux/delay.h>
539 +#include <linux/wireless.h>
540 +#include <net/iw_handler.h>
541 +#include <asm/current.h>
542 +#include <linux/audit.h>
543 +#include <linux/dmaengine.h>
544 +#include <linux/err.h>
545 +#include <linux/ctype.h>
548 + * The list of packet types we will receive (as opposed to discard)
549 + * and the routines to invoke.
551 + * Why 16. Because with 16 the only overlap we get on a hash of the
552 + * low nibble of the protocol value is RARP/SNAP/X.25.
554 + * NOTE: That is no longer true with the addition of VLAN tags. Not
555 + * sure which should go first, but I bet it won't make much
556 + * difference if we are running VLANs. The good news is that
557 + * this protocol won't be in the list unless compiled in, so
558 + * the average user (w/out VLANs) will not be adversely affected.
575 +static DEFINE_SPINLOCK(ptype_lock);
576 +static struct list_head ptype_base[16]; /* 16 way hashed list */
577 +static struct list_head ptype_all; /* Taps */
579 +#ifdef CONFIG_NET_DMA
580 +static struct dma_client *net_dma_client;
581 +static unsigned int net_dma_count;
582 +static spinlock_t net_dma_event_lock;
586 + * The @dev_base list is protected by @dev_base_lock and the rtnl
589 + * Pure readers hold dev_base_lock for reading.
591 + * Writers must hold the rtnl semaphore while they loop through the
592 + * dev_base list, and hold dev_base_lock for writing when they do the
593 + * actual updates. This allows pure readers to access the list even
594 + * while a writer is preparing to update it.
596 + * To put it another way, dev_base_lock is held for writing only to
597 + * protect against pure readers; the rtnl semaphore provides the
598 + * protection against other writers.
600 + * See, for example usages, register_netdevice() and
601 + * unregister_netdevice(), which must be called with the rtnl
604 +struct net_device *dev_base;
605 +static struct net_device **dev_tail = &dev_base;
606 +DEFINE_RWLOCK(dev_base_lock);
608 +EXPORT_SYMBOL(dev_base);
609 +EXPORT_SYMBOL(dev_base_lock);
611 +#define NETDEV_HASHBITS 8
612 +static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
613 +static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
615 +static inline struct hlist_head *dev_name_hash(const char *name)
617 + unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
618 + return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
621 +static inline struct hlist_head *dev_index_hash(int ifindex)
623 + return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
627 + * Our notifier list
630 +static RAW_NOTIFIER_HEAD(netdev_chain);
633 + * Device drivers call our routines to queue packets here. We empty the
634 + * queue in the local softnet handler.
636 +DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
639 +extern int netdev_sysfs_init(void);
640 +extern int netdev_register_sysfs(struct net_device *);
641 +extern void netdev_unregister_sysfs(struct net_device *);
643 +#define netdev_sysfs_init() (0)
644 +#define netdev_register_sysfs(dev) (0)
645 +#define netdev_unregister_sysfs(dev) do { } while(0)
649 +/*******************************************************************************
651 + Protocol management and registration routines
653 +*******************************************************************************/
659 +static int netdev_nit;
662 + * Add a protocol ID to the list. Now that the input handler is
663 + * smarter we can dispense with all the messy stuff that used to be
666 + * BEWARE!!! Protocol handlers, mangling input packets,
667 + * MUST BE last in hash buckets and checking protocol handlers
668 + * MUST start from promiscuous ptype_all chain in net_bh.
669 + * It is true now, do not change it.
670 + * Explanation follows: if a protocol handler that mangles packets
671 + * were first on the list, it could not sense that the packet
672 + * is cloned and should be copied-on-write, so it would
673 + * change it and subsequent readers would get a broken packet.
678 + * dev_add_pack - add packet handler
679 + * @pt: packet type declaration
681 + * Add a protocol handler to the networking stack. The passed &packet_type
682 + * is linked into kernel lists and may not be freed until it has been
683 + * removed from the kernel lists.
685 + * This call does not sleep, therefore it cannot
686 + * guarantee that all CPUs in the middle of receiving packets
687 + * will see the new packet type (until the next received packet).
690 +void dev_add_pack(struct packet_type *pt)
694 + spin_lock_bh(&ptype_lock);
695 + if (pt->type == htons(ETH_P_ALL)) {
697 + list_add_rcu(&pt->list, &ptype_all);
699 + hash = ntohs(pt->type) & 15;
700 + list_add_rcu(&pt->list, &ptype_base[hash]);
702 + spin_unlock_bh(&ptype_lock);
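Handlers registered for ETH_P_ALL land on the ptype_all taps list and see every frame; everything else goes on one of the 16 hashed chains. A minimal sketch of a promiscuous tap; the my_* names are hypothetical:

    static int my_tap_rcv(struct sk_buff *skb, struct net_device *dev,
                          struct packet_type *pt, struct net_device *orig_dev)
    {
      /* we were handed our own reference to the skb and must release it */
      kfree_skb(skb);
      return 0;
    }

    static struct packet_type my_tap = {
      .type = __constant_htons(ETH_P_ALL),
      .func = my_tap_rcv,
    };

    /* dev_add_pack(&my_tap) at module init, dev_remove_pack(&my_tap) at exit */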
706 + * __dev_remove_pack - remove packet handler
707 + * @pt: packet type declaration
709 + * Remove a protocol handler that was previously added to the kernel
710 + * protocol handlers by dev_add_pack(). The passed &packet_type is removed
711 + * from the kernel lists and can be freed or reused once this function
714 + * The packet type might still be in use by receivers
715 + * and must not be freed until after all the CPUs have gone
716 + * through a quiescent state.
718 +void __dev_remove_pack(struct packet_type *pt)
720 + struct list_head *head;
721 + struct packet_type *pt1;
723 + spin_lock_bh(&ptype_lock);
725 + if (pt->type == htons(ETH_P_ALL)) {
729 + head = &ptype_base[ntohs(pt->type) & 15];
731 + list_for_each_entry(pt1, head, list) {
733 + list_del_rcu(&pt->list);
738 + printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
740 + spin_unlock_bh(&ptype_lock);
743 + * dev_remove_pack - remove packet handler
744 + * @pt: packet type declaration
746 + * Remove a protocol handler that was previously added to the kernel
747 + * protocol handlers by dev_add_pack(). The passed &packet_type is removed
748 + * from the kernel lists and can be freed or reused once this function
751 + * This call sleeps to guarantee that no CPU is looking at the packet
752 + * type after return.
754 +void dev_remove_pack(struct packet_type *pt)
756 + __dev_remove_pack(pt);
761 +/******************************************************************************
763 + Device Boot-time Settings Routines
765 +*******************************************************************************/
767 +/* Boot time configuration table */
768 +static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
771 + * netdev_boot_setup_add - add new setup entry
772 + * @name: name of the device
773 + * @map: configured settings for the device
775 + * Adds a new setup entry to the dev_boot_setup list. The function
776 + * returns 0 on error and 1 on success. This is a generic routine to
779 +static int netdev_boot_setup_add(char *name, struct ifmap *map)
781 + struct netdev_boot_setup *s;
784 + s = dev_boot_setup;
785 + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
786 + if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
787 + memset(s[i].name, 0, sizeof(s[i].name));
788 + strcpy(s[i].name, name);
789 + memcpy(&s[i].map, map, sizeof(s[i].map));
794 + return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
798 + * netdev_boot_setup_check - check boot time settings
799 + * @dev: the netdevice
801 + * Check boot time settings for the device.
802 + * The found settings are set for the device to be used
803 + * later in the device probing.
804 + * Returns 0 if no settings found, 1 if they are.
806 +int netdev_boot_setup_check(struct net_device *dev)
808 + struct netdev_boot_setup *s = dev_boot_setup;
811 + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
812 + if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
813 + !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
814 + dev->irq = s[i].map.irq;
815 + dev->base_addr = s[i].map.base_addr;
816 + dev->mem_start = s[i].map.mem_start;
817 + dev->mem_end = s[i].map.mem_end;
826 + * netdev_boot_base - get address from boot time settings
827 + * @prefix: prefix for network device
828 + * @unit: id for network device
830 + * Check boot time settings for the base address of device.
831 + * The found settings are set for the device to be used
832 + * later in the device probing.
833 + * Returns 0 if no settings found.
835 +unsigned long netdev_boot_base(const char *prefix, int unit)
837 + const struct netdev_boot_setup *s = dev_boot_setup;
838 + char name[IFNAMSIZ];
841 + sprintf(name, "%s%d", prefix, unit);
844 + * If the device is already registered then return a base of 1
845 + * to indicate not to probe for this interface
847 + if (__dev_get_by_name(name))
850 + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
851 + if (!strcmp(name, s[i].name))
852 + return s[i].map.base_addr;
857 + * Saves boot-time configured settings for any netdevice.
859 +int __init netdev_boot_setup(char *str)
864 + str = get_options(str, ARRAY_SIZE(ints), ints);
868 + /* Save settings */
869 + memset(&map, 0, sizeof(map));
873 + map.base_addr = ints[2];
875 + map.mem_start = ints[3];
877 + map.mem_end = ints[4];
879 + /* Add new entry to the list */
880 + return netdev_boot_setup_add(str, &map);
883 +__setup("netdev=", netdev_boot_setup);
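Given the parse order in netdev_boot_setup() above - irq first, then I/O base, mem_start and mem_end, with the unparsed tail taken as the device name - a boot line that pre-seeds eth0's resources would look like this (the resource values are illustrative):

    netdev=9,0x300,0,0,eth0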
885 +/*******************************************************************************
887 + Device Interface Subroutines
889 +*******************************************************************************/
892 + * __dev_get_by_name - find a device by its name
893 + * @name: name to find
895 + * Find an interface by name. Must be called under RTNL semaphore
896 + * or @dev_base_lock. If the name is found a pointer to the device
897 + * is returned. If the name is not found then %NULL is returned. The
898 + * reference counters are not incremented so the caller must be
899 + * careful with locks.
902 +struct net_device *__dev_get_by_name(const char *name)
904 + struct hlist_node *p;
906 + hlist_for_each(p, dev_name_hash(name)) {
907 + struct net_device *dev
908 + = hlist_entry(p, struct net_device, name_hlist);
909 + if (!strncmp(dev->name, name, IFNAMSIZ))
916 + * dev_get_by_name - find a device by its name
917 + * @name: name to find
919 + * Find an interface by name. This can be called from any
920 + * context and does its own locking. The returned handle has
921 + * the usage count incremented and the caller must use dev_put() to
922 + * release it when it is no longer needed. %NULL is returned if no
923 + * matching device is found.
926 +struct net_device *dev_get_by_name(const char *name)
928 + struct net_device *dev;
930 + read_lock(&dev_base_lock);
931 + dev = __dev_get_by_name(name);
934 + read_unlock(&dev_base_lock);
939 + * __dev_get_by_index - find a device by its ifindex
940 + * @ifindex: index of device
942 + * Search for an interface by index. Returns a pointer to the device,
943 + * or %NULL if it is not found. The device has not
944 + * had its reference counter increased so the caller must be careful
945 + * about locking. The caller must hold either the RTNL semaphore
946 + * or @dev_base_lock.
949 +struct net_device *__dev_get_by_index(int ifindex)
951 + struct hlist_node *p;
953 + hlist_for_each(p, dev_index_hash(ifindex)) {
954 + struct net_device *dev
955 + = hlist_entry(p, struct net_device, index_hlist);
956 + if (dev->ifindex == ifindex)
964 + * dev_get_by_index - find a device by its ifindex
965 + * @ifindex: index of device
967 + * Search for an interface by index. Returns a pointer to the device,
968 + * or NULL if it is not found. The device returned has
969 + * had a reference added and the pointer is safe until the user calls
970 + * dev_put to indicate they have finished with it.
973 +struct net_device *dev_get_by_index(int ifindex)
975 + struct net_device *dev;
977 + read_lock(&dev_base_lock);
978 + dev = __dev_get_by_index(ifindex);
981 + read_unlock(&dev_base_lock);
986 + * dev_getbyhwaddr - find a device by its hardware address
987 + * @type: media type of device
988 + * @ha: hardware address
990 + * Search for an interface by MAC address. Returns a pointer to the device,
991 + * or NULL if it is not found. The caller must hold the
992 + * rtnl semaphore. The returned device has not had its ref count increased
993 + * and the caller must therefore be careful about locking
996 + * If the API was consistent this would be __dev_get_by_hwaddr
999 +struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
1001 + struct net_device *dev;
1005 + for (dev = dev_base; dev; dev = dev->next)
1006 + if (dev->type == type &&
1007 + !memcmp(dev->dev_addr, ha, dev->addr_len))
1012 +EXPORT_SYMBOL(dev_getbyhwaddr);
1014 +struct net_device *dev_getfirstbyhwtype(unsigned short type)
1016 + struct net_device *dev;
1019 + for (dev = dev_base; dev; dev = dev->next) {
1020 + if (dev->type == type) {
1029 +EXPORT_SYMBOL(dev_getfirstbyhwtype);
1032 + * dev_get_by_flags - find any device with given flags
1033 + * @if_flags: IFF_* values
1034 + * @mask: bitmask of bits in if_flags to check
1036 + * Search for any interface with the given flags. Returns a pointer to the
1037 + * device, or NULL if none is found. The device returned has
1038 + * had a reference added and the pointer is safe until the user calls
1039 + * dev_put to indicate they have finished with it.
1042 +struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
1044 + struct net_device *dev;
1046 + read_lock(&dev_base_lock);
1047 + for (dev = dev_base; dev != NULL; dev = dev->next) {
1048 + if (((dev->flags ^ if_flags) & mask) == 0) {
1053 + read_unlock(&dev_base_lock);
1058 + * dev_valid_name - check if name is okay for network device
1059 + * @name: name string
1061 + * Network device names need to be valid file names to
1062 + * allow sysfs to work. We also disallow any kind of
1065 +int dev_valid_name(const char *name)
1067 + if (*name == '\0')
1069 + if (strlen(name) >= IFNAMSIZ)
1071 + if (!strcmp(name, ".") || !strcmp(name, ".."))
1075 + if (*name == '/' || isspace(*name))
1083 + * dev_alloc_name - allocate a name for a device
1085 + * @name: name format string
1087 + * Passed a format string - e.g. "lt%d" - it will try to find a suitable
1088 + * id. It scans the list of devices to build up a free map, then chooses
1089 + * the first empty slot. The caller must hold the dev_base or rtnl lock
1090 + * while allocating the name and adding the device in order to avoid
1092 + * Limited to bits_per_byte * page size devices (i.e. 32K on most platforms).
1093 + * Returns the number of the unit assigned or a negative errno code.
1096 +int dev_alloc_name(struct net_device *dev, const char *name)
1099 + char buf[IFNAMSIZ];
1101 + const int max_netdevices = 8*PAGE_SIZE;
1103 + struct net_device *d;
1105 + p = strnchr(name, IFNAMSIZ-1, '%');
1108 + * Verify the string as this thing may have come from
1109 + * the user. There must be either one "%d" and no other "%"
1112 + if (p[1] != 'd' || strchr(p + 2, '%'))
1115 + /* Use one page as a bit array of possible slots */
1116 + inuse = (long *) get_zeroed_page(GFP_ATOMIC);
1120 + for (d = dev_base; d; d = d->next) {
1121 + if (!sscanf(d->name, name, &i))
1123 + if (i < 0 || i >= max_netdevices)
1126 + /* avoid cases where sscanf is not exact inverse of printf */
1127 + snprintf(buf, sizeof(buf), name, i);
1128 + if (!strncmp(buf, d->name, IFNAMSIZ))
1129 + set_bit(i, inuse);
1132 + i = find_first_zero_bit(inuse, max_netdevices);
1133 + free_page((unsigned long) inuse);
1136 + snprintf(buf, sizeof(buf), name, i);
1137 + if (!__dev_get_by_name(buf)) {
1138 + strlcpy(dev->name, buf, IFNAMSIZ);
1142 + /* It is possible to run out of possible slots
1143 + * when the name is long and there isn't enough space left
1144 + * for the digits, or if all bits are used.
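In practice a driver passes a template and lets this routine pick the unit number. A minimal sketch honoring the locking rule stated above; the "ring%d" template is illustrative:

    int err;

    rtnl_lock();                          /* caller must hold rtnl (or dev_base) */
    err = dev_alloc_name(dev, "ring%d");  /* >= 0: assigned unit, < 0: -errno */
    rtnl_unlock();
    if (err < 0)
      return err;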
1151 + * dev_change_name - change name of a device
1153 + * @newname: name (or format string) must be at least IFNAMSIZ
1155 + * Change the name of a device. Format strings such as "eth%d" can be passed
1156 + * for wildcarding.
1158 +int dev_change_name(struct net_device *dev, char *newname)
1164 + if (dev->flags & IFF_UP)
1167 + if (!dev_valid_name(newname))
1170 + if (strchr(newname, '%')) {
1171 + err = dev_alloc_name(dev, newname);
1174 + strcpy(newname, dev->name);
1176 + else if (__dev_get_by_name(newname))
1179 + strlcpy(dev->name, newname, IFNAMSIZ);
1181 + device_rename(&dev->dev, dev->name);
1182 + hlist_del(&dev->name_hlist);
1183 + hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
1184 + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
1190 + * netdev_features_change - device changes features
1191 + * @dev: device to cause notification
1193 + * Called to indicate a device has changed features.
1195 +void netdev_features_change(struct net_device *dev)
1197 + raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
1199 +EXPORT_SYMBOL(netdev_features_change);
1202 + * netdev_state_change - device changes state
1203 + * @dev: device to cause notification
1205 + * Called to indicate a device has changed state. This function calls
1206 + * the notifier chains for netdev_chain and sends a NEWLINK message
1207 + * to the routing socket.
1209 +void netdev_state_change(struct net_device *dev)
1211 + if (dev->flags & IFF_UP) {
1212 + raw_notifier_call_chain(&netdev_chain,
1213 + NETDEV_CHANGE, dev);
1214 + rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1219 + * dev_load - load a network module
1220 + * @name: name of interface
1222 + * If a network interface is not present and the process has suitable
1223 + * privileges this function loads the module. If module loading is not
1224 + * available in this kernel then it becomes a nop.
1227 +void dev_load(const char *name)
1229 + struct net_device *dev;
1231 + read_lock(&dev_base_lock);
1232 + dev = __dev_get_by_name(name);
1233 + read_unlock(&dev_base_lock);
1235 + if (!dev && capable(CAP_SYS_MODULE))
1236 + request_module("%s", name);
1239 +static int default_rebuild_header(struct sk_buff *skb)
1241 + printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
1242 + skb->dev ? skb->dev->name : "NULL!!!");
1249 + * dev_open - prepare an interface for use.
1250 + * @dev: device to open
1252 + * Takes a device from down to up state. The device's private open
1253 + * function is invoked and then the multicast lists are loaded. Finally
1254 + * the device is moved into the up state and a %NETDEV_UP message is
1255 + * sent to the netdev notifier chain.
1257 + * Calling this function on an active interface is a nop. On a failure
1258 + * a negative errno code is returned.
1260 +int dev_open(struct net_device *dev)
1265 + * Is it already up?
1268 + if (dev->flags & IFF_UP)
1272 + * Is it even present?
1274 + if (!netif_device_present(dev))
1278 + * Call device private open method
1280 + set_bit(__LINK_STATE_START, &dev->state);
1282 + ret = dev->open(dev);
1284 + clear_bit(__LINK_STATE_START, &dev->state);
1288 + * If it went open OK then:
1295 + dev->flags |= IFF_UP;
1298 + * Initialize multicasting status
1300 + dev_mc_upload(dev);
1303 + * Wakeup transmit queue engine
1305 + dev_activate(dev);
1308 + * ... and announce new interface.
1310 + raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
1316 + * dev_close - shutdown an interface.
1317 + * @dev: device to shutdown
1319 + * This function moves an active device into down state. A
1320 + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1321 + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1324 +int dev_close(struct net_device *dev)
1326 + if (!(dev->flags & IFF_UP))
1330 + * Tell people we are going down, so that they can
1331 + * prepare for death while the device is still operating.
1333 + raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
1335 + dev_deactivate(dev);
1337 + clear_bit(__LINK_STATE_START, &dev->state);
1339 + /* Synchronize to scheduled poll. We cannot touch the poll list,
1340 + * it can even be on a different cpu. So just clear netif_running(),
1341 + * and wait until the poll really happens. Actually, the best place
1342 + * for this is inside dev->stop() after the device has stopped its irq
1343 + * engine, but this requires more changes in devices. */
1345 + smp_mb__after_clear_bit(); /* Commit netif_running(). */
1346 + while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
1352 + * Call the device specific close. This cannot fail.
1353 + * Only if device is UP
1355 + * We allow it to be called even after a DETACH hot-plug
1362 + * Device is now down.
1365 + dev->flags &= ~IFF_UP;
1368 + * Tell people we are down
1370 + raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1377 + * Device change register/unregister. These are not inline or static
1378 + * as we export them to the world.
1382 + * register_netdevice_notifier - register a network notifier block
1385 + * Register a notifier to be called when network device events occur.
1386 + * The notifier passed is linked into the kernel structures and must
1387 + * not be reused until it has been unregistered. A negative errno code
1388 + * is returned on a failure.
1390 + * When registered, all registration and up events are replayed
1391 + * to the new notifier to allow the device to have a race-free
1392 + * view of the network device list.
1395 +int register_netdevice_notifier(struct notifier_block *nb)
1397 + struct net_device *dev;
1401 + err = raw_notifier_chain_register(&netdev_chain, nb);
1403 + for (dev = dev_base; dev; dev = dev->next) {
1404 + nb->notifier_call(nb, NETDEV_REGISTER, dev);
1406 + if (dev->flags & IFF_UP)
1407 + nb->notifier_call(nb, NETDEV_UP, dev);
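Because registration replays NETDEV_REGISTER and NETDEV_UP for devices that already exist, a late-loading module still sees the complete device list. A minimal sketch; the my_* names are hypothetical:

    static int my_netdev_event(struct notifier_block *nb,
                               unsigned long event, void *ptr)
    {
      struct net_device *dev = ptr;

      if (event == NETDEV_UP)
        printk(KERN_INFO "%s is up\n", dev->name);
      else if (event == NETDEV_DOWN)
        printk(KERN_INFO "%s is down\n", dev->name);
      return NOTIFY_DONE;
    }

    static struct notifier_block my_netdev_notifier = {
      .notifier_call = my_netdev_event,
    };

    /* register_netdevice_notifier(&my_netdev_notifier) at module init */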
1415 + * unregister_netdevice_notifier - unregister a network notifier block
1418 + * Unregister a notifier previously registered by
1419 + * register_netdevice_notifier(). The notifier is unlinked from the
1420 + * kernel structures and may then be reused. A negative errno code
1421 + * is returned on a failure.
1424 +int unregister_netdevice_notifier(struct notifier_block *nb)
1429 + err = raw_notifier_chain_unregister(&netdev_chain, nb);
1435 + * call_netdevice_notifiers - call all network notifier blocks
1436 + * @val: value passed unmodified to notifier function
1437 + * @v: pointer passed unmodified to notifier function
1439 + * Call all network notifier blocks. Parameters and return value
1440 + * are as for raw_notifier_call_chain().
1443 +int call_netdevice_notifiers(unsigned long val, void *v)
1445 + return raw_notifier_call_chain(&netdev_chain, val, v);
1448 +/* When > 0 there are consumers of rx skb time stamps */
1449 +static atomic_t netstamp_needed = ATOMIC_INIT(0);
1451 +void net_enable_timestamp(void)
1453 + atomic_inc(&netstamp_needed);
1456 +void net_disable_timestamp(void)
1458 + atomic_dec(&netstamp_needed);
1461 +void __net_timestamp(struct sk_buff *skb)
1463 + struct timeval tv;
1465 + do_gettimeofday(&tv);
1466 + skb_set_timestamp(skb, &tv);
1468 +EXPORT_SYMBOL(__net_timestamp);
1470 +static inline void net_timestamp(struct sk_buff *skb)
1472 + if (atomic_read(&netstamp_needed))
1473 + __net_timestamp(skb);
1475 + skb->tstamp.off_sec = 0;
1476 + skb->tstamp.off_usec = 0;
1481 + * Support routine. Sends outgoing frames to any network
1482 + * taps currently in use.
1485 +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1487 + struct packet_type *ptype;
1489 + net_timestamp(skb);
1492 + list_for_each_entry_rcu(ptype, &ptype_all, list) {
1493 + /* Never send packets back to the socket
1494 + * they originated from - MvS (miquels@drinkel.ow.org)
1496 + if ((ptype->dev == dev || !ptype->dev) &&
1497 + (ptype->af_packet_priv == NULL ||
1498 + (struct sock *)ptype->af_packet_priv != skb->sk)) {
1499 + struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1503 + /* skb->nh should be correctly
1504 + set by sender, so that the second statement is
1505 + just protection against buggy protocols.
1507 + skb2->mac.raw = skb2->data;
1509 + if (skb2->nh.raw < skb2->data ||
1510 + skb2->nh.raw > skb2->tail) {
1511 + if (net_ratelimit())
1512 + printk(KERN_CRIT "protocol %04x is "
1513 + "buggy, dev %s\n",
1514 + skb2->protocol, dev->name);
1515 + skb2->nh.raw = skb2->data;
1518 + skb2->h.raw = skb2->nh.raw;
1519 + skb2->pkt_type = PACKET_OUTGOING;
1520 + ptype->func(skb2, skb->dev, ptype, skb->dev);
1523 + rcu_read_unlock();
1527 +void __netif_schedule(struct net_device *dev)
1529 + if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1530 + unsigned long flags;
1531 + struct softnet_data *sd;
1533 + local_irq_save(flags);
1534 + sd = &__get_cpu_var(softnet_data);
1535 + dev->next_sched = sd->output_queue;
1536 + sd->output_queue = dev;
1537 + raise_softirq_irqoff(NET_TX_SOFTIRQ);
1538 + local_irq_restore(flags);
1541 +EXPORT_SYMBOL(__netif_schedule);
1543 +void __netif_rx_schedule(struct net_device *dev)
1545 + unsigned long flags;
1547 + local_irq_save(flags);
1549 + list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1550 + if (dev->quota < 0)
1551 + dev->quota += dev->weight;
1553 + dev->quota = dev->weight;
1554 + __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1555 + local_irq_restore(flags);
1557 +EXPORT_SYMBOL(__netif_rx_schedule);
1559 +void dev_kfree_skb_any(struct sk_buff *skb)
1561 + if (in_irq() || irqs_disabled())
1562 + dev_kfree_skb_irq(skb);
1564 + dev_kfree_skb(skb);
1566 +EXPORT_SYMBOL(dev_kfree_skb_any);
1569 +/* Hot-plugging. */
1570 +void netif_device_detach(struct net_device *dev)
1572 + if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1573 + netif_running(dev)) {
1574 + netif_stop_queue(dev);
1577 +EXPORT_SYMBOL(netif_device_detach);
1579 +void netif_device_attach(struct net_device *dev)
1581 + if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1582 + netif_running(dev)) {
1583 + netif_wake_queue(dev);
1584 + __netdev_watchdog_up(dev);
1587 +EXPORT_SYMBOL(netif_device_attach);
1591 + * Invalidate hardware checksum when packet is to be mangled, and
1592 + * complete checksum manually on outgoing path.
1594 +int skb_checksum_help(struct sk_buff *skb)
1597 + int ret = 0, offset = skb->h.raw - skb->data;
1599 + if (skb->ip_summed == CHECKSUM_COMPLETE)
1600 + goto out_set_summed;
1602 + if (unlikely(skb_shinfo(skb)->gso_size)) {
1603 + /* Let GSO fix up the checksum. */
1604 + goto out_set_summed;
1607 + if (skb_cloned(skb)) {
1608 + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1613 + BUG_ON(offset > (int)skb->len);
1614 + csum = skb_checksum(skb, offset, skb->len-offset, 0);
1616 + offset = skb->tail - skb->h.raw;
1617 + BUG_ON(offset <= 0);
1618 + BUG_ON(skb->csum_offset + 2 > offset);
1620 + *(__sum16*)(skb->h.raw + skb->csum_offset) = csum_fold(csum);
1623 + skb->ip_summed = CHECKSUM_NONE;
1629 + * skb_gso_segment - Perform segmentation on skb.
1630 + * @skb: buffer to segment
1631 + * @features: features for the output path (see dev->features)
1633 + * This function segments the given skb and returns a list of segments.
1635 + * It may return NULL if the skb requires no segmentation. This is
1636 + * only possible when GSO is used for verifying header integrity.
1638 +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1640 + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1641 + struct packet_type *ptype;
1642 + __be16 type = skb->protocol;
1645 + BUG_ON(skb_shinfo(skb)->frag_list);
1647 + skb->mac.raw = skb->data;
1648 + skb->mac_len = skb->nh.raw - skb->data;
1649 + __skb_pull(skb, skb->mac_len);
1651 + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1652 + if (skb_header_cloned(skb) &&
1653 + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1654 + return ERR_PTR(err);
1658 + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1659 + if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1660 + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1661 + err = ptype->gso_send_check(skb);
1662 + segs = ERR_PTR(err);
1663 + if (err || skb_gso_ok(skb, features))
1665 + __skb_push(skb, skb->data - skb->nh.raw);
1667 + segs = ptype->gso_segment(skb, features);
1671 + rcu_read_unlock();
1673 + __skb_push(skb, skb->data - skb->mac.raw);
1678 +EXPORT_SYMBOL(skb_gso_segment);
1680 +/* Take action when hardware reception checksum errors are detected. */
1682 +void netdev_rx_csum_fault(struct net_device *dev)
1684 + if (net_ratelimit()) {
1685 + printk(KERN_ERR "%s: hw csum failure.\n",
1686 + dev ? dev->name : "<unknown>");
1690 +EXPORT_SYMBOL(netdev_rx_csum_fault);
1693 +/* Actually, we should eliminate this check as soon as we know that:
1694 + * 1. IOMMU is present and can map all the memory.
1695 + * 2. No high memory really exists on this machine.
1698 +static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1700 +#ifdef CONFIG_HIGHMEM
1703 + if (dev->features & NETIF_F_HIGHDMA)
1706 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1707 + if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1714 +struct dev_gso_cb {
1715 + void (*destructor)(struct sk_buff *skb);
1718 +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1720 +static void dev_gso_skb_destructor(struct sk_buff *skb)
1722 + struct dev_gso_cb *cb;
1725 + struct sk_buff *nskb = skb->next;
1727 + skb->next = nskb->next;
1728 + nskb->next = NULL;
1730 + } while (skb->next);
1732 + cb = DEV_GSO_CB(skb);
1733 + if (cb->destructor)
1734 + cb->destructor(skb);
1738 + * dev_gso_segment - Perform emulated hardware segmentation on skb.
1739 + * @skb: buffer to segment
1741 + * This function segments the given skb and stores the list of segments
1744 +static int dev_gso_segment(struct sk_buff *skb)
1746 + struct net_device *dev = skb->dev;
1747 + struct sk_buff *segs;
1748 + int features = dev->features & ~(illegal_highdma(dev, skb) ?
1751 + segs = skb_gso_segment(skb, features);
1753 + /* Verifying header integrity only. */
1757 + if (unlikely(IS_ERR(segs)))
1758 + return PTR_ERR(segs);
1761 + DEV_GSO_CB(skb)->destructor = skb->destructor;
1762 + skb->destructor = dev_gso_skb_destructor;
1767 +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1769 + if (likely(!skb->next)) {
1771 + dev_queue_xmit_nit(skb, dev);
1773 + if (netif_needs_gso(dev, skb)) {
1774 + if (unlikely(dev_gso_segment(skb)))
1775 + goto out_kfree_skb;
1780 + return dev->hard_start_xmit(skb, dev);
1785 + struct sk_buff *nskb = skb->next;
1788 + skb->next = nskb->next;
1789 + nskb->next = NULL;
1790 + rc = dev->hard_start_xmit(nskb, dev);
1791 + if (unlikely(rc)) {
1792 + nskb->next = skb->next;
1796 + if (unlikely(netif_queue_stopped(dev) && skb->next))
1797 + return NETDEV_TX_BUSY;
1798 + } while (skb->next);
1800 + skb->destructor = DEV_GSO_CB(skb)->destructor;
1807 +#define HARD_TX_LOCK(dev, cpu) { \
1808 + if ((dev->features & NETIF_F_LLTX) == 0) { \
1809 + netif_tx_lock(dev); \
1813 +#define HARD_TX_UNLOCK(dev) { \
1814 + if ((dev->features & NETIF_F_LLTX) == 0) { \
1815 + netif_tx_unlock(dev); \
1820 + * dev_queue_xmit - transmit a buffer
1821 + * @skb: buffer to transmit
1823 + * Queue a buffer for transmission to a network device. The caller must
1824 + * have set the device and priority and built the buffer before calling
1825 + * this function. The function can be called from an interrupt.
1827 + * A negative errno code is returned on a failure. A success does not
1828 + * guarantee the frame will be transmitted as it may be dropped due
1829 + * to congestion or traffic shaping.
1831 + * -----------------------------------------------------------------------------------
1832 + * I notice this method can also return errors from the queue disciplines,
1833 + * including NET_XMIT_DROP, which is a positive value. So, errors can also
1836 + * Regardless of the return value, the skb is consumed, so it is currently
1837 + * difficult to retry a send to this method. (You can bump the ref count
1838 + * before sending to hold a reference for retry if you are careful.)
1840 + * When calling this method, interrupts MUST be enabled. This is because
1841 + * the BH enable code must have IRQs enabled so that it will not deadlock.
1845 +int dev_queue_xmit(struct sk_buff *skb)
1847 + struct net_device *dev = skb->dev;
1851 + /* GSO will handle the following emulations directly. */
1852 + if (netif_needs_gso(dev, skb))
1855 + if (skb_shinfo(skb)->frag_list &&
1856 + !(dev->features & NETIF_F_FRAGLIST) &&
1857 + __skb_linearize(skb))
1858 + goto out_kfree_skb;
1860 + /* Fragmented skb is linearized if device does not support SG,
1861 + * or if at least one of the fragments is in highmem and device
1862 + * does not support DMA from it.
1864 + if (skb_shinfo(skb)->nr_frags &&
1865 + (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1866 + __skb_linearize(skb))
1867 + goto out_kfree_skb;
1869 + /* If packet is not checksummed and device does not support
1870 + * checksumming for this protocol, complete checksumming here.
1872 + if (skb->ip_summed == CHECKSUM_PARTIAL &&
1873 + (!(dev->features & NETIF_F_GEN_CSUM) &&
1874 + (!(dev->features & NETIF_F_IP_CSUM) ||
1875 + skb->protocol != htons(ETH_P_IP))))
1876 + if (skb_checksum_help(skb))
1877 + goto out_kfree_skb;
1880 + spin_lock_prefetch(&dev->queue_lock);
1882 + /* Disable soft irqs for various locks below. Also
1883 + * stops preemption for RCU.
1885 + rcu_read_lock_bh();
1887 + /* Updates of qdisc are serialized by queue_lock.
1888 + * The struct Qdisc which is pointed to by qdisc is now a
1889 + * rcu structure - it may be accessed without acquiring
1890 + * a lock (but the structure may be stale.) The freeing of the
1891 + * qdisc will be deferred until it's known that there are no
1892 + * more references to it.
1894 + * If the qdisc has an enqueue function, we still need to
1895 + * hold the queue_lock before calling it, since queue_lock
1896 + * also serializes access to the device queue.
1899 + q = rcu_dereference(dev->qdisc);
1900 +#ifdef CONFIG_NET_CLS_ACT
1901 + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1904 + /* Grab device queue */
1905 + spin_lock(&dev->queue_lock);
1908 + rc = q->enqueue(skb, q);
1910 + spin_unlock(&dev->queue_lock);
1912 + rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1915 + spin_unlock(&dev->queue_lock);
1918 + /* The device has no queue. Common case for software devices:
1919 + loopback, all the sorts of tunnels...
1921 + Really, it is unlikely that netif_tx_lock protection is necessary
1922 + here. (e.g. loopback and IP tunnels are clean ignoring statistics
1924 + However, it is possible that they rely on protection
1927 + Check this and shoot the lock. It is not prone to deadlocks.
1928 + Either shoot the noqueue qdisc, it is even simpler 8)
1930 + if (dev->flags & IFF_UP) {
1931 + int cpu = smp_processor_id(); /* ok because BHs are off */
1933 + if (dev->xmit_lock_owner != cpu) {
1935 + HARD_TX_LOCK(dev, cpu);
1937 + if (!netif_queue_stopped(dev)) {
1939 + if (!dev_hard_start_xmit(skb, dev)) {
1940 + HARD_TX_UNLOCK(dev);
1944 + HARD_TX_UNLOCK(dev);
1945 + if (net_ratelimit())
1946 + printk(KERN_CRIT "Virtual device %s asks to "
1947 + "queue packet!\n", dev->name);
1949 + /* Recursion is detected! It is possible,
1950 + * unfortunately */
1951 + if (net_ratelimit())
1952 + printk(KERN_CRIT "Dead loop on virtual device "
1953 + "%s, fix it urgently!\n", dev->name);
1958 + rcu_read_unlock_bh();
1964 + rcu_read_unlock_bh();
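The contract spelled out in the comment above bears repeating: the skb is consumed whatever the outcome, so the caller builds a complete frame and lets go of it. A minimal caller sketch; frame, len and the protocol value are illustrative:

    struct sk_buff *skb = alloc_skb(len + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
    if (skb == NULL)
      return -ENOMEM;

    skb_reserve(skb, LL_RESERVED_SPACE(dev));   /* room for the link header */
    memcpy(skb_put(skb, len), frame, len);
    skb->dev = dev;
    skb->protocol = htons(ETH_P_IP);

    return dev_queue_xmit(skb);   /* may also return positive NET_XMIT_* codes */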
1969 +/*=======================================================================
1971 + =======================================================================*/
1973 +int netdev_max_backlog = 1000;
1974 +int netdev_budget = 300;
1975 +int weight_p = 64; /* old backlog weight */
1977 +DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1981 + * netif_rx - post buffer to the network code
1982 + * @skb: buffer to post
1984 + * This function receives a packet from a device driver and queues it for
1985 + * the upper (protocol) levels to process. It always succeeds. The buffer
1986 + * may be dropped during processing for congestion control or by the
1987 + * protocol layers.
1990 + * NET_RX_SUCCESS (no congestion)
1991 + * NET_RX_CN_LOW (low congestion)
1992 + * NET_RX_CN_MOD (moderate congestion)
1993 + * NET_RX_CN_HIGH (high congestion)
1994 + * NET_RX_DROP (packet was dropped)
1998 +int netif_rx(struct sk_buff *skb)
2000 + struct softnet_data *queue;
2001 + unsigned long flags;
2003 + /* if netpoll wants it, pretend we never saw it */
2004 + if (netpoll_rx(skb))
2005 + return NET_RX_DROP;
2007 + if (!skb->tstamp.off_sec)
2008 + net_timestamp(skb);
2011 + * The code is rearranged so that the path is
2012 + * shortest when the CPU is congested, but still operating.
2014 + local_irq_save(flags);
2015 + queue = &__get_cpu_var(softnet_data);
2017 + __get_cpu_var(netdev_rx_stat).total++;
2018 + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2019 + if (queue->input_pkt_queue.qlen) {
2021 + dev_hold(skb->dev);
2022 + __skb_queue_tail(&queue->input_pkt_queue, skb);
2023 + local_irq_restore(flags);
2024 + return NET_RX_SUCCESS;
2027 + netif_rx_schedule(&queue->backlog_dev);
2031 + __get_cpu_var(netdev_rx_stat).dropped++;
2032 + local_irq_restore(flags);
2035 + return NET_RX_DROP;
2038 +int netif_rx_ni(struct sk_buff *skb)
2042 + preempt_disable();
2043 + err = netif_rx(skb);
2044 + if (local_softirq_pending())
2051 +EXPORT_SYMBOL(netif_rx_ni);
2053 +static inline struct net_device *skb_bond(struct sk_buff *skb)
2055 + struct net_device *dev = skb->dev;
2057 + if (dev->master) {
2058 + if (skb_bond_should_drop(skb)) {
2062 + skb->dev = dev->master;
2068 +static void net_tx_action(struct softirq_action *h)
2070 + struct softnet_data *sd = &__get_cpu_var(softnet_data);
2072 + if (sd->completion_queue) {
2073 + struct sk_buff *clist;
2075 + local_irq_disable();
2076 + clist = sd->completion_queue;
2077 + sd->completion_queue = NULL;
2078 + local_irq_enable();
2081 + struct sk_buff *skb = clist;
2082 + clist = clist->next;
2084 + BUG_TRAP(!atomic_read(&skb->users));
2089 + if (sd->output_queue) {
2090 + struct net_device *head;
2092 + local_irq_disable();
2093 + head = sd->output_queue;
2094 + sd->output_queue = NULL;
2095 + local_irq_enable();
2098 + struct net_device *dev = head;
2099 + head = head->next_sched;
2101 + smp_mb__before_clear_bit();
2102 + clear_bit(__LINK_STATE_SCHED, &dev->state);
2104 + if (spin_trylock(&dev->queue_lock)) {
2106 + spin_unlock(&dev->queue_lock);
2108 + netif_schedule(dev);
2114 +static __inline__ int deliver_skb(struct sk_buff *skb,
2115 + struct packet_type *pt_prev,
2116 + struct net_device *orig_dev)
2118 + atomic_inc(&skb->users);
2119 + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2122 +#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2123 +int (*br_handle_frame_hook)(struct net_bridge_port *p, struct sk_buff **pskb);
2125 +struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2126 + unsigned char *addr);
2127 +void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent);
2129 +static __inline__ int handle_bridge(struct sk_buff **pskb,
2130 + struct packet_type **pt_prev, int *ret,
2131 + struct net_device *orig_dev)
2133 + struct net_bridge_port *port;
2135 + if ((*pskb)->pkt_type == PACKET_LOOPBACK ||
2136 + (port = rcu_dereference((*pskb)->dev->br_port)) == NULL)
2140 + *ret = deliver_skb(*pskb, *pt_prev, orig_dev);
2144 + return br_handle_frame_hook(port, pskb);
2147 +#define handle_bridge(skb, pt_prev, ret, orig_dev) (0)
2150 +#ifdef CONFIG_NET_CLS_ACT
2151 +/* TODO: Maybe we should just force sch_ingress to be compiled in
2152 + * when CONFIG_NET_CLS_ACT is? Otherwise we pay some useless instructions:
2153 + * a compare and 2 extra stores right now if we don't have it on
2154 + * but do have CONFIG_NET_CLS_ACT
2155 + * NOTE: This doesn't remove any functionality; if you don't have
2156 + * the ingress scheduler, you just can't add policies on ingress.
2159 +static int ing_filter(struct sk_buff *skb)
2162 + struct net_device *dev = skb->dev;
2163 + int result = TC_ACT_OK;
2165 + if (dev->qdisc_ingress) {
2166 + __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
2167 + if (MAX_RED_LOOP < ttl++) {
2168 + printk(KERN_WARNING "Redir loop detected, dropping packet (%d->%d)\n",
2169 + skb->iif, skb->dev->ifindex);
2170 + return TC_ACT_SHOT;
2173 + skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
2175 + skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
2177 + spin_lock(&dev->queue_lock);
2178 + if ((q = dev->qdisc_ingress) != NULL)
2179 + result = q->enqueue(skb, q);
2180 + spin_unlock(&dev->queue_lock);
2188 +int netif_receive_skb(struct sk_buff *skb)
2190 + struct packet_type *ptype, *pt_prev;
2191 + struct net_device *orig_dev;
2192 + int ret = NET_RX_DROP;
2195 + /* if we've gotten here through NAPI, check netpoll */
2196 + if (skb->dev->poll && netpoll_rx(skb))
2197 + return NET_RX_DROP;
2199 + if (!skb->tstamp.off_sec)
2200 + net_timestamp(skb);
2203 + skb->iif = skb->dev->ifindex;
2205 + orig_dev = skb_bond(skb);
2208 + return NET_RX_DROP;
2210 + __get_cpu_var(netdev_rx_stat).total++;
2212 + skb->h.raw = skb->nh.raw = skb->data;
2213 + skb->mac_len = skb->nh.raw - skb->mac.raw;
2219 +#ifdef CONFIG_NET_CLS_ACT
2220 + if (skb->tc_verd & TC_NCLS) {
2221 + skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2226 + list_for_each_entry_rcu(ptype, &ptype_all, list) {
2227 + if (!ptype->dev || ptype->dev == skb->dev) {
2229 + ret = deliver_skb(skb, pt_prev, orig_dev);
2234 +#ifdef CONFIG_NET_CLS_ACT
2236 + ret = deliver_skb(skb, pt_prev, orig_dev);
2237 +		pt_prev = NULL; /* no one else should process this afterwards */
2239 + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2242 + ret = ing_filter(skb);
2244 + if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
2253 + if (handle_bridge(&skb, &pt_prev, &ret, orig_dev))
2256 + type = skb->protocol;
2257 + list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
2258 + if (ptype->type == type &&
2259 + (!ptype->dev || ptype->dev == skb->dev)) {
2261 + ret = deliver_skb(skb, pt_prev, orig_dev);
2267 + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2270 +		/* Jamal, now you will not be able to escape explaining
2271 +		 * to me how you were going to use this. :-)
2273 + ret = NET_RX_DROP;
2277 + rcu_read_unlock();
2281 +static int process_backlog(struct net_device *backlog_dev, int *budget)
2284 + int quota = min(backlog_dev->quota, *budget);
2285 + struct softnet_data *queue = &__get_cpu_var(softnet_data);
2286 + unsigned long start_time = jiffies;
2288 + backlog_dev->weight = weight_p;
2290 + struct sk_buff *skb;
2291 + struct net_device *dev;
2293 + local_irq_disable();
2294 + skb = __skb_dequeue(&queue->input_pkt_queue);
2297 + local_irq_enable();
2301 + netif_receive_skb(skb);
2307 + if (work >= quota || jiffies - start_time > 1)
2312 + backlog_dev->quota -= work;
2317 + backlog_dev->quota -= work;
2320 + list_del(&backlog_dev->poll_list);
2321 + smp_mb__before_clear_bit();
2322 + netif_poll_enable(backlog_dev);
2324 + local_irq_enable();
2328 +static void net_rx_action(struct softirq_action *h)
2330 + struct softnet_data *queue = &__get_cpu_var(softnet_data);
2331 + unsigned long start_time = jiffies;
2332 + int budget = netdev_budget;
2335 + local_irq_disable();
2337 + while (!list_empty(&queue->poll_list)) {
2338 + struct net_device *dev;
2340 + if (budget <= 0 || jiffies - start_time > 1)
2341 + goto softnet_break;
2343 + local_irq_enable();
2345 + dev = list_entry(queue->poll_list.next,
2346 + struct net_device, poll_list);
2347 + have = netpoll_poll_lock(dev);
2349 + if (dev->quota <= 0 || dev->poll(dev, &budget)) {
2350 + netpoll_poll_unlock(have);
2351 + local_irq_disable();
2352 + list_move_tail(&dev->poll_list, &queue->poll_list);
2353 + if (dev->quota < 0)
2354 + dev->quota += dev->weight;
2356 + dev->quota = dev->weight;
2358 + netpoll_poll_unlock(have);
2360 + local_irq_disable();
2364 +#ifdef CONFIG_NET_DMA
2366 + * There may not be any more sk_buffs coming right now, so push
2367 + * any pending DMA copies to hardware
2369 + if (net_dma_client) {
2370 + struct dma_chan *chan;
2372 + list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node)
2373 + dma_async_memcpy_issue_pending(chan);
2374 + rcu_read_unlock();
2377 + local_irq_enable();
2381 + __get_cpu_var(netdev_rx_stat).time_squeeze++;
2382 + __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2386 +static gifconf_func_t * gifconf_list [NPROTO];
2389 + * register_gifconf - register a SIOCGIF handler
2390 + * @family: Address family
2391 + * @gifconf: Function handler
2393 + * Register protocol dependent address dumping routines. The handler
2394 + * that is passed must not be freed or reused until it has been replaced
2395 + * by another handler.
2397 +int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2399 + if (family >= NPROTO)
2401 + gifconf_list[family] = gifconf;
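/*
 * [editorial sketch] How a protocol family is expected to hook in here,
 * assuming the gifconf_func_t signature used above (device, user buffer,
 * buffer length; returns bytes used, or bytes needed when the buffer is
 * NULL). The names are illustrative; IPv4 registers its own equivalent.
 */
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* walk dev's addresses and copy struct ifreq entries into buf */
	return 0;
}

static int __init example_proto_init(void)
{
	/* the handler must stay valid until replaced by another one */
	return register_gifconf(PF_INET, example_gifconf);
}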
2407 + * Map an interface index to its name (SIOCGIFNAME)
2411 + * We need this ioctl for efficient implementation of the
2412 + * if_indextoname() function required by the IPv6 API. Without
2413 + *	it, we would have to search all the interfaces to find a match.
2417 +static int dev_ifname(struct ifreq __user *arg)
2419 + struct net_device *dev;
2423 + * Fetch the caller's info block.
2426 + if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2429 + read_lock(&dev_base_lock);
2430 + dev = __dev_get_by_index(ifr.ifr_ifindex);
2432 + read_unlock(&dev_base_lock);
2436 + strcpy(ifr.ifr_name, dev->name);
2437 + read_unlock(&dev_base_lock);
2439 + if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2445 + * Perform a SIOCGIFCONF call. This structure will change
2446 + * size eventually, and there is nothing I can do about it.
2447 + * Thus we will need a 'compatibility mode'.
2450 +static int dev_ifconf(char __user *arg)
2452 + struct ifconf ifc;
2453 + struct net_device *dev;
2460 + * Fetch the caller's info block.
2463 + if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2466 + pos = ifc.ifc_buf;
2467 + len = ifc.ifc_len;
2470 + * Loop over the interfaces, and write an info block for each.
2474 + for (dev = dev_base; dev; dev = dev->next) {
2475 + for (i = 0; i < NPROTO; i++) {
2476 + if (gifconf_list[i]) {
2479 + done = gifconf_list[i](dev, NULL, 0);
2481 + done = gifconf_list[i](dev, pos + total,
2491 + * All done. Write the updated control block back to the caller.
2493 + ifc.ifc_len = total;
2496 + * Both BSD and Solaris return 0 here, so we do too.
2498 + return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
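/*
 * [editorial sketch] The matching user-space side of the SIOCGIFCONF
 * loop above; a minimal example, error handling trimmed.
 */
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

int main(void)
{
	struct ifreq reqs[16];
	struct ifconf ifc;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);
	unsigned int i;

	if (fd < 0)
		return 1;
	ifc.ifc_len = sizeof(reqs);	/* kernel trims this to what fits */
	ifc.ifc_req = reqs;
	if (ioctl(fd, SIOCGIFCONF, &ifc) == 0)
		for (i = 0; i < ifc.ifc_len / sizeof(struct ifreq); i++)
			printf("%s\n", reqs[i].ifr_name);
	close(fd);
	return 0;
}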
2501 +#ifdef CONFIG_PROC_FS
2503 + *	This is invoked by the /proc filesystem handler to display a device in detail.
2506 +static __inline__ struct net_device *dev_get_idx(loff_t pos)
2508 + struct net_device *dev;
2511 + for (i = 0, dev = dev_base; dev && i < pos; ++i, dev = dev->next);
2513 + return i == pos ? dev : NULL;
2516 +void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2518 + read_lock(&dev_base_lock);
2519 + return *pos ? dev_get_idx(*pos - 1) : SEQ_START_TOKEN;
2522 +void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2525 + return v == SEQ_START_TOKEN ? dev_base : ((struct net_device *)v)->next;
2528 +void dev_seq_stop(struct seq_file *seq, void *v)
2530 + read_unlock(&dev_base_lock);
2533 +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2535 + if (dev->get_stats) {
2536 + struct net_device_stats *stats = dev->get_stats(dev);
2538 + seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2539 + "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2540 + dev->name, stats->rx_bytes, stats->rx_packets,
2542 + stats->rx_dropped + stats->rx_missed_errors,
2543 + stats->rx_fifo_errors,
2544 + stats->rx_length_errors + stats->rx_over_errors +
2545 + stats->rx_crc_errors + stats->rx_frame_errors,
2546 + stats->rx_compressed, stats->multicast,
2547 + stats->tx_bytes, stats->tx_packets,
2548 + stats->tx_errors, stats->tx_dropped,
2549 + stats->tx_fifo_errors, stats->collisions,
2550 + stats->tx_carrier_errors +
2551 + stats->tx_aborted_errors +
2552 + stats->tx_window_errors +
2553 + stats->tx_heartbeat_errors,
2554 + stats->tx_compressed);
2556 + seq_printf(seq, "%6s: No statistics available.\n", dev->name);
2560 + *	Called from the PROCfs module. This now uses the new arbitrarily sized
2561 + *	/proc/net interface to create /proc/net/dev.
2563 +static int dev_seq_show(struct seq_file *seq, void *v)
2565 + if (v == SEQ_START_TOKEN)
2566 + seq_puts(seq, "Inter-| Receive "
2568 + " face |bytes packets errs drop fifo frame "
2569 + "compressed multicast|bytes packets errs "
2570 + "drop fifo colls carrier compressed\n");
2572 + dev_seq_printf_stats(seq, v);
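/*
 * [editorial sketch] A user-space consumer of the seq_file output built
 * above: two header lines from dev_seq_show(), then one line per device
 * from dev_seq_printf_stats(). Minimal example, error handling trimmed.
 */
#include <stdio.h>

int main(void)
{
	char line[512];
	FILE *f = fopen("/proc/net/dev", "r");

	if (!f)
		return 1;
	fgets(line, sizeof(line), f);	/* skip "Inter-| Receive..." */
	fgets(line, sizeof(line), f);	/* skip the column header */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);	/* "  eth0: bytes packets ..." */
	fclose(f);
	return 0;
}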
2576 +static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2578 + struct netif_rx_stats *rc = NULL;
2580 + while (*pos < NR_CPUS)
2581 + if (cpu_online(*pos)) {
2582 + rc = &per_cpu(netdev_rx_stat, *pos);
2589 +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2591 + return softnet_get_online(pos);
2594 +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2597 + return softnet_get_online(pos);
2600 +static void softnet_seq_stop(struct seq_file *seq, void *v)
2604 +static int softnet_seq_show(struct seq_file *seq, void *v)
2606 + struct netif_rx_stats *s = v;
2608 + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2609 + s->total, s->dropped, s->time_squeeze, 0,
2610 + 0, 0, 0, 0, /* was fastroute */
2611 + s->cpu_collision );
2615 +static struct seq_operations dev_seq_ops = {
2616 + .start = dev_seq_start,
2617 + .next = dev_seq_next,
2618 + .stop = dev_seq_stop,
2619 + .show = dev_seq_show,
2622 +static int dev_seq_open(struct inode *inode, struct file *file)
2624 + return seq_open(file, &dev_seq_ops);
2627 +static const struct file_operations dev_seq_fops = {
2628 + .owner = THIS_MODULE,
2629 + .open = dev_seq_open,
2631 + .llseek = seq_lseek,
2632 + .release = seq_release,
2635 +static struct seq_operations softnet_seq_ops = {
2636 + .start = softnet_seq_start,
2637 + .next = softnet_seq_next,
2638 + .stop = softnet_seq_stop,
2639 + .show = softnet_seq_show,
2642 +static int softnet_seq_open(struct inode *inode, struct file *file)
2644 + return seq_open(file, &softnet_seq_ops);
2647 +static const struct file_operations softnet_seq_fops = {
2648 + .owner = THIS_MODULE,
2649 + .open = softnet_seq_open,
2651 + .llseek = seq_lseek,
2652 + .release = seq_release,
2655 +#ifdef CONFIG_WIRELESS_EXT
2656 +extern int wireless_proc_init(void);
2658 +#define wireless_proc_init() 0
2661 +static int __init dev_proc_init(void)
2665 + if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2667 + if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2669 + if (wireless_proc_init())
2675 + proc_net_remove("softnet_stat");
2677 + proc_net_remove("dev");
2681 +#define dev_proc_init() 0
2682 +#endif /* CONFIG_PROC_FS */
2686 + * netdev_set_master - set up master/slave pair
2687 + * @slave: slave device
2688 + * @master: new master device
2690 + * Changes the master device of the slave. Pass %NULL to break the
2691 + * bonding. The caller must hold the RTNL semaphore. On a failure
2692 + * a negative errno code is returned. On success the reference counts
2693 + * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2694 + * function returns zero.
2696 +int netdev_set_master(struct net_device *slave, struct net_device *master)
2698 + struct net_device *old = slave->master;
2708 + slave->master = master;
2710 + synchronize_net();
2716 + slave->flags |= IFF_SLAVE;
2718 + slave->flags &= ~IFF_SLAVE;
2720 + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2725 + * dev_set_promiscuity - update promiscuity count on a device
2729 + * Add or remove promiscuity from a device. While the count in the device
2730 + * remains above zero the interface remains promiscuous. Once it hits zero
2731 + *	the device reverts to normal filtering operation. A negative @inc
2732 + * value is used to drop promiscuity on the device.
2734 +void dev_set_promiscuity(struct net_device *dev, int inc)
2736 + unsigned short old_flags = dev->flags;
2738 + if ((dev->promiscuity += inc) == 0)
2739 + dev->flags &= ~IFF_PROMISC;
2741 + dev->flags |= IFF_PROMISC;
2742 + if (dev->flags != old_flags) {
2743 + dev_mc_upload(dev);
2744 + printk(KERN_INFO "device %s %s promiscuous mode\n",
2745 + dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2747 + audit_log(current->audit_context, GFP_ATOMIC,
2748 + AUDIT_ANOM_PROMISCUOUS,
2749 + "dev=%s prom=%d old_prom=%d auid=%u",
2750 + dev->name, (dev->flags & IFF_PROMISC),
2751 + (old_flags & IFF_PROMISC),
2752 + audit_get_loginuid(current->audit_context));
2757 + * dev_set_allmulti - update allmulti count on a device
2761 + * Add or remove reception of all multicast frames to a device. While the
2762 + *	count in the device remains above zero the interface keeps receiving
2763 + *	all multicast frames. Once it hits zero the device reverts to normal
2764 + *	filtering operation. A negative @inc value is used to drop the counter
2765 + * when releasing a resource needing all multicasts.
2768 +void dev_set_allmulti(struct net_device *dev, int inc)
2770 + unsigned short old_flags = dev->flags;
2772 + dev->flags |= IFF_ALLMULTI;
2773 + if ((dev->allmulti += inc) == 0)
2774 + dev->flags &= ~IFF_ALLMULTI;
2775 + if (dev->flags ^ old_flags)
2776 + dev_mc_upload(dev);
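/*
 * [editorial sketch] Both counters above are reference counts, so nested
 * users compose. The expected calling discipline, for a hypothetical
 * capture driver running in this file's context:
 */
static void capture_start(struct net_device *dev)
{
	dev_set_promiscuity(dev, 1);	/* count 0 -> 1: IFF_PROMISC set */
	dev_set_allmulti(dev, 1);	/* also accept all multicast */
}

static void capture_stop(struct net_device *dev)
{
	dev_set_promiscuity(dev, -1);	/* count 1 -> 0: flag cleared */
	dev_set_allmulti(dev, -1);
}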
2779 +unsigned dev_get_flags(const struct net_device *dev)
2783 + flags = (dev->flags & ~(IFF_PROMISC |
2788 + (dev->gflags & (IFF_PROMISC |
2791 + if (netif_running(dev)) {
2792 + if (netif_oper_up(dev))
2793 + flags |= IFF_RUNNING;
2794 + if (netif_carrier_ok(dev))
2795 + flags |= IFF_LOWER_UP;
2796 + if (netif_dormant(dev))
2797 + flags |= IFF_DORMANT;
2803 +int dev_change_flags(struct net_device *dev, unsigned flags)
2806 + int old_flags = dev->flags;
2809 + * Set the flags on our device.
2812 + dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2813 + IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2815 + (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2819 + * Load in the correct multicast list now the flags have changed.
2822 + dev_mc_upload(dev);
2825 +	 *	Have we downed the interface? We handle IFF_UP ourselves
2826 + * according to user attempts to set it, rather than blindly
2831 + if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
2832 + ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2835 + dev_mc_upload(dev);
2838 + if (dev->flags & IFF_UP &&
2839 + ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2841 + raw_notifier_call_chain(&netdev_chain,
2842 + NETDEV_CHANGE, dev);
2844 + if ((flags ^ dev->gflags) & IFF_PROMISC) {
2845 + int inc = (flags & IFF_PROMISC) ? +1 : -1;
2846 + dev->gflags ^= IFF_PROMISC;
2847 + dev_set_promiscuity(dev, inc);
2850 + /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2851 +	   is important. Some (broken) drivers set IFF_PROMISC when
2852 +	   IFF_ALLMULTI is requested, without asking us and without reporting it.
2854 + if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2855 + int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2856 + dev->gflags ^= IFF_ALLMULTI;
2857 + dev_set_allmulti(dev, inc);
2860 + if (old_flags ^ dev->flags)
2861 + rtmsg_ifinfo(RTM_NEWLINK, dev, old_flags ^ dev->flags);
2866 +int dev_set_mtu(struct net_device *dev, int new_mtu)
2870 + if (new_mtu == dev->mtu)
2873 + /* MTU must be positive. */
2877 + if (!netif_device_present(dev))
2881 + if (dev->change_mtu)
2882 + err = dev->change_mtu(dev, new_mtu);
2884 + dev->mtu = new_mtu;
2885 + if (!err && dev->flags & IFF_UP)
2886 + raw_notifier_call_chain(&netdev_chain,
2887 + NETDEV_CHANGEMTU, dev);
2891 +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2895 + if (!dev->set_mac_address)
2896 + return -EOPNOTSUPP;
2897 + if (sa->sa_family != dev->type)
2899 + if (!netif_device_present(dev))
2901 + err = dev->set_mac_address(dev, sa);
2903 + raw_notifier_call_chain(&netdev_chain,
2904 + NETDEV_CHANGEADDR, dev);
2909 + * Perform the SIOCxIFxxx calls.
2911 +static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2914 + struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2920 + case SIOCGIFFLAGS: /* Get interface flags */
2921 + ifr->ifr_flags = dev_get_flags(dev);
2924 + case SIOCSIFFLAGS: /* Set interface flags */
2925 + return dev_change_flags(dev, ifr->ifr_flags);
2927 + case SIOCGIFMETRIC: /* Get the metric on the interface
2928 + (currently unused) */
2929 + ifr->ifr_metric = 0;
2932 + case SIOCSIFMETRIC: /* Set the metric on the interface
2933 + (currently unused) */
2934 + return -EOPNOTSUPP;
2936 + case SIOCGIFMTU: /* Get the MTU of a device */
2937 + ifr->ifr_mtu = dev->mtu;
2940 + case SIOCSIFMTU: /* Set the MTU of a device */
2941 + return dev_set_mtu(dev, ifr->ifr_mtu);
2943 + case SIOCGIFHWADDR:
2944 + if (!dev->addr_len)
2945 + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
2947 + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
2948 + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2949 + ifr->ifr_hwaddr.sa_family = dev->type;
2952 + case SIOCSIFHWADDR:
2953 + return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
2955 + case SIOCSIFHWBROADCAST:
2956 + if (ifr->ifr_hwaddr.sa_family != dev->type)
2958 + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
2959 + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
2960 + raw_notifier_call_chain(&netdev_chain,
2961 + NETDEV_CHANGEADDR, dev);
2965 + ifr->ifr_map.mem_start = dev->mem_start;
2966 + ifr->ifr_map.mem_end = dev->mem_end;
2967 + ifr->ifr_map.base_addr = dev->base_addr;
2968 + ifr->ifr_map.irq = dev->irq;
2969 + ifr->ifr_map.dma = dev->dma;
2970 + ifr->ifr_map.port = dev->if_port;
2974 + if (dev->set_config) {
2975 + if (!netif_device_present(dev))
2977 + return dev->set_config(dev, &ifr->ifr_map);
2979 + return -EOPNOTSUPP;
2981 + case SIOCADDMULTI:
2982 + if (!dev->set_multicast_list ||
2983 + ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2985 + if (!netif_device_present(dev))
2987 + return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
2988 + dev->addr_len, 1);
2990 + case SIOCDELMULTI:
2991 + if (!dev->set_multicast_list ||
2992 + ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
2994 + if (!netif_device_present(dev))
2996 + return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
2997 + dev->addr_len, 1);
2999 + case SIOCGIFINDEX:
3000 + ifr->ifr_ifindex = dev->ifindex;
3003 + case SIOCGIFTXQLEN:
3004 + ifr->ifr_qlen = dev->tx_queue_len;
3007 + case SIOCSIFTXQLEN:
3008 + if (ifr->ifr_qlen < 0)
3010 + dev->tx_queue_len = ifr->ifr_qlen;
3014 + ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3015 + return dev_change_name(dev, ifr->ifr_newname);
3018 + * Unknown or private ioctl
3022 + if ((cmd >= SIOCDEVPRIVATE &&
3023 + cmd <= SIOCDEVPRIVATE + 15) ||
3024 + cmd == SIOCBONDENSLAVE ||
3025 + cmd == SIOCBONDRELEASE ||
3026 + cmd == SIOCBONDSETHWADDR ||
3027 + cmd == SIOCBONDSLAVEINFOQUERY ||
3028 + cmd == SIOCBONDINFOQUERY ||
3029 + cmd == SIOCBONDCHANGEACTIVE ||
3030 + cmd == SIOCGMIIPHY ||
3031 + cmd == SIOCGMIIREG ||
3032 + cmd == SIOCSMIIREG ||
3033 + cmd == SIOCBRADDIF ||
3034 + cmd == SIOCBRDELIF ||
3035 + cmd == SIOCWANDEV) {
3036 + err = -EOPNOTSUPP;
3037 + if (dev->do_ioctl) {
3038 + if (netif_device_present(dev))
3039 + err = dev->do_ioctl(dev, ifr,
3052 + * This function handles all "interface"-type I/O control requests. The actual
3053 + * 'doing' part of this is dev_ifsioc above.
3057 + * dev_ioctl - network device ioctl
3058 + * @cmd: command to issue
3059 + * @arg: pointer to a struct ifreq in user space
3061 + * Issue ioctl functions to devices. This is normally called by the
3062 + * user space syscall interfaces but can sometimes be useful for
3063 + * other purposes. The return value is the return from the syscall if
3064 + * positive or a negative errno code on error.
3067 +int dev_ioctl(unsigned int cmd, void __user *arg)
3073 + /* One special case: SIOCGIFCONF takes ifconf argument
3074 +	   and requires shared lock, because it sleeps writing to user space.
3078 + if (cmd == SIOCGIFCONF) {
3080 + ret = dev_ifconf((char __user *) arg);
3084 + if (cmd == SIOCGIFNAME)
3085 + return dev_ifname((struct ifreq __user *)arg);
3087 + if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3090 + ifr.ifr_name[IFNAMSIZ-1] = 0;
3092 + colon = strchr(ifr.ifr_name, ':');
3097 + * See which interface the caller is talking about.
3102 + * These ioctl calls:
3103 + * - can be done by all.
3104 + * - atomic and do not require locking.
3105 + * - return a value
3107 + case SIOCGIFFLAGS:
3108 + case SIOCGIFMETRIC:
3110 + case SIOCGIFHWADDR:
3111 + case SIOCGIFSLAVE:
3113 + case SIOCGIFINDEX:
3114 + case SIOCGIFTXQLEN:
3115 + dev_load(ifr.ifr_name);
3116 + read_lock(&dev_base_lock);
3117 + ret = dev_ifsioc(&ifr, cmd);
3118 + read_unlock(&dev_base_lock);
3122 + if (copy_to_user(arg, &ifr,
3123 + sizeof(struct ifreq)))
3129 + dev_load(ifr.ifr_name);
3131 + ret = dev_ethtool(&ifr);
3136 + if (copy_to_user(arg, &ifr,
3137 + sizeof(struct ifreq)))
3143 + * These ioctl calls:
3144 + * - require superuser power.
3145 + * - require strict serialization.
3146 + * - return a value
3151 + if (!capable(CAP_NET_ADMIN))
3153 + dev_load(ifr.ifr_name);
3155 + ret = dev_ifsioc(&ifr, cmd);
3160 + if (copy_to_user(arg, &ifr,
3161 + sizeof(struct ifreq)))
3167 + * These ioctl calls:
3168 + * - require superuser power.
3169 + * - require strict serialization.
3170 + * - do not return a value
3172 + case SIOCSIFFLAGS:
3173 + case SIOCSIFMETRIC:
3176 + case SIOCSIFHWADDR:
3177 + case SIOCSIFSLAVE:
3178 + case SIOCADDMULTI:
3179 + case SIOCDELMULTI:
3180 + case SIOCSIFHWBROADCAST:
3181 + case SIOCSIFTXQLEN:
3183 + case SIOCBONDENSLAVE:
3184 + case SIOCBONDRELEASE:
3185 + case SIOCBONDSETHWADDR:
3186 + case SIOCBONDCHANGEACTIVE:
3189 + if (!capable(CAP_NET_ADMIN))
3191 + /* fall through */
3192 + case SIOCBONDSLAVEINFOQUERY:
3193 + case SIOCBONDINFOQUERY:
3194 + dev_load(ifr.ifr_name);
3196 + ret = dev_ifsioc(&ifr, cmd);
3201 + /* Get the per device memory space. We can add this but
3202 + * currently do not support it */
3204 + /* Set the per device memory buffer space.
3205 + * Not applicable in our case */
3210 + * Unknown or private ioctl.
3213 + if (cmd == SIOCWANDEV ||
3214 + (cmd >= SIOCDEVPRIVATE &&
3215 + cmd <= SIOCDEVPRIVATE + 15)) {
3216 + dev_load(ifr.ifr_name);
3218 + ret = dev_ifsioc(&ifr, cmd);
3220 + if (!ret && copy_to_user(arg, &ifr,
3221 + sizeof(struct ifreq)))
3225 +#ifdef CONFIG_WIRELESS_EXT
3226 + /* Take care of Wireless Extensions */
3227 + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) {
3228 + /* If command is `set a parameter', or
3229 + * `get the encoding parameters', check if
3230 + * the user has the right to do it */
3231 + if (IW_IS_SET(cmd) || cmd == SIOCGIWENCODE
3232 + || cmd == SIOCGIWENCODEEXT) {
3233 + if (!capable(CAP_NET_ADMIN))
3236 + dev_load(ifr.ifr_name);
3238 + /* Follow me in net/core/wireless.c */
3239 + ret = wireless_process_ioctl(&ifr, cmd);
3241 + if (IW_IS_GET(cmd) &&
3242 + copy_to_user(arg, &ifr,
3243 + sizeof(struct ifreq)))
3247 +#endif /* CONFIG_WIRELESS_EXT */
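/*
 * [editorial sketch] The user-space view of the flag path dispatched
 * above: a read-modify-write via SIOCGIFFLAGS then SIOCSIFFLAGS. Minimal
 * example; the set step needs CAP_NET_ADMIN, error handling trimmed.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

static int set_iface_up(int fd, const char *name)
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
	if (ioctl(fd, SIOCGIFFLAGS, &ifr) < 0)	/* read current flags */
		return -1;
	ifr.ifr_flags |= IFF_UP;
	return ioctl(fd, SIOCSIFFLAGS, &ifr);	/* write them back */
}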
3254 + * dev_new_index - allocate an ifindex
3256 + * Returns a suitable unique value for a new device interface
3257 + * number. The caller must hold the rtnl semaphore or the
3258 + * dev_base_lock to be sure it remains unique.
3260 +static int dev_new_index(void)
3262 + static int ifindex;
3264 + if (++ifindex <= 0)
3266 + if (!__dev_get_by_index(ifindex))
3271 +static int dev_boot_phase = 1;
3273 +/* Delayed registration/unregistration */
3274 +static DEFINE_SPINLOCK(net_todo_list_lock);
3275 +static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3277 +static inline void net_set_todo(struct net_device *dev)
3279 + spin_lock(&net_todo_list_lock);
3280 + list_add_tail(&dev->todo_list, &net_todo_list);
3281 + spin_unlock(&net_todo_list_lock);
3285 + * register_netdevice - register a network device
3286 + * @dev: device to register
3288 + * Take a completed network device structure and add it to the kernel
3289 + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3290 + * chain. 0 is returned on success. A negative errno code is returned
3291 + * on a failure to set up the device, or if the name is a duplicate.
3293 + * Callers must hold the rtnl semaphore. You may want
3294 + * register_netdev() instead of this.
3297 + * The locking appears insufficient to guarantee two parallel registers
3298 + * will not get the same name.
3301 +int register_netdevice(struct net_device *dev)
3303 + struct hlist_head *head;
3304 + struct hlist_node *p;
3307 + BUG_ON(dev_boot_phase);
3312 + /* When net_device's are persistent, this will be fatal. */
3313 + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3315 + spin_lock_init(&dev->queue_lock);
3316 + spin_lock_init(&dev->_xmit_lock);
3317 + dev->xmit_lock_owner = -1;
3318 +#ifdef CONFIG_NET_CLS_ACT
3319 + spin_lock_init(&dev->ingress_lock);
3324 + /* Init, if this function is available */
3326 + ret = dev->init(dev);
3334 + if (!dev_valid_name(dev->name)) {
3339 + dev->ifindex = dev_new_index();
3340 + if (dev->iflink == -1)
3341 + dev->iflink = dev->ifindex;
3343 + /* Check for existence of name */
3344 + head = dev_name_hash(dev->name);
3345 + hlist_for_each(p, head) {
3346 + struct net_device *d
3347 + = hlist_entry(p, struct net_device, name_hlist);
3348 + if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3354 + /* Fix illegal SG+CSUM combinations. */
3355 + if ((dev->features & NETIF_F_SG) &&
3356 + !(dev->features & NETIF_F_ALL_CSUM)) {
3357 + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3359 + dev->features &= ~NETIF_F_SG;
3362 + /* TSO requires that SG is present as well. */
3363 + if ((dev->features & NETIF_F_TSO) &&
3364 + !(dev->features & NETIF_F_SG)) {
3365 + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3367 + dev->features &= ~NETIF_F_TSO;
3369 + if (dev->features & NETIF_F_UFO) {
3370 + if (!(dev->features & NETIF_F_HW_CSUM)) {
3371 + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3372 + "NETIF_F_HW_CSUM feature.\n",
3374 + dev->features &= ~NETIF_F_UFO;
3376 + if (!(dev->features & NETIF_F_SG)) {
3377 + printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3378 + "NETIF_F_SG feature.\n",
3380 + dev->features &= ~NETIF_F_UFO;
3385 +	 *	nil rebuild_header routine;
3386 +	 *	it should never be called and is used only as a bug trap.
3389 + if (!dev->rebuild_header)
3390 + dev->rebuild_header = default_rebuild_header;
3392 + ret = netdev_register_sysfs(dev);
3395 + dev->reg_state = NETREG_REGISTERED;
3398 + * Default initial state at registry is that the
3399 + * device is present.
3402 + set_bit(__LINK_STATE_PRESENT, &dev->state);
3405 + dev_init_scheduler(dev);
3406 + write_lock_bh(&dev_base_lock);
3408 + dev_tail = &dev->next;
3409 + hlist_add_head(&dev->name_hlist, head);
3410 + hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3412 + write_unlock_bh(&dev_base_lock);
3414 +	/* Notify protocols that a new device has appeared. */
3415 + raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3424 + * register_netdev - register a network device
3425 + * @dev: device to register
3427 + * Take a completed network device structure and add it to the kernel
3428 + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3429 + * chain. 0 is returned on success. A negative errno code is returned
3430 + * on a failure to set up the device, or if the name is a duplicate.
3432 + *	This is a wrapper around register_netdevice() that takes the rtnl semaphore
3433 + *	and expands the device name if you passed a format string to alloc_netdev().
3436 +int register_netdev(struct net_device *dev)
3443 + * If the name is a format string the caller wants us to do a
3444 + * name allocation.
3446 + if (strchr(dev->name, '%')) {
3447 + err = dev_alloc_name(dev, dev->name);
3452 + err = register_netdevice(dev);
3457 +EXPORT_SYMBOL(register_netdev);
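/*
 * [editorial sketch] The usual driver-side pairing of the two calls
 * documented above. "exdev%d" exercises the '%'-format name expansion;
 * ether_setup() is the stock Ethernet initializer. Hypothetical driver
 * in this file's context, error paths trimmed to the essentials.
 */
struct example_priv { int placeholder; };

static int __init example_driver_init(void)
{
	struct net_device *dev;
	int err;

	dev = alloc_netdev(sizeof(struct example_priv), "exdev%d", ether_setup);
	if (!dev)
		return -ENOMEM;
	err = register_netdev(dev);	/* takes rtnl, expands the name */
	if (err)
		free_netdev(dev);	/* only on failure; after success the
					 * teardown is unregister_netdev()
					 * followed by free_netdev() */
	return err;
}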
3460 + * netdev_wait_allrefs - wait until all references are gone.
3462 + * This is called when unregistering network devices.
3464 + * Any protocol or device that holds a reference should register
3465 + * for netdevice notification, and cleanup and put back the
3466 + * reference if they receive an UNREGISTER event.
3467 + * We can get stuck here if buggy protocols don't correctly call dev_put().
3470 +static void netdev_wait_allrefs(struct net_device *dev)
3472 + unsigned long rebroadcast_time, warning_time;
3474 + rebroadcast_time = warning_time = jiffies;
3475 + while (atomic_read(&dev->refcnt) != 0) {
3476 + if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3479 + /* Rebroadcast unregister notification */
3480 + raw_notifier_call_chain(&netdev_chain,
3481 + NETDEV_UNREGISTER, dev);
3483 + if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3485 + /* We must not have linkwatch events
3486 + * pending on unregister. If this
3487 + * happens, we simply run the queue
3488 + * unscheduled, resulting in a noop
3489 + * for this device.
3491 + linkwatch_run_queue();
3496 + rebroadcast_time = jiffies;
3501 + if (time_after(jiffies, warning_time + 10 * HZ)) {
3502 + printk(KERN_EMERG "unregister_netdevice: "
3503 + "waiting for %s to become free. Usage "
3505 + dev->name, atomic_read(&dev->refcnt));
3506 + warning_time = jiffies;
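/*
 * [editorial sketch] netdev_wait_allrefs() above only terminates once
 * every dev_hold() has been balanced by dev_put(). A hypothetical
 * subsystem caching a device pointer avoids the 10-second warning by
 * dropping its reference from a netdevice notifier:
 */
static struct net_device *cached_dev;	/* taken with dev_hold() */

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UNREGISTER && dev == cached_dev) {
		cached_dev = NULL;
		dev_put(dev);	/* release our reference promptly */
	}
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_netdev_event,
};
/* registered once with register_netdevice_notifier(&example_nb) */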
3511 +/* The sequence is:
3515 + * register_netdevice(x1);
3516 + * register_netdevice(x2);
3518 + * unregister_netdevice(y1);
3519 + * unregister_netdevice(y2);
3522 + * free_netdev(y1);
3523 + * free_netdev(y2);
3525 + * We are invoked by rtnl_unlock() after it drops the semaphore.
3526 + * This allows us to deal with problems:
3527 + * 1) We can delete sysfs objects which invoke hotplug
3528 + * without deadlocking with linkwatch via keventd.
3529 + * 2) Since we run with the RTNL semaphore not held, we can sleep
3530 + * safely in order to wait for the netdev refcnt to drop to zero.
3532 +static DEFINE_MUTEX(net_todo_run_mutex);
3533 +void netdev_run_todo(void)
3535 + struct list_head list;
3537 +	/* Need to guard against multiple CPUs getting out of order. */
3538 + mutex_lock(&net_todo_run_mutex);
3540 + /* Not safe to do outside the semaphore. We must not return
3541 + * until all unregister events invoked by the local processor
3542 + * have been completed (either by this todo run, or one on
3545 + if (list_empty(&net_todo_list))
3548 + /* Snapshot list, allow later requests */
3549 + spin_lock(&net_todo_list_lock);
3550 + list_replace_init(&net_todo_list, &list);
3551 + spin_unlock(&net_todo_list_lock);
3553 + while (!list_empty(&list)) {
3554 + struct net_device *dev
3555 + = list_entry(list.next, struct net_device, todo_list);
3556 + list_del(&dev->todo_list);
3558 + if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3559 + printk(KERN_ERR "network todo '%s' but state %d\n",
3560 + dev->name, dev->reg_state);
3565 + netdev_unregister_sysfs(dev);
3566 + dev->reg_state = NETREG_UNREGISTERED;
3568 + netdev_wait_allrefs(dev);
3571 + BUG_ON(atomic_read(&dev->refcnt));
3572 + BUG_TRAP(!dev->ip_ptr);
3573 + BUG_TRAP(!dev->ip6_ptr);
3574 + BUG_TRAP(!dev->dn_ptr);
3576 + /* It must be the very last action,
3577 + * after this 'dev' may point to freed up memory.
3579 + if (dev->destructor)
3580 + dev->destructor(dev);
3584 + mutex_unlock(&net_todo_run_mutex);
3588 + * alloc_netdev - allocate network device
3589 + * @sizeof_priv: size of private data to allocate space for
3590 + * @name: device name format string
3591 + * @setup: callback to initialize device
3593 + * Allocates a struct net_device with private data area for driver use
3594 + * and performs basic initialization.
3596 +struct net_device *alloc_netdev(int sizeof_priv, const char *name,
3597 + void (*setup)(struct net_device *))
3600 + struct net_device *dev;
3603 + BUG_ON(strlen(name) >= sizeof(dev->name));
3605 + /* ensure 32-byte alignment of both the device and private area */
3606 + alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
3607 + alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3609 + p = kzalloc(alloc_size, GFP_KERNEL);
3611 + printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3615 + dev = (struct net_device *)
3616 + (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3617 + dev->padded = (char *)dev - (char *)p;
3620 + dev->priv = netdev_priv(dev);
3623 + strcpy(dev->name, name);
3626 +EXPORT_SYMBOL(alloc_netdev);
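/*
 * [editorial note] A worked instance of the alignment arithmetic above,
 * assuming NETDEV_ALIGN_CONST == 31 (32-byte alignment) and, purely for
 * illustration, sizeof(*dev) == 1068 and sizeof_priv == 100:
 *
 *   alloc_size  = (1068 + 31) & ~31  = 1088   struct rounded up to 32
 *   alloc_size += 100 + 31           = 1219   priv + worst-case padding
 *
 * After kzalloc(), dev is the first 32-byte boundary at or after p, and
 * dev->padded records the offset so free_netdev() can recover p.
 */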
3629 + * free_netdev - free network device
3632 + * This function does the last stage of destroying an allocated device
3633 + * interface. The reference to the device object is released.
3634 + * If this is the last reference then it will be freed.
3636 +void free_netdev(struct net_device *dev)
3638 +#ifdef CONFIG_SYSFS
3639 + /* Compatibility with error handling in drivers */
3640 + if (dev->reg_state == NETREG_UNINITIALIZED) {
3641 + kfree((char *)dev - dev->padded);
3645 + BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3646 + dev->reg_state = NETREG_RELEASED;
3648 + /* will free via device release */
3649 + put_device(&dev->dev);
3651 + kfree((char *)dev - dev->padded);
3655 +/* Synchronize with packet receive processing. */
3656 +void synchronize_net(void)
3659 + synchronize_rcu();
3663 + * unregister_netdevice - remove device from the kernel
3666 + * This function shuts down a device interface and removes it
3667 + * from the kernel tables. On success 0 is returned, on a failure
3668 + * a negative errno code is returned.
3670 + * Callers must hold the rtnl semaphore. You may want
3671 + * unregister_netdev() instead of this.
3674 +void unregister_netdevice(struct net_device *dev)
3676 + struct net_device *d, **dp;
3678 + BUG_ON(dev_boot_phase);
3681 + /* Some devices call without registering for initialization unwind. */
3682 + if (dev->reg_state == NETREG_UNINITIALIZED) {
3683 + printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3684 + "was registered\n", dev->name, dev);
3690 + BUG_ON(dev->reg_state != NETREG_REGISTERED);
3692 + /* If device is running, close it first. */
3693 + if (dev->flags & IFF_UP)
3696 + /* And unlink it from device chain. */
3697 + for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
3699 + write_lock_bh(&dev_base_lock);
3700 + hlist_del(&dev->name_hlist);
3701 + hlist_del(&dev->index_hlist);
3702 + if (dev_tail == &dev->next)
3705 + write_unlock_bh(&dev_base_lock);
3711 + dev->reg_state = NETREG_UNREGISTERING;
3713 + synchronize_net();
3715 + /* Shutdown queueing discipline. */
3716 + dev_shutdown(dev);
3719 +	/* Notify protocols that we are about to destroy
3720 +	   this device. They should clean up all their state.
3722 + raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3725 + * Flush the multicast chain
3727 + dev_mc_discard(dev);
3732 + /* Notifier chain MUST detach us from master device. */
3733 + BUG_TRAP(!dev->master);
3735 + /* Finish processing unregister after unlock */
3736 + net_set_todo(dev);
3738 + synchronize_net();
3744 + * unregister_netdev - remove device from the kernel
3747 + * This function shuts down a device interface and removes it
3748 + * from the kernel tables. On success 0 is returned, on a failure
3749 + * a negative errno code is returned.
3751 + * This is just a wrapper for unregister_netdevice that takes
3752 + * the rtnl semaphore. In general you want to use this and not
3753 + * unregister_netdevice.
3755 +void unregister_netdev(struct net_device *dev)
3758 + unregister_netdevice(dev);
3762 +EXPORT_SYMBOL(unregister_netdev);
3764 +static int dev_cpu_callback(struct notifier_block *nfb,
3765 + unsigned long action,
3768 + struct sk_buff **list_skb;
3769 + struct net_device **list_net;
3770 + struct sk_buff *skb;
3771 + unsigned int cpu, oldcpu = (unsigned long)ocpu;
3772 + struct softnet_data *sd, *oldsd;
3774 + if (action != CPU_DEAD)
3777 + local_irq_disable();
3778 + cpu = smp_processor_id();
3779 + sd = &per_cpu(softnet_data, cpu);
3780 + oldsd = &per_cpu(softnet_data, oldcpu);
3782 + /* Find end of our completion_queue. */
3783 + list_skb = &sd->completion_queue;
3785 + list_skb = &(*list_skb)->next;
3786 + /* Append completion queue from offline CPU. */
3787 + *list_skb = oldsd->completion_queue;
3788 + oldsd->completion_queue = NULL;
3790 + /* Find end of our output_queue. */
3791 + list_net = &sd->output_queue;
3793 + list_net = &(*list_net)->next_sched;
3794 + /* Append output queue from offline CPU. */
3795 + *list_net = oldsd->output_queue;
3796 + oldsd->output_queue = NULL;
3798 + raise_softirq_irqoff(NET_TX_SOFTIRQ);
3799 + local_irq_enable();
3801 + /* Process offline CPU's input_pkt_queue */
3802 + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3808 +#ifdef CONFIG_NET_DMA
3810 + * net_dma_rebalance - redistribute the available DMA channels among CPUs
3811 + * This is called when the number of channels allocated to the net_dma_client
3812 + * changes. The net_dma_client tries to have one DMA channel per CPU.
3814 +static void net_dma_rebalance(void)
3816 + unsigned int cpu, i, n;
3817 + struct dma_chan *chan;
3819 + if (net_dma_count == 0) {
3820 + for_each_online_cpu(cpu)
3821 + rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3826 + cpu = first_cpu(cpu_online_map);
3829 + list_for_each_entry(chan, &net_dma_client->channels, client_node) {
3830 + n = ((num_online_cpus() / net_dma_count)
3831 + + (i < (num_online_cpus() % net_dma_count) ? 1 : 0));
3834 + per_cpu(softnet_data, cpu).net_dma = chan;
3835 + cpu = next_cpu(cpu, cpu_online_map);
3840 + rcu_read_unlock();
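/*
 * [editorial note] A worked instance of the distribution above: with 8
 * online CPUs and net_dma_count == 3,
 *
 *   n = 8/3 + (i < 8%3 ? 1 : 0)
 *
 * gives channel 0 three CPUs, channel 1 three CPUs and channel 2 two
 * CPUs, so every online CPU ends up with exactly one channel pointer.
 */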
3844 + * netdev_dma_event - event callback for the net_dma_client
3845 + * @client: should always be net_dma_client
3846 + * @chan: DMA channel for the event
3847 + * @event: event type
3849 +static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3850 + enum dma_event event)
3852 + spin_lock(&net_dma_event_lock);
3854 + case DMA_RESOURCE_ADDED:
3856 + net_dma_rebalance();
3858 + case DMA_RESOURCE_REMOVED:
3860 + net_dma_rebalance();
3865 + spin_unlock(&net_dma_event_lock);
3869 + * netdev_dma_register - register the networking subsystem as a DMA client
3871 +static int __init netdev_dma_register(void)
3873 + spin_lock_init(&net_dma_event_lock);
3874 + net_dma_client = dma_async_client_register(netdev_dma_event);
3875 + if (net_dma_client == NULL)
3878 + dma_async_client_chan_request(net_dma_client, num_online_cpus());
3883 +static int __init netdev_dma_register(void) { return -ENODEV; }
3884 +#endif /* CONFIG_NET_DMA */
3887 + * Initialize the DEV module. At boot time this walks the device list and
3888 + * unhooks any devices that fail to initialise (normally hardware not
3889 + * present) and leaves us with a valid list of present and active devices.
3894 + * This is called single threaded during boot, so no need
3895 + * to take the rtnl semaphore.
3897 +static int __init net_dev_init(void)
3899 + int i, rc = -ENOMEM;
3901 + BUG_ON(!dev_boot_phase);
3903 + if (dev_proc_init())
3906 + if (netdev_sysfs_init())
3909 + INIT_LIST_HEAD(&ptype_all);
3910 + for (i = 0; i < 16; i++)
3911 + INIT_LIST_HEAD(&ptype_base[i]);
3913 + for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
3914 + INIT_HLIST_HEAD(&dev_name_head[i]);
3916 + for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
3917 + INIT_HLIST_HEAD(&dev_index_head[i]);
3920 + * Initialise the packet receive queues.
3923 + for_each_possible_cpu(i) {
3924 + struct softnet_data *queue;
3926 + queue = &per_cpu(softnet_data, i);
3927 + skb_queue_head_init(&queue->input_pkt_queue);
3928 + queue->completion_queue = NULL;
3929 + INIT_LIST_HEAD(&queue->poll_list);
3930 + set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
3931 + queue->backlog_dev.weight = weight_p;
3932 + queue->backlog_dev.poll = process_backlog;
3933 + atomic_set(&queue->backlog_dev.refcnt, 1);
3936 + netdev_dma_register();
3938 + dev_boot_phase = 0;
3940 + open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
3941 + open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
3943 + hotcpu_notifier(dev_cpu_callback, 0);
3951 +subsys_initcall(net_dev_init);
3953 +EXPORT_SYMBOL(__dev_get_by_index);
3954 +EXPORT_SYMBOL(__dev_get_by_name);
3955 +EXPORT_SYMBOL(__dev_remove_pack);
3956 +EXPORT_SYMBOL(dev_valid_name);
3957 +EXPORT_SYMBOL(dev_add_pack);
3958 +EXPORT_SYMBOL(dev_alloc_name);
3959 +EXPORT_SYMBOL(dev_close);
3960 +EXPORT_SYMBOL(dev_get_by_flags);
3961 +EXPORT_SYMBOL(dev_get_by_index);
3962 +EXPORT_SYMBOL(dev_get_by_name);
3963 +EXPORT_SYMBOL(dev_open);
3964 +EXPORT_SYMBOL(dev_queue_xmit);
3965 +EXPORT_SYMBOL(dev_remove_pack);
3966 +EXPORT_SYMBOL(dev_set_allmulti);
3967 +EXPORT_SYMBOL(dev_set_promiscuity);
3968 +EXPORT_SYMBOL(dev_change_flags);
3969 +EXPORT_SYMBOL(dev_set_mtu);
3970 +EXPORT_SYMBOL(dev_set_mac_address);
3971 +EXPORT_SYMBOL(free_netdev);
3972 +EXPORT_SYMBOL(netdev_boot_setup_check);
3973 +EXPORT_SYMBOL(netdev_set_master);
3974 +EXPORT_SYMBOL(netdev_state_change);
3975 +EXPORT_SYMBOL(netif_receive_skb);
3976 +EXPORT_SYMBOL(netif_rx);
3977 +EXPORT_SYMBOL(register_gifconf);
3978 +EXPORT_SYMBOL(register_netdevice);
3979 +EXPORT_SYMBOL(register_netdevice_notifier);
3980 +EXPORT_SYMBOL(skb_checksum_help);
3981 +EXPORT_SYMBOL(synchronize_net);
3982 +EXPORT_SYMBOL(unregister_netdevice);
3983 +EXPORT_SYMBOL(unregister_netdevice_notifier);
3984 +EXPORT_SYMBOL(net_enable_timestamp);
3985 +EXPORT_SYMBOL(net_disable_timestamp);
3986 +EXPORT_SYMBOL(dev_get_flags);
3988 +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
3989 +EXPORT_SYMBOL(br_handle_frame_hook);
3990 +EXPORT_SYMBOL(br_fdb_get_hook);
3991 +EXPORT_SYMBOL(br_fdb_put_hook);
3995 +EXPORT_SYMBOL(dev_load);
3998 +EXPORT_PER_CPU_SYMBOL(softnet_data);
3999 diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Kconfig linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig
4000 --- linux-2.6.21.4/net/ring/Kconfig 1970-01-01 00:00:00.000000000 +0000
4001 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Kconfig 2007-06-10 16:43:04.406423944 +0000
4004 + tristate "PF_RING sockets (EXPERIMENTAL)"
4005 + depends on EXPERIMENTAL
4007 + PF_RING socket family, optimized for packet capture.
4008 + If a PF_RING socket is bound to an adapter (via the bind() system
4009 +	  call), that adapter will be used in read-only mode until the socket
4010 +	  is destroyed. Whenever an incoming packet is received from the adapter,
4011 +	  it is not passed to the upper layers; instead it is copied to a ring
4012 + buffer, which in turn is exported to user space applications via mmap.
4013 +	  Please refer to http://luca.ntop.org/Ring.pdf for more details.
4015 + Say N unless you know what you are doing.
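[editorial sketch] A minimal user-space capture skeleton for this option,
assuming PF_RING is the protocol family number registered by this patch
(27 in contemporary releases) and that the mapped area begins with the
FlowSlotInfo header from linux/ring.h; error handling trimmed:

	#include <sys/socket.h>
	#include <sys/mman.h>
	#include <netinet/in.h>
	#include <linux/if_ether.h>
	#include <string.h>

	#define PF_RING 27	/* assumption: value defined by the patch */

	int main(void)
	{
		struct sockaddr sa;
		int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
		char *ring;

		memset(&sa, 0, sizeof(sa));
		sa.sa_family = PF_RING;
		strncpy(sa.sa_data, "eth0", sizeof(sa.sa_data));
		bind(fd, &sa, sizeof(sa));	/* adapter now read-only */
		/* map the header page first; the FlowSlotInfo it holds
		 * tells the application how large the full ring is */
		ring = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
			    MAP_SHARED, fd, 0);
		/* ... poll fd, walk slots, advance the read index ... */
		return 0;
	}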
4017 diff --unified --recursive --new-file linux-2.6.21.4/net/ring/Makefile linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile
4018 --- linux-2.6.21.4/net/ring/Makefile 1970-01-01 00:00:00.000000000 +0000
4019 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/Makefile 2007-06-10 16:43:04.350421521 +0000
4022 +# Makefile for the ring driver.
4027 +ring-objs := ring_packet.o
4028 diff --unified --recursive --new-file linux-2.6.21.4/net/ring/ring_packet.c linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c
4029 --- linux-2.6.21.4/net/ring/ring_packet.c 1970-01-01 00:00:00.000000000 +0000
4030 +++ linux-2.6.21.4-1-686-smp-ring3/net/ring/ring_packet.c 2007-06-10 16:43:04.354421694 +0000
4032 +/* ***************************************************************
4034 + * (C) 2004-07 - Luca Deri <deri@ntop.org>
4036 + * This code includes contributions courtesy of
4037 + * - Jeff Randall <jrandall@nexvu.com>
4038 + * - Helmut Manck <helmut.manck@secunet.com>
4039 + * - Brad Doctor <brad@stillsecure.com>
4040 + * - Amit D. Chaudhary <amit_ml@rajgad.com>
4041 + * - Francesco Fusco <fusco@ntop.org>
4042 + * - Michael Stiller <ms@2scale.net>
4045 + * This program is free software; you can redistribute it and/or modify
4046 + * it under the terms of the GNU General Public License as published by
4047 + * the Free Software Foundation; either version 2 of the License, or
4048 + * (at your option) any later version.
4050 + * This program is distributed in the hope that it will be useful,
4051 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
4052 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4053 + * GNU General Public License for more details.
4055 + * You should have received a copy of the GNU General Public License
4056 + * along with this program; if not, write to the Free Software Foundation,
4057 + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
4061 +#include <linux/version.h>
4062 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
4063 +#include <linux/autoconf.h>
4065 +#include <linux/config.h>
4067 +#include <linux/module.h>
4068 +#include <linux/kernel.h>
4069 +#include <linux/socket.h>
4070 +#include <linux/skbuff.h>
4071 +#include <linux/rtnetlink.h>
4072 +#include <linux/in.h>
4073 +#include <linux/inet.h>
4074 +#include <linux/in6.h>
4075 +#include <linux/init.h>
4076 +#include <linux/filter.h>
4077 +#include <linux/ring.h>
4078 +#include <linux/ip.h>
4079 +#include <linux/tcp.h>
4080 +#include <linux/udp.h>
4081 +#include <linux/list.h>
4082 +#include <linux/proc_fs.h>
4083 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4084 +#include <net/xfrm.h>
4086 +#include <linux/poll.h>
4088 +#include <net/sock.h>
4089 +#include <asm/io.h> /* needed for virt_to_phys() */
4091 +#include <net/inet_common.h>
4094 +/* #define RING_DEBUG */
4096 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
4097 +static inline int remap_page_range(struct vm_area_struct *vma,
4098 + unsigned long uvaddr,
4099 + unsigned long paddr,
4100 + unsigned long size,
4102 + return(remap_pfn_range(vma, uvaddr, paddr >> PAGE_SHIFT,
4107 +/* ************************************************* */
4109 +#define CLUSTER_LEN 8
4111 +struct ring_cluster {
4112 + u_short cluster_id; /* 0 = no cluster */
4113 + u_short num_cluster_elements;
4114 + enum cluster_type hashing_mode;
4115 + u_short hashing_id;
4116 + struct sock *sk[CLUSTER_LEN];
4117 + struct ring_cluster *next; /* NULL = last element of the cluster */
4120 +/* ************************************************* */
4122 +struct ring_element {
4123 + struct list_head list;
4127 +/* ************************************************* */
4130 + struct net_device *ring_netdev;
4135 + u_short cluster_id; /* 0 = no cluster */
4138 + struct net_device *reflector_dev;
4140 + /* Packet buffers */
4141 + unsigned long order;
4144 + unsigned long ring_memory;
4145 + FlowSlotInfo *slots_info; /* Basically it points to ring_memory */
4146 + char *ring_slots; /* Basically it points to ring_memory
4147 + +sizeof(FlowSlotInfo) */
4149 + /* Packet Sampling */
4150 + u_int pktToSample, sample_rate;
4153 + struct sk_filter *bpfFilter;
4155 + /* Aho-Corasick */
4156 + ACSM_STRUCT2 * acsm;
4159 + atomic_t num_ring_slots_waiters;
4160 + wait_queue_head_t ring_slots_waitqueue;
4161 + rwlock_t ring_index_lock;
4163 + /* Bloom Filters */
4164 + u_char bitmask_enabled;
4165 + bitmask_selector mac_bitmask, vlan_bitmask, ip_bitmask, twin_ip_bitmask,
4166 + port_bitmask, twin_port_bitmask, proto_bitmask;
4167 + u_int32_t num_mac_bitmask_add, num_mac_bitmask_remove;
4168 + u_int32_t num_vlan_bitmask_add, num_vlan_bitmask_remove;
4169 + u_int32_t num_ip_bitmask_add, num_ip_bitmask_remove;
4170 + u_int32_t num_port_bitmask_add, num_port_bitmask_remove;
4171 + u_int32_t num_proto_bitmask_add, num_proto_bitmask_remove;
4173 + /* Indexes (Internal) */
4174 + u_int insert_page_id, insert_slot_id;
4177 +/* ************************************************* */
4179 +/* List of all ring sockets. */
4180 +static struct list_head ring_table;
4181 +static u_int ring_table_size;
4183 +/* List of all clusters */
4184 +static struct ring_cluster *ring_cluster_list;
4186 +static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
4188 +/* ********************************** */
4190 +/* /proc entry for ring module */
4191 +struct proc_dir_entry *ring_proc_dir = NULL;
4192 +struct proc_dir_entry *ring_proc = NULL;
4194 +static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
4195 +static void ring_proc_add(struct ring_opt *pfr);
4196 +static void ring_proc_remove(struct ring_opt *pfr);
4197 +static void ring_proc_init(void);
4198 +static void ring_proc_term(void);
4200 +/* ********************************** */
4203 +static struct proto_ops ring_ops;
4205 +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
4206 +static struct proto ring_proto;
4209 +static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
4211 +static int buffer_ring_handler(struct net_device *dev, char *data, int len);
4212 +static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
4216 +/* ********************************** */
4219 +static unsigned int bucket_len = 128, num_slots = 4096, sample_rate = 1,
4220 + transparent_mode = 1, enable_tx_capture = 1;
4222 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
4223 +module_param(bucket_len, uint, 0644);
4224 +module_param(num_slots, uint, 0644);
4225 +module_param(sample_rate, uint, 0644);
4226 +module_param(transparent_mode, uint, 0644);
4227 +module_param(enable_tx_capture, uint, 0644);
4229 +MODULE_PARM(bucket_len, "i");
4230 +MODULE_PARM(num_slots, "i");
4231 +MODULE_PARM(sample_rate, "i");
4232 +MODULE_PARM(transparent_mode, "i");
4233 +MODULE_PARM(enable_tx_capture, "i");
4236 +MODULE_PARM_DESC(bucket_len, "Number of ring buckets");
4237 +MODULE_PARM_DESC(num_slots, "Number of ring slots");
4238 +MODULE_PARM_DESC(sample_rate, "Ring packet sample rate");
4239 +MODULE_PARM_DESC(transparent_mode,
4240 + "Set to 1 to set transparent mode "
4241 + "(slower but backwards compatible)");
4243 +MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
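/*
 * [editorial note] With the 0644 permissions above (2.6.16+), these
 * parameters appear under /sys/module/ring/parameters/ and can be
 * overridden at load time, e.g. a hypothetical
 * "insmod ring.ko num_slots=8192 bucket_len=256 transparent_mode=0".
 */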
4245 +/* ********************************** */
4247 +#define MIN_QUEUED_PKTS 64
4248 +#define MAX_QUEUE_LOOPS 64
4251 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
4252 +#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
4253 +#define ring_sk(__sk) ((__sk)->sk_protinfo)
4255 +#define ring_sk_datatype(a) (a)
4256 +#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
4259 +#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
4262 + int dev_queue_xmit(struct sk_buff *skb)
4264 + struct net_device *dev_get_by_name(const char *name)
4267 +/* ********************************** */
4274 +** Multi-Pattern Search Engine
4276 +** Aho-Corasick State Machine - version 2.0
4278 +** Supports both Non-Deterministic and Deterministic Finite Automata
4281 +** Reference - Efficient String matching: An Aid to Bibliographic Search
4282 +** Alfred V Aho and Margaret J Corasick
4283 +** Bell Laboratories
4284 +** Copyright(C) 1975 Association for Computing Machinery,Inc
4287 +** +++ Version 1.0 notes - Marc Norton:
4290 +** Original implementation based on the 4 algorithms in the paper by Aho & Corasick,
4291 +** some implementation ideas from 'Practical Algorithms in C', and some
4294 +** 1) Finds all occurrences of all patterns within a text.
4297 +** +++ Version 2.0 Notes - Marc Norton/Dan Roelker:
4300 +** New implementation modifies the state table storage and access model to use
4301 +** compacted sparse vector storage. Dan Roelker and I hammered this strategy out
4302 +** amongst many others in order to reduce memory usage and improve caching performance.
4303 +** The memory usage is greatly reduced; we only use 1/4 of what we used to. The caching
4304 +** performance is better in pure benchmarking tests, but does not show overall improvement
4305 +** in Snort. Unfortunately, once a pattern match test has been performed Snort moves on to doing
4306 +** many other things before we get back to a pattern match test, so the cache is voided.
4308 +** This version has better caching performance characteristics, reduced memory,
4309 +** more state table storage options, and requires no a priori case conversions.
4310 +** It does maintain the same public interface. (Snort only used banded storage).
4312 +** 1) Supports NFA and DFA state machines, and basic keyword state machines
4313 +** 2) Initial transition table uses Linked Lists
4314 +** 3) Improved state table memory options. NFA and DFA state
4315 +** transition tables are converted to one of 4 formats during compilation.
4317 +** b) Sparse matrix
4318 +** c) Banded matrix (Default-this is the only one used in snort)
4319 +** d) Sparse-Banded matrix
4320 +** 4) Added support for acstate_t in .h file so we can compile states as
4321 +** 16, or 32 bit state values for another reduction in memory consumption,
4322 +** smaller states allows more of the state table to be cached, and improves
4323 +** performance on x86-P4. Your mileage may vary, especially on risc systems.
4324 +** 5) Added a bool to each state transition list to indicate if there is a matching
4325 +** pattern in the state. This prevents us from accessing another data array
4326 +** and can improve caching/performance.
4327 +** 6) The search functions are very sensitive; don't change them without extensive testing,
4328 +** or you'll just spoil the caching and prefetching opportunities.
4330 +** Extras for fellow pattern matchers:
4331 +** The table below explains the storage format used at each step.
4332 +** You can use an NFA or DFA to match with, the NFA is slower but tiny - set the structure directly.
4333 +** You can use any of the 4 storage modes above -full,sparse,banded,sparse-bands, set the structure directly.
4334 +** For applications where you have lots of data and a pattern set to search, this version was up to 3x faster
4335 +** than the previous version, due to caching performance. This cannot be fully realized in Snort yet,
4336 +** but other applications may have better caching opportunities.
4337 +** Snort only needs to use the banded or full storage.
4339 +** Transition table format at each processing stage.
4340 +** -------------------------------------------------
4341 +** Patterns -> Keyword State Table (List)
4342 +** Keyword State Table -> NFA (List)
4343 +** NFA -> DFA (List)
4344 +** DFA (List)-> Sparse Rows O(m-avg # transitions per state)
4345 +** -> Banded Rows O(1)
4346 +** -> Sparse-Banded Rows O(nb-# bands)
4347 +** -> Full Matrix O(1)
4349 +** Copyright(C) 2002,2003,2004 Marc Norton
4350 +** Copyright(C) 2003,2004 Daniel Roelker
4351 +** Copyright(C) 2002,2003,2004 Sourcefire,Inc.
4353 +** This program is free software; you can redistribute it and/or modify
4354 +** it under the terms of the GNU General Public License as published by
4355 +** the Free Software Foundation; either version 2 of the License, or
4356 +** (at your option) any later version.
4358 +** This program is distributed in the hope that it will be useful,
4359 +** but WITHOUT ANY WARRANTY; without even the implied warranty of
4360 +** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
4361 +** GNU General Public License for more details.
4363 +** You should have received a copy of the GNU General Public License
4364 +** along with this program; if not, write to the Free Software
4365 +** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
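/*
 * [editorial note] A worked example of the goto/failure construction the
 * routines below implement, for the classic pattern set {he, she, his, hers}:
 *
 *   goto:    0 -h-> 1 -e-> 2 (he)       1 -i-> 6 -s-> 7 (his)
 *            0 -s-> 3 -h-> 4 -e-> 5 (she)     2 -r-> 8 -s-> 9 (hers)
 *   failure: f(4)=1 ("sh" falls back to "h"); f(5)=2 ("she" ends in "he",
 *            so state 5 also reports "he"); f(7)=3; f(9)=3; every other
 *            state fails to 0.
 *
 * Scanning "ushers" visits states 0,0,3,4,5,8,9 and reports "she" and
 * "he" at state 5, then "hers" at state 9. Build_NFA() below computes
 * exactly these failure links breadth-first; a later DFA-conversion pass
 * folds them into direct transitions.
 */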
4372 +#define MEMASSERT(p,s) if(!p){printk("ACSM-No Memory: %s!\n",s);}
4377 +static int max_memory = 0;
4382 +typedef struct acsm_summary_s
4384 + unsigned num_states;
4385 + unsigned num_transitions;
4386 + ACSM_STRUCT2 acsm;
4393 +static acsm_summary_t summary={0,0};
4396 +** Case Translation Table
4398 +static unsigned char xlatcase[256];
4403 +inline int toupper(int ch) {
4404 + if ( (unsigned int)(ch - 'a') < 26u )
4409 +static void init_xlatcase(void)
4412 + for (i = 0; i < 256; i++)
4414 + xlatcase[i] = toupper(i);
4424 +ConvertCaseEx (unsigned char *d, unsigned char *s, int m)
4432 + for (i = 0; i < m; i++ )
4434 + d[0] = xlatcase[ s[0] ];
4435 + d[2] = xlatcase[ s[2] ];
4436 + d[1] = xlatcase[ s[1] ];
4437 + d[3] = xlatcase[ s[3] ];
4442 + for (i=0; i < n; i++)
4444 + d[i] = xlatcase[ s[i] ];
4447 + for (i=0; i < m; i++)
4449 + d[i] = xlatcase[ s[i] ];
4463 + p = kmalloc (n, GFP_KERNEL);
4482 + * Simple QUEUE NODE
4484 +typedef struct _qnode
4487 + struct _qnode *next;
4492 + * Simple QUEUE Structure
4494 +typedef struct _queue
4496 + QNODE * head, *tail;
4502 + * Initialize the queue
4505 +queue_init (QUEUE * s)
4507 + s->head = s->tail = 0;
4512 + * Find a State in the queue
4515 +queue_find (QUEUE * s, int state)
4521 + if( q->state == state ) return 1;
4528 + * Add Tail Item to queue (FiFo/LiLo)
4531 +queue_add (QUEUE * s, int state)
4535 + if( queue_find( s, state ) ) return;
4539 + q = s->tail = s->head = (QNODE *) AC_MALLOC (sizeof (QNODE));
4540 + MEMASSERT (q, "queue_add");
4546 + q = (QNODE *) AC_MALLOC (sizeof (QNODE));
4549 + s->tail->next = q;
4557 + * Remove Head Item from queue
4560 +queue_remove (QUEUE * s)
4568 + s->head = s->head->next;
4583 + * Return items in the queue
4586 +queue_count (QUEUE * s)
4596 +queue_free (QUEUE * s)
4598 + while (queue_count (s))
4605 + * Get Next State-NFA
4608 +int List_GetNextState( ACSM_STRUCT2 * acsm, int state, int input )
4610 + trans_node_t * t = acsm->acsmTransTable[state];
4614 + if( t->key == input )
4616 + return t->next_state;
4621 + if( state == 0 ) return 0;
4623 + return ACSM_FAIL_STATE2; /* Fail state ??? */
4627 + * Get Next State-DFA
4630 +int List_GetNextState2( ACSM_STRUCT2 * acsm, int state, int input )
4632 + trans_node_t * t = acsm->acsmTransTable[state];
4636 + if( t->key == input )
4638 + return t->next_state;
4643 + return 0; /* default state */
4646 + * Put Next State - Head insertion, and transition updates
4649 +int List_PutNextState( ACSM_STRUCT2 * acsm, int state, int input, int next_state )
4652 + trans_node_t * tnew;
4654 + // printk(" List_PutNextState: state=%d, input='%c', next_state=%d\n",state,input,next_state);
4657 + /* Check if the transition already exists, if so just update the next_state */
4658 + p = acsm->acsmTransTable[state];
4661 + if( p->key == input ) /* transition already exists- reset the next state */
4663 + p->next_state = next_state;
4669 + /* Definitely not an existing transition - add it */
4670 + tnew = (trans_node_t*)AC_MALLOC(sizeof(trans_node_t));
4671 + if( !tnew ) return -1;
4673 + tnew->key = input;
4674 + tnew->next_state = next_state;
4677 + tnew->next = acsm->acsmTransTable[state];
4678 + acsm->acsmTransTable[state] = tnew;
4680 + acsm->acsmNumTrans++;
4685 + * Free the entire transition table
4688 +int List_FreeTransTable( ACSM_STRUCT2 * acsm )
4691 + trans_node_t * t, *p;
4693 + if( !acsm->acsmTransTable ) return 0;
4695 + for(i=0;i< acsm->acsmMaxStates;i++)
4697 + t = acsm->acsmTransTable[i];
4704 + max_memory -= sizeof(trans_node_t);
4708 + kfree(acsm->acsmTransTable);
4710 + max_memory -= sizeof(void*) * acsm->acsmMaxStates;
4712 + acsm->acsmTransTable = 0;
4722 + int List_FreeList( trans_node_t * t )
4733 + max_memory -= sizeof(trans_node_t);
4742 + * Converts a row of states from list format to a full vector format
4745 +int List_ConvToFull(ACSM_STRUCT2 * acsm, acstate_t state, acstate_t * full )
4748 + trans_node_t * t = acsm->acsmTransTable[ state ];
4750 + memset(full,0,sizeof(acstate_t)*acsm->acsmAlphabetSize);
4752 + if( !t ) return 0;
4756 + full[ t->key ] = t->next_state;
4764 + * Copy a Match List Entry - don't dup the pattern data
4766 +static ACSM_PATTERN2*
4767 +CopyMatchListEntry (ACSM_PATTERN2 * px)
4769 + ACSM_PATTERN2 * p;
4771 + p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
4772 + MEMASSERT (p, "CopyMatchListEntry");
4774 + memcpy (p, px, sizeof (ACSM_PATTERN2));
4782 + * Check if a pattern is in the list already,
4783 + * validate it using the 'id' field. This must be unique
4784 + * for every pattern.
4788 + int FindMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
4790 + ACSM_PATTERN2 * p;
4792 + p = acsm->acsmMatchList[state];
4795 + if( p->id == px->id ) return 1;
4805 + * Add a pattern to the list of patterns terminated at this state.
4806 + * Insert at front of list.
4809 +AddMatchListEntry (ACSM_STRUCT2 * acsm, int state, ACSM_PATTERN2 * px)
4811 + ACSM_PATTERN2 * p;
4813 + p = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
4815 + MEMASSERT (p, "AddMatchListEntry");
4817 + memcpy (p, px, sizeof (ACSM_PATTERN2));
4819 + p->next = acsm->acsmMatchList[state];
4821 + acsm->acsmMatchList[state] = p;
4826 +AddPatternStates (ACSM_STRUCT2 * acsm, ACSM_PATTERN2 * p)
4828 + int state, next, n;
4829 + unsigned char *pattern;
4832 + pattern = p->patrn;
4836 + * Match up pattern with existing states
4838 + for (; n > 0; pattern++, n--)
4840 + next = List_GetNextState(acsm,state,*pattern);
4841 + if (next == ACSM_FAIL_STATE2 || next == 0)
4849 + * Add new states for the rest of the pattern bytes, 1 state per byte
4851 + for (; n > 0; pattern++, n--)
4853 + acsm->acsmNumStates++;
4854 + List_PutNextState(acsm,state,*pattern,acsm->acsmNumStates);
4855 + state = acsm->acsmNumStates;
4858 + AddMatchListEntry (acsm, state, p );
4862 + * Build A Non-Deterministic Finite Automata
4863 + * The keyword state table must already be built, via AddPatternStates().
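+ *
+ * Worked example (classic Aho-Corasick, for illustration only): for the
+ * patterns "he", "she", "his" and "hers", the trie state reached by "sh"
+ * fails back to the state for "h", and the state for "she" fails back to
+ * the state for "he" -- so matching "she" also reports the suffix pattern
+ * "he", via the MatchList copy performed below.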
4866 +Build_NFA (ACSM_STRUCT2 * acsm)
4869 + QUEUE q, *queue = &q;
4870 + acstate_t * FailState = acsm->acsmFailState;
4871 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
4872 + ACSM_PATTERN2 * mlist,* px;
4874 + /* Init a Queue */
4875 + queue_init (queue);
4878 + /* Add the state 0 transitions first: the states at depth 1 fail back to state 0 */
4879 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
4881 + s = List_GetNextState2(acsm,0,i);
4884 + queue_add (queue, s);
4889 + /* Build the fail states for each successive layer of transitions */
4890 + while (queue_count (queue) > 0)
4892 + r = queue_remove (queue);
4894 + /* Find Final States for any Failure */
4895 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
4899 + s = List_GetNextState(acsm,r,i);
4901 + if( s != ACSM_FAIL_STATE2 )
4903 + queue_add (queue, s);
4905 + fs = FailState[r];
4908 + * Locate the next valid state for 'i' starting at fs
4910 + while( (next=List_GetNextState(acsm,fs,i)) == ACSM_FAIL_STATE2 )
4912 + fs = FailState[fs];
4916 + * Update the failure state of 's' to point to the next valid state
4918 + FailState[s] = next;
4921 + * Copy the 'next' state's MatchList to the 's' state's MatchList;
4922 + * we copy them so each list can be AC_FREE'd later,
4923 + * else we could just manipulate pointers to fake the copy.
4925 + for( mlist = MatchList[next];
4927 + mlist = mlist->next)
4929 + px = CopyMatchListEntry (mlist);
4931 + /* Insert at front of MatchList */
4932 + px->next = MatchList[s];
4933 + MatchList[s] = px;
4939 + /* Clean up the queue */
4940 + queue_free (queue);
4944 + * Build Deterministic Finite Automata from the NFA
4947 +Convert_NFA_To_DFA (ACSM_STRUCT2 * acsm)
4949 + int i, r, s, cFailState;
4950 + QUEUE q, *queue = &q;
4951 + acstate_t * FailState = acsm->acsmFailState;
4953 + /* Init a Queue */
4954 + queue_init (queue);
4956 + /* Add the state 0 transitions 1st */
4957 + for(i=0; i<acsm->acsmAlphabetSize; i++)
4959 + s = List_GetNextState(acsm,0,i);
4962 + queue_add (queue, s);
4966 + /* Start building the next layer of transitions */
4967 + while( queue_count(queue) > 0 )
4969 + r = queue_remove(queue);
4971 + /* Process this state's layer of transitions */
4972 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
4974 + s = List_GetNextState(acsm,r,i);
4976 + if( s != ACSM_FAIL_STATE2 && s!= 0)
4978 + queue_add (queue, s);
4982 + cFailState = List_GetNextState(acsm,FailState[r],i);
4984 + if( cFailState != 0 && cFailState != ACSM_FAIL_STATE2 )
4986 + List_PutNextState(acsm,r,i,cFailState);
4992 + /* Clean up the queue */
4993 + queue_free (queue);
4998 + * Convert the row lists of the state table to a full vector format
5002 +Conv_List_To_Full(ACSM_STRUCT2 * acsm)
5006 + acstate_t ** NextState = acsm->acsmNextState;
5008 + for(k=0;k<acsm->acsmMaxStates;k++)
5010 + p = AC_MALLOC( sizeof(acstate_t) * (acsm->acsmAlphabetSize+2) );
5013 + tcnt = List_ConvToFull( acsm, (acstate_t)k, p+2 );
5016 + p[1] = 0; /* no matches yet */
5018 + NextState[k] = p; /* now we have a full format row vector */
5025 + * Convert DFA memory usage from list-based storage to sparse-row storage.
5027 + * The Sparse format allows each row to be either full or sparse formatted. If the sparse row has
5028 + * too many transitions, performance or space may dictate that we use the standard full formatting
5029 + * for the row. More than 5 or 10 transitions per state can hurt performance badly, so the
5030 + * user can specify the maximum number of transitions per state allowed in the sparse format.
5032 + * Standard Full Matrix Format
5033 + * ---------------------------
5034 + * acstate_t ** NextState ( 1st index is row/state, 2nd index is column=event/input)
5038 + * events -> a b c d e f g h i j k l m n o p
5040 + * N 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0
5042 + * Sparse Format, each row : Words Value
5043 + * 1-1 fmt(0-full,1-sparse,2-banded,3-sparsebands)
5044 + * 2-2 bool match flag (indicates this state has pattern matches)
5045 + * 3-3 sparse state count ( # of input/next-state pairs )
5046 + * 4-3+2*cnt 'input,next-state' pairs... each sizeof(acstate_t)
5048 + * above example case yields:
5049 + * Full Format: 0, 0, 1 7 0 0 0 3 0 0 0 0 0 0 0 0 0 0 ... (fmt, match flag, then the next states)
5050 + * Sparse format: 1, 0, 3, 'a',1,'b',7,'f',3 - uses 3+2*ntransitions words (non-default transitions only)
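+ *
+ * A minimal lookup over one sparse-formatted row, following the word layout
+ * above (illustrative sketch, not part of the patch; 'ps' points at the row):
+ *
+ *   acstate_t n = ps[2];                 -- number of input/next-state pairs
+ *   for (ps += 3; n > 0; n--, ps += 2)
+ *     if (ps[0] == input) return ps[1];  -- matching pair -> next state
+ *   return 0;                            -- default state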
5053 +Conv_Full_DFA_To_Sparse(ACSM_STRUCT2 * acsm)
5056 + acstate_t * p, state, maxstates=0;
5057 + acstate_t ** NextState = acsm->acsmNextState;
5058 + acstate_t full[MAX_ALPHABET_SIZE];
5060 + for(k=0;k<acsm->acsmMaxStates;k++)
5064 + List_ConvToFull(acsm, (acstate_t)k, full );
5066 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
5069 + if( state != 0 && state != ACSM_FAIL_STATE2 ) cnt++;
5072 + if( cnt > 0 ) maxstates++;
5074 + if( k == 0 || cnt > acsm->acsmSparseMaxRowNodes )
5076 + p = AC_MALLOC(sizeof(acstate_t)*(acsm->acsmAlphabetSize+2) );
5081 + memcpy(&p[2],full,acsm->acsmAlphabetSize*sizeof(acstate_t));
5085 + p = AC_MALLOC(sizeof(acstate_t)*(3+2*cnt));
5089 + p[m++] = ACF_SPARSE;
5090 + p[m++] = 0; /* no matches */
5093 + for(i = 0; i < acsm->acsmAlphabetSize ; i++)
5096 + if( state != 0 && state != ACSM_FAIL_STATE2 )
5104 + NextState[k] = p; /* now we are a sparse formatted state transition array */
5110 + Convert Full matrix to Banded row format.
5114 + 2 n number of values
5115 + 3 i index of 1st value (0-255)
5116 + 4 - 3+n next-state values at each index
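+
+ Lookup is then a simple range check (illustrative sketch; 'ps' points at
+ the row and 'c' is the input byte):
+
+   n = ps[2]; first = ps[3];
+   next = (c < first || c >= first + n) ? 0 : ps[4 + c - first];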
5120 +Conv_Full_DFA_To_Banded(ACSM_STRUCT2 * acsm)
5122 + int first = -1, last;
5123 + acstate_t * p, state, full[MAX_ALPHABET_SIZE];
5124 + acstate_t ** NextState = acsm->acsmNextState;
5127 + for(k=0;k<acsm->acsmMaxStates;k++)
5131 + List_ConvToFull(acsm, (acstate_t)k, full );
5136 + for (i = 0; i < acsm->acsmAlphabetSize; i++)
5140 + if( state !=0 && state != ACSM_FAIL_STATE2 )
5142 + if( first < 0 ) first = i;
5147 + /* calc band width */
5148 + cnt= last - first + 1;
5150 + p = AC_MALLOC(sizeof(acstate_t)*(4+cnt));
5155 + p[m++] = ACF_BANDED;
5156 + p[m++] = 0; /* no matches */
5160 + for(i = first; i <= last; i++)
5165 + NextState[k] = p; /* now we are a banded formatted state transition array */
5172 + * Convert full matrix to Sparse Band row format.
5174 + * next - Full formatted row of next states
5175 + * asize - size of alphabet
5176 + * zcnt - max number of zeros in a run of zeros in any given band.
5179 + * 1 ACF_SPARSEBANDS
5180 + * 2 number of bands
5181 + * repeat 3 - 5+ ....once for each band in this row.
5182 + * 3 number of items in this band
+ * 4 start index of this band
5183 + * 5- next-state values in this band...
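+ *
+ * Example (illustrative): with zmax = 2, a row whose only non-zero next
+ * states sit at inputs 'a'..'c' and 'x'..'z' is split into two bands,
+ * begin/end = ('a','c') and ('x','z'), because the run of zeros between
+ * them is longer than zmax.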
5186 +int calcSparseBands( acstate_t * next, int * begin, int * end, int asize, int zmax )
5188 + int i, nbands,zcnt,last=0;
5192 + for( i=0; i<asize; i++ )
5196 + if( state !=0 && state != ACSM_FAIL_STATE2 )
5198 + begin[nbands] = i;
5201 + for( ; i< asize; i++ )
5204 + if( state ==0 || state == ACSM_FAIL_STATE2 )
5207 + if( zcnt > zmax ) break;
5216 + end[nbands++] = last;
5230 + * 1 SPARSEBANDS format indicator
5231 + * 2 bool indicates a pattern match in this state
5232 + * 3 number of sparse bands
5233 + * 4 number of elements in this band
5234 + * 5 start index of this band
5235 + * 6- list of next states
5237 + * m number of elements in this band
5238 + * m+1 start index of this band
5239 + * m+2- list of next states
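+ *
+ * Decoding sketch (illustrative; it mirrors SparseGetNextStateDFA below):
+ *   nb = ps[2]; ps += 3;
+ *   while (nb-- > 0) {              -- walk the bands in order
+ *     n = ps[0]; index = ps[1];     -- band length and first input
+ *     if (input < index) return 0;  -- bands are sorted: no match possible
+ *     if (input < index + n) return ps[2 + input - index];
+ *     ps += n + 2;                  -- skip to the next band
+ *   }
+ *   return 0;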
5242 +Conv_Full_DFA_To_SparseBands(ACSM_STRUCT2 * acsm)
5245 + acstate_t ** NextState = acsm->acsmNextState;
5246 + int cnt,m,k,i,zcnt=acsm->acsmSparseMaxZcnt;
5248 + int band_begin[MAX_ALPHABET_SIZE];
5249 + int band_end[MAX_ALPHABET_SIZE];
5251 + acstate_t full[MAX_ALPHABET_SIZE];
5253 + for(k=0;k<acsm->acsmMaxStates;k++)
5257 + List_ConvToFull(acsm, (acstate_t)k, full );
5259 + nbands = calcSparseBands( full, band_begin, band_end, acsm->acsmAlphabetSize, zcnt );
5261 + /* calc band width space*/
5263 + for(i=0;i<nbands;i++)
5266 + cnt += band_end[i] - band_begin[i] + 1;
5268 + /*printk("state %d: sparseband %d, first=%d, last=%d, cnt=%d\n",k,i,band_begin[i],band_end[i],band_end[i]-band_begin[i]+1); */
5271 + p = AC_MALLOC(sizeof(acstate_t)*(cnt));
5276 + p[m++] = ACF_SPARSEBANDS;
5277 + p[m++] = 0; /* no matches */
5280 + for( i=0;i<nbands;i++ )
5282 + p[m++] = band_end[i] - band_begin[i] + 1; /* # states in this band */
5283 + p[m++] = band_begin[i]; /* start index */
5285 + for( j=band_begin[i]; j<=band_end[i]; j++ )
5287 + p[m++] = full[j]; /* some states may be state zero */
5291 + NextState[k] = p; /* now we are a sparse-banded formatted state transition array */
5299 + * Convert an NFA or DFA row from sparse to full format
5300 + * and store into the 'full' buffer.
5303 + * 0 - failed, no state transitions
5304 + * *p - pointer to 'full' buffer
5309 + acstate_t * acsmConvToFull(ACSM_STRUCT2 * acsm, acstate_t k, acstate_t * full )
5312 + acstate_t * p, n, fmt, index, nb, bmatch;
5313 + acstate_t ** NextState = acsm->acsmNextState;
5317 + if( !p ) return 0;
5323 + if( fmt == ACF_SPARSE )
5326 + for( ; n>0; n--, p+=2 )
5328 + full[ p[0] ] = p[1];
5331 + else if( fmt == ACF_BANDED )
5337 + for( ; n>0; n--, p++ )
5339 + full[ index++ ] = p[0];
5342 + else if( fmt == ACF_SPARSEBANDS )
5349 + for( ; n>0; n--, p++ )
5351 + full[ index++ ] = p[0];
5355 + else if( fmt == ACF_FULL )
5357 + memcpy(full,p,acsm->acsmAlphabetSize*sizeof(acstate_t));
5365 + * Select the desired storage mode
5367 +int acsmSelectFormat2( ACSM_STRUCT2 * acsm, int m )
5374 + case ACF_SPARSEBANDS:
5375 + acsm->acsmFormat = m;
5386 +void acsmSetMaxSparseBandZeros2( ACSM_STRUCT2 * acsm, int n )
5388 + acsm->acsmSparseMaxZcnt = n;
5393 +void acsmSetMaxSparseElements2( ACSM_STRUCT2 * acsm, int n )
5395 + acsm->acsmSparseMaxRowNodes = n;
5400 +int acsmSelectFSA2( ACSM_STRUCT2 * acsm, int m )
5407 + acsm->acsmFSA = m;
5415 +int acsmSetAlphabetSize2( ACSM_STRUCT2 * acsm, int n )
5417 + if( n <= MAX_ALPHABET_SIZE )
5419 + acsm->acsmAlphabetSize = n;
5428 + * Create a new AC state machine
5430 +static ACSM_STRUCT2 * acsmNew2 (void)
5436 + p = (ACSM_STRUCT2 *) AC_MALLOC(sizeof (ACSM_STRUCT2));
5437 + MEMASSERT (p, "acsmNew");
5441 + memset (p, 0, sizeof (ACSM_STRUCT2));
5443 + /* Some defaults */
5444 + p->acsmFSA = FSA_DFA;
5445 + p->acsmFormat = ACF_BANDED;
5446 + p->acsmAlphabetSize = 256;
5447 + p->acsmSparseMaxRowNodes = 256;
5448 + p->acsmSparseMaxZcnt = 10;
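+
+/*
+ * Typical lifecycle of a state machine built with this API (illustrative
+ * sketch, compiled out; the pattern, callback and buffer below are
+ * hypothetical examples, not part of the patch):
+ */
+#if 0
+static int my_match(void *id, int index, void *data) { return 0; /* 0 = keep searching */ }
+
+static void acsm_example(void)
+{
+  ACSM_STRUCT2 *acsm = acsmNew2();
+  acsmSelectFormat2(acsm, ACF_FULL);   /* or ACF_BANDED / ACF_SPARSE / ACF_SPARSEBANDS */
+  acsmSelectFSA2(acsm, FSA_DFA);
+  acsmAddPattern2(acsm, (unsigned char *)"GET", 3, 1 /* nocase */, 0, 0, NULL, 0);
+  acsmCompile2(acsm);
+  acsmSearch2(acsm, (unsigned char *)"get /index.html", 15, my_match, NULL);
+  acsmFree2(acsm);
+}
+#endif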
5454 + * Add a pattern to the list of patterns for this state machine
5458 +acsmAddPattern2 (ACSM_STRUCT2 * p, unsigned char *pat, int n, int nocase,
5459 + int offset, int depth, void * id, int iid)
5461 + ACSM_PATTERN2 * plist;
5463 + plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
5464 + MEMASSERT (plist, "acsmAddPattern");
5466 + plist->patrn = (unsigned char *) AC_MALLOC ( n );
5467 + MEMASSERT (plist->patrn, "acsmAddPattern");
5469 + ConvertCaseEx(plist->patrn, pat, n);
5471 + plist->casepatrn = (unsigned char *) AC_MALLOC ( n );
5472 + MEMASSERT (plist->casepatrn, "acsmAddPattern");
5474 + memcpy (plist->casepatrn, pat, n);
5477 + plist->nocase = nocase;
5478 + plist->offset = offset;
5479 + plist->depth = depth;
5483 + plist->next = p->acsmPatterns;
5484 + p->acsmPatterns = plist;
5489 + * Add a Key to the list of key+data pairs
5491 +int acsmAddKey2(ACSM_STRUCT2 * p, unsigned char *key, int klen, int nocase, void * data)
5493 + ACSM_PATTERN2 * plist;
5495 + plist = (ACSM_PATTERN2 *) AC_MALLOC (sizeof (ACSM_PATTERN2));
5496 + MEMASSERT (plist, "acsmAddPattern");
5498 + plist->patrn = (unsigned char *) AC_MALLOC (klen);
5499 + memcpy (plist->patrn, key, klen);
5501 + plist->casepatrn = (unsigned char *) AC_MALLOC (klen);
5502 + memcpy (plist->casepatrn, key, klen);
5505 + plist->nocase = nocase;
5506 + plist->offset = 0;
5511 + plist->next = p->acsmPatterns;
5512 + p->acsmPatterns = plist;
5518 + * Copy the boolean match flag into the NextState table, for caching purposes.
5521 +void acsmUpdateMatchStates( ACSM_STRUCT2 * acsm )
5524 + acstate_t ** NextState = acsm->acsmNextState;
5525 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
5527 + for( state=0; state<acsm->acsmNumStates; state++ )
5529 + if( MatchList[state] )
5531 + NextState[state][1] = 1;
5535 + NextState[state][1] = 0;
5541 + * Compile State Machine - NFA or DFA and Full or Banded or Sparse or SparseBands
5544 +acsmCompile2 (ACSM_STRUCT2 * acsm)
5547 + ACSM_PATTERN2 * plist;
5549 + /* Count number of states */
5550 + for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
5552 + acsm->acsmMaxStates += plist->n;
5553 + /* acsm->acsmMaxStates += plist->n*2; if we handle case in the table */
5555 + acsm->acsmMaxStates++; /* one extra */
5557 + /* Alloc a List based State Transition table */
5558 + acsm->acsmTransTable =(trans_node_t**) AC_MALLOC(sizeof(trans_node_t*) * acsm->acsmMaxStates );
5559 + MEMASSERT (acsm->acsmTransTable, "acsmCompile");
5561 + memset (acsm->acsmTransTable, 0, sizeof(trans_node_t*) * acsm->acsmMaxStates);
5563 + /* Alloc a failure table - this has a failure state, and a match list for each state */
5564 + acsm->acsmFailState =(acstate_t*) AC_MALLOC(sizeof(acstate_t) * acsm->acsmMaxStates );
5565 + MEMASSERT (acsm->acsmFailState, "acsmCompile");
5567 + memset (acsm->acsmFailState, 0, sizeof(acstate_t) * acsm->acsmMaxStates );
5569 + /* Alloc a MatchList table - this has a list of pattern matches for each state, if any */
5570 + acsm->acsmMatchList=(ACSM_PATTERN2**) AC_MALLOC(sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
5571 + MEMASSERT (acsm->acsmMatchList, "acsmCompile");
5573 + memset (acsm->acsmMatchList, 0, sizeof(ACSM_PATTERN2*) * acsm->acsmMaxStates );
5575 + /* Alloc a separate state transition table: in state 's', on event 'k', transition to the 'next' state */
5576 + acsm->acsmNextState=(acstate_t**)AC_MALLOC( acsm->acsmMaxStates * sizeof(acstate_t*) );
5577 + MEMASSERT(acsm->acsmNextState, "acsmCompile-NextState");
5579 + for (k = 0; k < acsm->acsmMaxStates; k++)
5581 + acsm->acsmNextState[k]=(acstate_t*)0;
5584 + /* Initialize state zero as a branch */
5585 + acsm->acsmNumStates = 0;
5587 + /* Add the 0'th state, */
5588 + //acsm->acsmNumStates++;
5590 + /* Add each Pattern to the State Table - This forms a keywords state table */
5591 + for (plist = acsm->acsmPatterns; plist != NULL; plist = plist->next)
5593 + AddPatternStates (acsm, plist);
5596 + acsm->acsmNumStates++;
5598 + if( acsm->acsmFSA == FSA_DFA || acsm->acsmFSA == FSA_NFA )
5600 + /* Build the NFA */
5604 + if( acsm->acsmFSA == FSA_DFA )
5606 + /* Convert the NFA to a DFA */
5607 + Convert_NFA_To_DFA (acsm);
5612 + * Select Final Transition Table Storage Mode
5615 + if( acsm->acsmFormat == ACF_SPARSE )
5617 + /* Convert DFA Full matrix to a Sparse matrix */
5618 + if( Conv_Full_DFA_To_Sparse(acsm) )
5622 + else if( acsm->acsmFormat == ACF_BANDED )
5624 + /* Convert DFA Full matrix to a Sparse matrix */
5625 + if( Conv_Full_DFA_To_Banded(acsm) )
5629 + else if( acsm->acsmFormat == ACF_SPARSEBANDS )
5631 + /* Convert DFA Full matrix to a Sparse matrix */
5632 + if( Conv_Full_DFA_To_SparseBands(acsm) )
5635 + else if( acsm->acsmFormat == ACF_FULL )
5637 + if( Conv_List_To_Full( acsm ) )
5641 + acsmUpdateMatchStates( acsm ); /* load boolean match flags into state table */
5643 + /* Free up the Table Of Transition Lists */
5644 + List_FreeTransTable( acsm );
5646 + /* For now -- show this info */
5648 + * acsmPrintInfo( acsm );
5652 + /* Accrue Summary State Stats */
5653 + summary.num_states += acsm->acsmNumStates;
5654 + summary.num_transitions += acsm->acsmNumTrans;
5656 + memcpy( &summary.acsm, acsm, sizeof(ACSM_STRUCT2));
5662 + * Get the NextState from the NFA, all NFA storage formats use this
5665 +acstate_t SparseGetNextStateNFA(acstate_t * ps, acstate_t state, unsigned input)
5674 + ps++; /* skip bMatchState */
5683 + if( input < index )
5691 + return (acstate_t)ACSM_FAIL_STATE2;
5694 + if( input >= index + n )
5702 + return (acstate_t)ACSM_FAIL_STATE2;
5705 + if( ps[input-index] == 0 )
5709 + return ACSM_FAIL_STATE2;
5713 + return (acstate_t) ps[input-index];
5718 + n = *ps++; /* number of sparse index-value entries */
5720 + for( ; n>0 ; n-- )
5722 + if( ps[0] > input ) /* cannot match the input, already a higher value than the input */
5724 + return (acstate_t)ACSM_FAIL_STATE2; /* default state */
5726 + else if( ps[0] == input )
5728 + return ps[1]; /* next state */
5736 + return ACSM_FAIL_STATE2;
5739 + case ACF_SPARSEBANDS:
5741 + nb = *ps++; /* number of bands */
5743 + while( nb > 0 ) /* for each band */
5745 + n = *ps++; /* number of elements */
5746 + index = *ps++; /* 1st element value */
5748 + if( input < index )
5752 + return (acstate_t)ACSM_FAIL_STATE2;
5754 + return (acstate_t)0;
5756 + if( (input >= index) && (input < (index + n)) )
5758 + if( ps[input-index] == 0 )
5762 + return ACSM_FAIL_STATE2;
5765 + return (acstate_t) ps[input-index];
5772 + return (acstate_t)ACSM_FAIL_STATE2;
5774 + return (acstate_t)0;
5779 + if( ps[input] == 0 )
5783 + return ACSM_FAIL_STATE2;
5796 + * Get the NextState from the DFA Next State Transition table
5797 + * Full and banded are supported separately, this is for
5798 + * sparse and sparse-bands
5801 +acstate_t SparseGetNextStateDFA(acstate_t * ps, acstate_t state, unsigned input)
5811 + /* n=ps[2] : number of entries in the band */
5812 + /* index=ps[3] : index of the 1st entry, sequential thereafter */
5814 + if( input < ps[3] ) return 0;
5815 + if( input >= (ps[3]+ps[2]) ) return 0;
5817 + return ps[4+input-ps[3]];
5823 + return ps[2+input];
5829 + n = ps[2]; /* number of entries/ key+next pairs */
5833 + for( ; n>0 ; n-- )
5835 + if( input < ps[0] ) /* cannot match the input, already a higher value than the input */
5837 + return (acstate_t)0; /* default state */
5839 + else if( ps[0] == input )
5841 + return ps[1]; /* next state */
5845 + return (acstate_t)0;
5850 + case ACF_SPARSEBANDS:
5852 + nb = ps[2]; /* number of bands */
5856 + while( nb > 0 ) /* for each band */
5858 + n = ps[0]; /* number of elements in this band */
5859 + index = ps[1]; /* start index/char of this band */
5860 + if( input < index )
5862 + return (acstate_t)0;
5864 + if( (input < (index + n)) )
5866 + return (acstate_t) ps[2+input-index];
5871 + return (acstate_t)0;
5878 + * Search Text or Binary Data for Pattern matches
5880 + * Sparse & Sparse-Banded Matrix search
5885 +acsmSearchSparseDFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
5886 + int (*Match) (void * id, int index, void *data),
5890 + ACSM_PATTERN2 * mlist;
5891 + unsigned char * Tend;
5893 + unsigned char * T, * Tc;
5895 + acstate_t ** NextState = acsm->acsmNextState;
5896 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
5902 + for( state = 0; T < Tend; T++ )
5904 + state = SparseGetNextStateDFA ( NextState[state], state, xlatcase[*T] );
5906 + /* test if this state has any matching patterns */
5907 + if( NextState[state][1] )
5909 + for( mlist = MatchList[state];
5911 + mlist = mlist->next )
5913 + index = T - mlist->n - Tc;
5914 + if( mlist->nocase )
5917 + if (Match (mlist->id, index, data))
5922 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
5925 + if (Match (mlist->id, index, data))
5935 + * Full format DFA search
5936 + * Do not change anything here without testing; caching and prefetching
5937 + * performance is very sensitive to any changes.
5940 + * 1) replaced ConvertCaseEx with inline xlatcase - this improves performance 5-10%
5941 + * 2) using 'nocase' improves performance again by 10-15%, since memcmp is not needed
5947 +acsmSearchSparseDFA_Full(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
5948 + int (*Match) (void * id, int index, void *data),
5951 + ACSM_PATTERN2 * mlist;
5952 + unsigned char * Tend;
5953 + unsigned char * T;
5958 + acstate_t ** NextState = acsm->acsmNextState;
5959 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
5965 + for( state = 0; T < Tend; T++ )
5967 + ps = NextState[ state ];
5969 + sindex = xlatcase[ T[0] ];
5971 + /* check the current state for a pattern match */
5974 + for( mlist = MatchList[state];
5976 + mlist = mlist->next )
5978 + index = T - mlist->n - Tx;
5981 + if( mlist->nocase )
5984 + if (Match (mlist->id, index, data))
5989 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n ) == 0 )
5992 + if (Match (mlist->id, index, data))
6000 + state = ps[ 2u + sindex ];
6003 + /* Check the last state for a pattern match */
6004 + for( mlist = MatchList[state];
6006 + mlist = mlist->next )
6008 + index = T - mlist->n - Tx;
6010 + if( mlist->nocase )
6013 + if (Match (mlist->id, index, data))
6018 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
6021 + if (Match (mlist->id, index, data))
6030 + * Banded-Row format DFA search
6031 + * Do not change anything here; caching and prefetching
6032 + * performance is very sensitive to any changes.
6034 + * ps[0] = storage fmt
6035 + * ps[1] = bool match flag
6036 + * ps[2] = # elements in band
6037 + * ps[3] = index of 1st element
6042 +acsmSearchSparseDFA_Banded(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
6043 + int (*Match) (void * id, int index, void *data),
6047 + unsigned char * Tend;
6048 + unsigned char * T;
6051 + acstate_t ** NextState = acsm->acsmNextState;
6052 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
6053 + ACSM_PATTERN2 * mlist;
6060 + for( state = 0; T < Tend; T++ )
6062 + ps = NextState[state];
6064 + sindex = xlatcase[ T[0] ];
6066 + /* test if this state has any matching patterns */
6069 + for( mlist = MatchList[state];
6071 + mlist = mlist->next )
6073 + index = T - mlist->n - Tx;
6075 + if( mlist->nocase )
6078 + if (Match (mlist->id, index, data))
6083 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
6086 + if (Match (mlist->id, index, data))
6093 + if( sindex < ps[3] ) state = 0;
6094 + else if( sindex >= (ps[3] + ps[2]) ) state = 0;
6095 + else state = ps[ 4u + sindex - ps[3] ];
6098 + /* Check the last state for a pattern match */
6099 + for( mlist = MatchList[state];
6101 + mlist = mlist->next )
6103 + index = T - mlist->n - Tx;
6105 + if( mlist->nocase )
6108 + if (Match (mlist->id, index, data))
6113 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
6116 + if (Match (mlist->id, index, data))
6128 + * Search Text or Binary Data for Pattern matches
6130 + * Sparse Storage Version
6135 +acsmSearchSparseNFA(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
6136 + int (*Match) (void * id, int index, void *data),
6140 + ACSM_PATTERN2 * mlist;
6141 + unsigned char * Tend;
6143 + unsigned char * T, *Tc;
6145 + acstate_t ** NextState= acsm->acsmNextState;
6146 + acstate_t * FailState= acsm->acsmFailState;
6147 + ACSM_PATTERN2 ** MatchList = acsm->acsmMatchList;
6148 + unsigned char Tchar;
6154 + for( state = 0; T < Tend; T++ )
6158 + Tchar = xlatcase[ *T ];
6160 + while( (nstate=SparseGetNextStateNFA(NextState[state],state,Tchar))==ACSM_FAIL_STATE2 )
6161 + state = FailState[state];
6165 + for( mlist = MatchList[state];
6167 + mlist = mlist->next )
6169 + index = T - mlist->n - Tx;
6170 + if( mlist->nocase )
6173 + if (Match (mlist->id, index, data))
6178 + if( memcmp (mlist->casepatrn, Tx + index, mlist->n) == 0 )
6181 + if (Match (mlist->id, index, data))
6195 +acsmSearch2(ACSM_STRUCT2 * acsm, unsigned char *Tx, int n,
6196 + int (*Match) (void * id, int index, void *data),
6200 + switch( acsm->acsmFSA )
6204 + if( acsm->acsmFormat == ACF_FULL )
6206 + return acsmSearchSparseDFA_Full( acsm, Tx, n, Match,data );
6208 + else if( acsm->acsmFormat == ACF_BANDED )
6210 + return acsmSearchSparseDFA_Banded( acsm, Tx, n, Match,data );
6214 + return acsmSearchSparseDFA( acsm, Tx, n, Match,data );
6219 + return acsmSearchSparseNFA( acsm, Tx, n, Match,data );
6233 +acsmFree2 (ACSM_STRUCT2 * acsm)
6236 + ACSM_PATTERN2 * mlist, *ilist;
6237 + for (i = 0; i < acsm->acsmMaxStates; i++)
6239 + mlist = acsm->acsmMatchList[i];
6244 + mlist = mlist->next;
6247 + AC_FREE(acsm->acsmNextState[i]);
6249 + AC_FREE(acsm->acsmFailState);
6250 + AC_FREE(acsm->acsmMatchList);
6253 +/* ********************************** */
6255 +static void ring_sock_destruct(struct sock *sk) {
6257 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
6258 + skb_queue_purge(&sk->sk_receive_queue);
6260 + if (!sock_flag(sk, SOCK_DEAD)) {
6261 +#if defined(RING_DEBUG)
6262 + printk("Attempt to release alive ring socket: %p\n", sk);
6267 + BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
6268 + BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
6271 + BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
6272 + BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
6275 +#if defined(RING_DEBUG)
6276 + printk("Attempt to release alive ring socket: %p\n", sk);
6282 + kfree(ring_sk(sk));
6284 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
6285 + MOD_DEC_USE_COUNT;
6289 +/* ********************************** */
6291 +static void ring_proc_add(struct ring_opt *pfr) {
6292 + if(ring_proc_dir != NULL) {
6295 + pfr->ring_pid = current->pid;
6297 + snprintf(name, sizeof(name), "%d", pfr->ring_pid);
6298 + create_proc_read_entry(name, 0, ring_proc_dir,
6299 + ring_proc_get_info, pfr);
6300 + /* printk("PF_RING: added /proc/net/pf_ring/%s\n", name); */
6304 +/* ********************************** */
6306 +static void ring_proc_remove(struct ring_opt *pfr) {
6307 + if(ring_proc_dir != NULL) {
6310 + snprintf(name, sizeof(name), "%d", pfr->ring_pid);
6311 + remove_proc_entry(name, ring_proc_dir);
6312 + /* printk("PF_RING: removed /proc/net/pf_ring/%s\n", name); */
6316 +/* ********************************** */
6318 +static int ring_proc_get_info(char *buf, char **start, off_t offset,
6319 + int len, int *unused, void *data)
6322 + struct ring_opt *pfr;
6323 + FlowSlotInfo *fsi;
6325 + if(data == NULL) {
6326 + /* /proc/net/pf_ring/info */
6327 + rlen = sprintf(buf,"Version : %s\n", RING_VERSION);
6328 + rlen += sprintf(buf + rlen,"Bucket length : %d bytes\n", bucket_len);
6329 + rlen += sprintf(buf + rlen,"Ring slots : %d\n", num_slots);
6330 + rlen += sprintf(buf + rlen,"Sample rate : %d [1=no sampling]\n", sample_rate);
6332 + rlen += sprintf(buf + rlen,"Capture TX : %s\n",
6333 + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
6334 + rlen += sprintf(buf + rlen,"Transparent mode : %s\n",
6335 + transparent_mode ? "Yes" : "No");
6336 + rlen += sprintf(buf + rlen,"Total rings : %d\n", ring_table_size);
6338 + /* detailed statistics about a PF_RING */
6339 + pfr = (struct ring_opt*)data;
6342 + fsi = pfr->slots_info;
6345 + rlen = sprintf(buf, "Bound Device : %s\n",
6346 + pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
6347 + rlen += sprintf(buf + rlen,"Version : %d\n", fsi->version);
6348 + rlen += sprintf(buf + rlen,"Sampling Rate : %d\n", pfr->sample_rate);
6349 + rlen += sprintf(buf + rlen,"BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
6350 + rlen += sprintf(buf + rlen,"Bloom Filters : %s\n", pfr->bitmask_enabled ? "Enabled" : "Disabled");
6351 + rlen += sprintf(buf + rlen,"Pattern Search: %s\n", pfr->acsm ? "Enabled" : "Disabled");
6352 + rlen += sprintf(buf + rlen,"Cluster Id : %d\n", pfr->cluster_id);
6353 + rlen += sprintf(buf + rlen,"Tot Slots : %d\n", fsi->tot_slots);
6354 + rlen += sprintf(buf + rlen,"Slot Len : %d\n", fsi->slot_len);
6355 + rlen += sprintf(buf + rlen,"Data Len : %d\n", fsi->data_len);
6356 + rlen += sprintf(buf + rlen,"Tot Memory : %d\n", fsi->tot_mem);
6357 + rlen += sprintf(buf + rlen,"Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
6358 + rlen += sprintf(buf + rlen,"Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
6359 + rlen += sprintf(buf + rlen,"Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
6360 + rlen += sprintf(buf + rlen,"Tot Read : %lu\n", (unsigned long)fsi->tot_read);
6363 + rlen = sprintf(buf, "WARNING fsi == NULL\n");
6365 + rlen = sprintf(buf, "WARNING data == NULL\n");
6371 +/* ********************************** */
6373 +static void ring_proc_init(void) {
6374 + ring_proc_dir = proc_mkdir("pf_ring", proc_net);
6376 + if(ring_proc_dir) {
6377 + ring_proc_dir->owner = THIS_MODULE;
6378 + ring_proc = create_proc_read_entry("info", 0, ring_proc_dir,
6379 + ring_proc_get_info, NULL);
6381 + printk("PF_RING: unable to register proc file\n");
6383 + ring_proc->owner = THIS_MODULE;
6384 + printk("PF_RING: registered /proc/net/pf_ring/\n");
6387 + printk("PF_RING: unable to create /proc/net/pf_ring\n");
6390 +/* ********************************** */
6392 +static void ring_proc_term(void) {
6393 + if(ring_proc != NULL) {
6394 + remove_proc_entry("info", ring_proc_dir);
6395 + if(ring_proc_dir != NULL) remove_proc_entry("pf_ring", proc_net);
6397 + printk("PF_RING: deregistered /proc/net/pf_ring\n");
6401 +/* ********************************** */
6406 + * store the sk in a new element and add it
6407 + * to the head of the list.
6409 +static inline void ring_insert(struct sock *sk) {
6410 + struct ring_element *next;
6412 +#if defined(RING_DEBUG)
6413 + printk("RING: ring_insert()\n");
6416 + next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
6417 + if(next != NULL) {
6419 + write_lock_irq(&ring_mgmt_lock);
6420 + list_add(&next->list, &ring_table);
6421 + write_unlock_irq(&ring_mgmt_lock);
6423 + if(net_ratelimit())
6424 + printk("RING: could not kmalloc slot!!\n");
6427 + ring_table_size++;
6428 + ring_proc_add(ring_sk(sk));
6431 +/* ********************************** */
6436 + * For each of the elements in the list:
6437 + * - check if this is the element we want to delete
6438 + * - if it is, remove it from the list, and free it.
6440 + * stop when we find the one we're looking for (break),
6441 + * or when we reach the end of the list.
6443 +static inline void ring_remove(struct sock *sk) {
6444 + struct list_head *ptr;
6445 + struct ring_element *entry;
6447 + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
6448 + entry = list_entry(ptr, struct ring_element, list);
6450 + if(entry->sk == sk) {
6453 + ring_table_size--;
6459 +/* ********************************** */
6461 +static u_int32_t num_queued_pkts(struct ring_opt *pfr) {
6463 + if(pfr->ring_slots != NULL) {
6465 + u_int32_t tot_insert = pfr->slots_info->insert_idx,
6466 +#if defined(RING_DEBUG)
6467 + tot_read = pfr->slots_info->tot_read, tot_pkts;
6469 + tot_read = pfr->slots_info->tot_read;
6472 + if(tot_insert >= tot_read) {
6473 +#if defined(RING_DEBUG)
6474 + tot_pkts = tot_insert-tot_read;
6476 + return(tot_insert-tot_read);
6478 +#if defined(RING_DEBUG)
6479 + tot_pkts = ((u_int32_t)-1)+tot_insert-tot_read;
6481 + return(((u_int32_t)-1)+tot_insert-tot_read);
6484 +#if defined(RING_DEBUG)
6485 + printk("-> num_queued_pkts=%d [tot_insert=%d][tot_read=%d]\n",
6486 + tot_pkts, tot_insert, tot_read);
6493 +/* ********************************** */
6495 +static inline FlowSlot* get_insert_slot(struct ring_opt *pfr) {
6496 +#if defined(RING_DEBUG)
6497 + printk("get_insert_slot(%d)\n", pfr->slots_info->insert_idx);
6500 + if(pfr->ring_slots != NULL) {
6501 + FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
6502 + *pfr->slots_info->slot_len]);
6508 +/* ********************************** */
6510 +static inline FlowSlot* get_remove_slot(struct ring_opt *pfr) {
6511 +#if defined(RING_DEBUG)
6512 + printk("get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
6515 + if(pfr->ring_slots != NULL)
6516 + return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
6517 + pfr->slots_info->slot_len]));
6522 +/* ******************************************************* */
6524 +static int parse_pkt(struct sk_buff *skb, u_int16_t skb_displ,
6525 + u_int8_t *l3_proto, u_int16_t *eth_type,
6526 + u_int16_t *l3_offset, u_int16_t *l4_offset,
6527 + u_int16_t *vlan_id, u_int32_t *ipv4_src,
6528 + u_int32_t *ipv4_dst,
6529 + u_int16_t *l4_src_port, u_int16_t *l4_dst_port,
6530 + u_int16_t *payload_offset) {
6532 + struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
6535 + *l3_offset = *l4_offset = *l3_proto = *payload_offset = 0;
6536 + *eth_type = ntohs(eh->h_proto);
6538 + if(*eth_type == 0x8100 /* 802.1q (VLAN) */) {
6539 + (*vlan_id) = (skb->data[14] & 15)*256 + skb->data[15];
6540 + *eth_type = (skb->data[16])*256 + skb->data[17];
6544 + (*vlan_id) = (u_int16_t)-1;
6547 + if(*eth_type == 0x0800 /* IP */) {
6548 + *l3_offset = displ+sizeof(struct ethhdr);
6549 + ip = (struct iphdr*)(skb->data-skb_displ+(*l3_offset));
6551 + *ipv4_src = ntohl(ip->saddr), *ipv4_dst = ntohl(ip->daddr), *l3_proto = ip->protocol;
6553 + if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP)) {
6554 + *l4_offset = (*l3_offset)+(ip->ihl*4);
6556 + if(ip->protocol == IPPROTO_TCP) {
6557 + struct tcphdr *tcp = (struct tcphdr*)(skb->data-skb_displ+(*l4_offset));
6558 + *l4_src_port = ntohs(tcp->source), *l4_dst_port = ntohs(tcp->dest);
6559 + *payload_offset = (*l4_offset)+(tcp->doff * 4);
6560 + } else if(ip->protocol == IPPROTO_UDP) {
6561 + struct udphdr *udp = (struct udphdr*)(skb->data-skb_displ+(*l4_offset));
6562 + *l4_src_port = ntohs(udp->source), *l4_dst_port = ntohs(udp->dest);
6563 + *payload_offset = (*l4_offset)+sizeof(struct udphdr);
6565 + *payload_offset = (*l4_offset);
6567 + *l4_src_port = *l4_dst_port = 0;
6569 + return(1); /* IP */
6570 + } /* TODO: handle IPv6 */
6572 + return(0); /* No IP */
6575 +/* **************************************************************** */
6577 +static void reset_bitmask(bitmask_selector *selector)
6579 + memset((char*)selector->bits_memory, 0, selector->num_bits/8);
6581 + while(selector->clashes != NULL) {
6582 + bitmask_counter_list *next = selector->clashes->next;
6583 + kfree(selector->clashes);
6584 + selector->clashes = next;
6588 +/* **************************************************************** */
6590 +static void alloc_bitmask(u_int32_t tot_bits, bitmask_selector *selector)
6592 + u_int tot_mem = tot_bits/8;
6594 + if(tot_mem <= PAGE_SIZE)
6595 + selector->order = 1;
6597 + for(selector->order = 0; (PAGE_SIZE << selector->order) < tot_mem; selector->order++)
6601 + printk("BITMASK: [order=%d][tot_mem=%d]\n", selector->order, tot_mem);
6603 + while((selector->bits_memory = __get_free_pages(GFP_ATOMIC, selector->order)) == 0)
6604 + if(selector->order-- == 0)
6607 + if(selector->order == 0) {
6608 + printk("BITMASK: ERROR not enough memory for bitmask\n");
6609 + selector->num_bits = 0;
6613 + tot_mem = PAGE_SIZE << selector->order;
6614 + printk("BITMASK: succesfully allocated [tot_mem=%d][order=%d]\n",
6615 + tot_mem, selector->order);
6617 + selector->num_bits = tot_mem*8;
6618 + selector->clashes = NULL;
6619 + reset_bitmask(selector);
6622 +/* ********************************** */
6624 +static void free_bitmask(bitmask_selector *selector)
6626 + if(selector->bits_memory > 0)
6627 + free_pages(selector->bits_memory, selector->order);
6630 +/* ********************************** */
6632 +static void set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
6633 + u_int32_t idx = the_bit % selector->num_bits;
6635 + if(BITMASK_ISSET(idx, selector)) {
6636 + bitmask_counter_list *head = selector->clashes;
6638 + printk("BITMASK: bit %u was already set\n", the_bit);
6640 + while(head != NULL) {
6641 + if(head->bit_id == the_bit) {
6642 + head->bit_counter++;
6643 + printk("BITMASK: bit %u is now set to %d\n", the_bit, head->bit_counter);
6647 + head = head->next;
6650 + head = kmalloc(sizeof(bitmask_counter_list), GFP_KERNEL);
6652 + head->bit_id = the_bit;
6653 + head->bit_counter = 1 /* previous value */ + 1 /* the requested set */;
6654 + head->next = selector->clashes;
6655 + selector->clashes = head;
6657 + printk("BITMASK: not enough memory\n");
6661 + BITMASK_SET(idx, selector);
6662 + printk("BITMASK: bit %u is now set\n", the_bit);
6666 +/* ********************************** */
6668 +static u_char is_set_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
6669 + u_int32_t idx = the_bit % selector->num_bits;
6670 + return(BITMASK_ISSET(idx, selector));
6673 +/* ********************************** */
6675 +static void clear_bit_bitmask(bitmask_selector *selector, u_int32_t the_bit) {
6676 + u_int32_t idx = the_bit % selector->num_bits;
6678 + if(!BITMASK_ISSET(idx, selector))
6679 + printk("BITMASK: bit %u was not set\n", the_bit);
6681 + bitmask_counter_list *head = selector->clashes, *prev = NULL;
6683 + while(head != NULL) {
6684 + if(head->bit_id == the_bit) {
6685 + head->bit_counter--;
6687 + printk("BITMASK: bit %u is now set to %d\n",
6688 + the_bit, head->bit_counter);
6690 + if(head->bit_counter == 1) {
6691 + /* We can now delete this entry as '1' can be
6692 + accommodated into the bitmask */
6695 + selector->clashes = head->next;
6697 + prev->next = head->next;
6704 + prev = head; head = head->next;
6707 + BITMASK_CLR(idx, selector);
6708 + printk("BITMASK: bit %u is now reset\n", the_bit);
6712 +/* ********************************** */
6714 +/* Hash function */
6715 +static u_int32_t sdb_hash(u_int32_t value) {
6716 + u_int32_t hash = 0, i;
6717 + u_int8_t str[sizeof(value)];
6719 + memcpy(str, &value, sizeof(value));
6721 + for(i = 0; i < sizeof(value); i++) {
6722 + hash = str[i] + (hash << 6) + (hash << 16) - hash;
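+    /* equivalent to hash = hash * 65599 + str[i] -- the classic sdbm string hash */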
6728 +/* ********************************** */
6730 +static void handle_bloom_filter_rule(struct ring_opt *pfr, char *buf) {
6736 + count = strlen(buf);
6738 + printk("PF_RING: -> handle_bloom_filter_rule(%s)\n", buf);
6740 + if((buf[count-1] == '\n') || (buf[count-1] == '\r')) buf[count-1] = '\0';
6743 + u_int32_t the_bit;
6745 + if(!strncmp(&buf[1], "vlan=", 5)) {
6746 + sscanf(&buf[6], "%d", &the_bit);
6749 + set_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_add++;
6751 + clear_bit_bitmask(&pfr->vlan_bitmask, the_bit), pfr->num_vlan_bitmask_remove++;
6752 + } else if(!strncmp(&buf[1], "mac=", 4)) {
6753 + int a, b, c, d, e, f;
6755 + if(sscanf(&buf[5], "%02x:%02x:%02x:%02x:%02x:%02x:",
6756 + &a, &b, &c, &d, &e, &f) == 6) {
6757 + u_int32_t mac_addr = (a & 0xff) + (b & 0xff) + ((c & 0xff) << 24) + ((d & 0xff) << 16) + ((e & 0xff) << 8) + (f & 0xff);
6759 + /* printk("PF_RING: -> [%u][%u][%u][%u][%u][%u] -> [%u]\n", a, b, c, d, e, f, mac_addr); */
6762 + set_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_add++;
6764 + clear_bit_bitmask(&pfr->mac_bitmask, mac_addr), pfr->num_mac_bitmask_remove++;
6766 + printk("PF_RING: -> Invalid MAC address '%s'\n", &buf[5]);
6767 + } else if(!strncmp(&buf[1], "ip=", 3)) {
6770 + if(sscanf(&buf[4], "%d.%d.%d.%d", &a, &b, &c, &d) == 4) {
6771 + u_int32_t ip_addr = ((a & 0xff) << 24) + ((b & 0xff) << 16) + ((c & 0xff) << 8) + (d & 0xff);
6774 + set_bit_bitmask(&pfr->ip_bitmask, ip_addr), set_bit_bitmask(&pfr->ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_add++;
6776 + clear_bit_bitmask(&pfr->ip_bitmask, ip_addr), clear_bit_bitmask(&pfr->twin_ip_bitmask, sdb_hash(ip_addr)), pfr->num_ip_bitmask_remove++;
6778 + printk("PF_RING: -> Invalid IP address '%s'\n", &buf[4]);
6779 + } else if(!strncmp(&buf[1], "port=", 5)) {
6780 + sscanf(&buf[6], "%d", &the_bit);
6783 + set_bit_bitmask(&pfr->port_bitmask, the_bit), set_bit_bitmask(&pfr->port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_add++;
6785 + clear_bit_bitmask(&pfr->port_bitmask, the_bit), clear_bit_bitmask(&pfr->twin_port_bitmask, sdb_hash(the_bit)), pfr->num_port_bitmask_remove++;
6786 + } else if(!strncmp(&buf[1], "proto=", 6)) {
6787 + if(!strncmp(&buf[7], "tcp", 3)) the_bit = 6;
6788 + else if(!strncmp(&buf[7], "udp", 3)) the_bit = 17;
6789 + else if(!strncmp(&buf[7], "icmp", 4)) the_bit = 1;
6790 + else sscanf(&buf[7], "%d", &the_bit);
6793 + set_bit_bitmask(&pfr->proto_bitmask, the_bit);
6795 + clear_bit_bitmask(&pfr->proto_bitmask, the_bit);
6797 + printk("PF_RING: -> Unknown rule type '%s'\n", buf);
6801 +/* ********************************** */
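+
+/*
+ * Rule strings accepted by handle_bloom_filter_rule() above (illustrative
+ * examples based on the parser; the leading character selects whether the
+ * rule is added or removed):
+ *
+ *   "+vlan=100"                 add VLAN id 100
+ *   "+mac=00:11:22:33:44:55"    add a MAC address
+ *   "-ip=192.168.0.1"           remove an IPv4 address
+ *   "+port=80"                  add a TCP/UDP port
+ *   "+proto=tcp"                add a protocol (tcp, udp, icmp or a number)
+ */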
6803 +static void reset_bloom_filters(struct ring_opt *pfr) {
6804 + reset_bitmask(&pfr->mac_bitmask);
6805 + reset_bitmask(&pfr->vlan_bitmask);
6806 + reset_bitmask(&pfr->ip_bitmask); reset_bitmask(&pfr->twin_ip_bitmask);
6807 + reset_bitmask(&pfr->port_bitmask); reset_bitmask(&pfr->twin_port_bitmask);
6808 + reset_bitmask(&pfr->proto_bitmask);
6810 + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
6811 + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
6812 + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
6813 + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
6814 + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
6816 + printk("PF_RING: rules have been reset\n");
6819 +/* ********************************** */
6821 +static void init_blooms(struct ring_opt *pfr) {
6822 + alloc_bitmask(4096, &pfr->mac_bitmask);
6823 + alloc_bitmask(4096, &pfr->vlan_bitmask);
6824 + alloc_bitmask(32768, &pfr->ip_bitmask); alloc_bitmask(32768, &pfr->twin_ip_bitmask);
6825 + alloc_bitmask(4096, &pfr->port_bitmask); alloc_bitmask(4096, &pfr->twin_port_bitmask);
6826 + alloc_bitmask(4096, &pfr->proto_bitmask);
6828 + pfr->num_mac_bitmask_add = pfr->num_mac_bitmask_remove = 0;
6829 + pfr->num_vlan_bitmask_add = pfr->num_vlan_bitmask_remove = 0;
6830 + pfr->num_ip_bitmask_add = pfr->num_ip_bitmask_remove = 0;
6831 + pfr->num_port_bitmask_add = pfr->num_port_bitmask_remove = 0;
6832 + pfr->num_proto_bitmask_add = pfr->num_proto_bitmask_remove = 0;
6834 + reset_bloom_filters(pfr);
6837 +/* ********************************** */
6839 +inline int MatchFound (void* id, int index, void *data) { return(0); }
6841 +/* ********************************** */
6843 +static void add_skb_to_ring(struct sk_buff *skb,
6844 + struct ring_opt *pfr,
6845 + u_char recv_packet,
6846 + u_char real_skb /* 1=skb 0=faked skb */) {
6847 + FlowSlot *theSlot;
6848 + int idx, displ, fwd_pkt = 0;
6851 + /* Hack for identifying a packet received by the e1000 */
6853 + displ = SKB_DISPLACEMENT;
6855 + displ = 0; /* Received by the e1000 wrapper */
6859 + write_lock(&pfr->ring_index_lock);
6860 + pfr->slots_info->tot_pkts++;
6861 + write_unlock(&pfr->ring_index_lock);
6863 + /* BPF Filtering (from af_packet.c) */
6864 + if(pfr->bpfFilter != NULL) {
6865 + unsigned res = 1, len;
6867 + len = skb->len-skb->data_len;
6869 + write_lock(&pfr->ring_index_lock);
6870 + skb->data -= displ;
6871 + res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
6872 + skb->data += displ;
6873 + write_unlock(&pfr->ring_index_lock);
6876 + /* Filter failed */
6878 +#if defined(RING_DEBUG)
6879 + printk("add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
6880 + "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
6881 + (int)skb->len, pfr->slots_info->tot_pkts,
6882 + pfr->slots_info->insert_idx,
6883 + skb->pkt_type, skb->cloned);
6890 + /* ************************** */
6892 + if(pfr->sample_rate > 1) {
6893 + if(pfr->pktToSample == 0) {
6894 + write_lock(&pfr->ring_index_lock);
6895 + pfr->pktToSample = pfr->sample_rate;
6896 + write_unlock(&pfr->ring_index_lock);
6898 + write_lock(&pfr->ring_index_lock);
6899 + pfr->pktToSample--;
6900 + write_unlock(&pfr->ring_index_lock);
6902 +#if defined(RING_DEBUG)
6903 + printk("add_skb_to_ring(skb): sampled packet [len=%d]"
6904 + "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
6905 + (int)skb->len, pfr->slots_info->tot_pkts,
6906 + pfr->slots_info->insert_idx,
6907 + skb->pkt_type, skb->cloned);
6913 + /* ************************************* */
6915 + if((pfr->reflector_dev != NULL)
6916 + && (!netif_queue_stopped(pfr->reflector_dev))) {
6917 + int cpu = smp_processor_id();
6919 + /* increase reference counter so that this skb is not freed */
6920 + atomic_inc(&skb->users);
6922 + skb->data -= displ;
6925 + if (pfr->reflector_dev->xmit_lock_owner != cpu) {
6926 + /* Patch below courtesy of Matthew J. Roth <mroth@imminc.com> */
6927 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
6928 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
6929 + pfr->reflector_dev->xmit_lock_owner = cpu;
6930 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
6932 + netif_tx_lock_bh(pfr->reflector_dev);
6934 + if (pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev) == 0) {
6935 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
6936 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
6937 + pfr->reflector_dev->xmit_lock_owner = -1;
6938 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
6940 + netif_tx_unlock_bh(pfr->reflector_dev);
6942 + skb->data += displ;
6943 +#if defined(RING_DEBUG)
6944 + printk("++ hard_start_xmit succeeded\n");
6949 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18))
6950 + spin_lock_bh(&pfr->reflector_dev->xmit_lock);
6951 + pfr->reflector_dev->xmit_lock_owner = -1;
6952 + spin_unlock_bh(&pfr->reflector_dev->xmit_lock);
6954 + netif_tx_unlock_bh(pfr->reflector_dev);
6958 +#if defined(RING_DEBUG)
6959 + printk("++ hard_start_xmit failed\n");
6961 + skb->data += displ;
6962 + return; /* -ENETDOWN */
6965 + /* ************************************* */
6967 +#if defined(RING_DEBUG)
6968 + printk("add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
6969 + "[pkt_type=%d][cloned=%d]\n",
6970 + (int)skb->len, pfr->slots_info->tot_pkts,
6971 + pfr->slots_info->insert_idx,
6972 + skb->pkt_type, skb->cloned);
6975 + idx = pfr->slots_info->insert_idx;
6976 + theSlot = get_insert_slot(pfr);
6978 + if((theSlot != NULL) && (theSlot->slot_state == 0)) {
6979 + struct pcap_pkthdr *hdr;
6981 + int is_ip_pkt, debug = 0;
6983 + /* Update Index */
6986 + bucket = &theSlot->bucket;
6987 + hdr = (struct pcap_pkthdr*)bucket;
6989 + /* BD - API changed for time keeping */
6990 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
6991 + if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
6993 + hdr->ts.tv_sec = skb->stamp.tv_sec, hdr->ts.tv_usec = skb->stamp.tv_usec;
6995 + if(skb->tstamp.off_sec == 0) __net_timestamp(skb);
6997 + hdr->ts.tv_sec = skb->tstamp.off_sec, hdr->ts.tv_usec = skb->tstamp.off_usec;
6999 + hdr->caplen = skb->len+displ;
7001 + if(hdr->caplen > pfr->slots_info->data_len)
7002 + hdr->caplen = pfr->slots_info->data_len;
7004 + hdr->len = skb->len+displ;
7007 + is_ip_pkt = parse_pkt(skb, displ,
7015 + &hdr->l4_src_port,
7016 + &hdr->l4_dst_port,
7017 + &hdr->payload_offset);
7019 + if(is_ip_pkt && pfr->bitmask_enabled) {
7020 + int vlan_match = 0;
7026 + printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d][sport=%d][dport=%d][src=%u][dst=%u]\n",
7027 + hdr->l3_proto, hdr->vlan_id, hdr->l4_src_port, hdr->l4_dst_port, hdr->ipv4_src, hdr->ipv4_dst);
7029 + printk(KERN_INFO "PF_RING: [proto=%d][vlan=%d]\n", hdr->l3_proto, hdr->vlan_id);
7032 + if(hdr->vlan_id != (u_int16_t)-1) {
7033 + vlan_match = is_set_bit_bitmask(&pfr->vlan_bitmask, hdr->vlan_id);
7038 + struct ethhdr *eh = (struct ethhdr*)(skb->data);
7039 + u_int32_t src_mac = (eh->h_source[0] & 0xff) + (eh->h_source[1] & 0xff) + ((eh->h_source[2] & 0xff) << 24)
7040 + + ((eh->h_source[3] & 0xff) << 16) + ((eh->h_source[4] & 0xff) << 8) + (eh->h_source[5] & 0xff);
7042 + if(debug) printk(KERN_INFO "PF_RING: [src_mac=%u]\n", src_mac);
7044 + fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, src_mac);
7047 + u_int32_t dst_mac = (eh->h_dest[0] & 0xff) + (eh->h_dest[1] & 0xff) + ((eh->h_dest[2] & 0xff) << 24)
7048 + + ((eh->h_dest[3] & 0xff) << 16) + ((eh->h_dest[4] & 0xff) << 8) + (eh->h_dest[5] & 0xff);
7050 + if(debug) printk(KERN_INFO "PF_RING: [dst_mac=%u]\n", dst_mac);
7052 + fwd_pkt |= is_set_bit_bitmask(&pfr->mac_bitmask, dst_mac);
7054 + if(is_ip_pkt && (!fwd_pkt)) {
7055 + fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_src);
7058 + fwd_pkt |= is_set_bit_bitmask(&pfr->ip_bitmask, hdr->ipv4_dst);
7060 + if((!fwd_pkt) && ((hdr->l3_proto == IPPROTO_TCP)
7061 + || (hdr->l3_proto == IPPROTO_UDP))) {
7062 + fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_src_port);
7063 + if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->port_bitmask, hdr->l4_dst_port);
7066 + if(!fwd_pkt) fwd_pkt |= is_set_bit_bitmask(&pfr->proto_bitmask, hdr->l3_proto);
7074 + if(fwd_pkt && (pfr->acsm != NULL)) {
7075 + if((hdr->payload_offset > 0) && ((skb->len+skb->mac_len) > hdr->payload_offset)) {
7076 + char *payload = (skb->data-displ+hdr->payload_offset);
7077 + int payload_len = skb->len /* + skb->mac_len */ - hdr->payload_offset;
7079 + if((payload_len > 0)
7080 + && ((hdr->l4_src_port == 80) || (hdr->l4_dst_port == 80))) {
7086 + memcpy(buf, payload, payload_len);
7087 + buf[payload_len] = '\0';
7088 + printk("[%s]\n", payload);
7091 + /* printk("Tring to match pattern [len=%d][%s]\n", payload_len, payload); */
7092 + rc = acsmSearch2(pfr->acsm, payload, payload_len, MatchFound, (void *)0) ? 1 : 0;
7094 + // printk("Match result: %d\n", fwd_pkt);
7096 + printk("Pattern matched!\n");
7107 + memcpy(&bucket[sizeof(struct pcap_pkthdr)], skb->data-displ, hdr->caplen);
7109 +#if defined(RING_DEBUG)
7111 + static unsigned int lastLoss = 0;
7113 + if(pfr->slots_info->tot_lost
7114 + && (lastLoss != pfr->slots_info->tot_lost)) {
7115 + printk("add_skb_to_ring(%d): [data_len=%d]"
7116 + "[hdr.caplen=%d][skb->len=%d]"
7117 + "[pcap_pkthdr=%d][removeIdx=%d]"
7118 + "[loss=%lu][page=%u][slot=%u]\n",
7119 + idx-1, pfr->slots_info->data_len, hdr->caplen, skb->len,
7120 + sizeof(struct pcap_pkthdr),
7121 + pfr->slots_info->remove_idx,
7122 + (long unsigned int)pfr->slots_info->tot_lost,
7123 + pfr->insert_page_id, pfr->insert_slot_id);
7125 + lastLoss = pfr->slots_info->tot_lost;
7130 + write_lock(&pfr->ring_index_lock);
7131 + if(idx == pfr->slots_info->tot_slots)
7132 + pfr->slots_info->insert_idx = 0;
7134 + pfr->slots_info->insert_idx = idx;
7136 + pfr->slots_info->tot_insert++;
7137 + theSlot->slot_state = 1;
7138 + write_unlock(&pfr->ring_index_lock);
7141 + write_lock(&pfr->ring_index_lock);
7142 + pfr->slots_info->tot_lost++;
7143 + write_unlock(&pfr->ring_index_lock);
7145 +#if defined(RING_DEBUG)
7146 + printk("add_skb_to_ring(skb): packet lost [loss=%lu]"
7147 + "[removeIdx=%u][insertIdx=%u]\n",
7148 + (long unsigned int)pfr->slots_info->tot_lost,
7149 + pfr->slots_info->remove_idx, pfr->slots_info->insert_idx);
7155 + /* wakeup in case of poll() */
7156 + if(waitqueue_active(&pfr->ring_slots_waitqueue))
7157 + wake_up_interruptible(&pfr->ring_slots_waitqueue);
7161 +/* ********************************** */
7163 +static u_int hash_skb(struct ring_cluster *cluster_ptr,
7164 + struct sk_buff *skb, u_char recv_packet) {
7169 + if(cluster_ptr->hashing_mode == cluster_round_robin) {
7170 + idx = cluster_ptr->hashing_id++;
7172 + /* Per-flow clustering */
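+    /* Note: addresses and ports are summed below, so both directions of a
+       flow hash to the same cluster element. */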
7173 + if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr)) {
7177 + displ = SKB_DISPLACEMENT;
7182 + Always points to the IP part of the packet
7185 + ip = (struct iphdr*)(skb->data+displ);
7187 + idx = ip->saddr+ip->daddr+ip->protocol;
7189 + if(ip->protocol == IPPROTO_TCP) {
7190 + struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
7191 + +sizeof(struct iphdr));
7192 + idx += tcp->source+tcp->dest;
7193 + } else if(ip->protocol == IPPROTO_UDP) {
7194 + struct udphdr *udp = (struct udphdr*)(skb->data+displ
7195 + +sizeof(struct iphdr));
7196 + idx += udp->source+udp->dest;
7202 + return(idx % cluster_ptr->num_cluster_elements);
7205 +/* ********************************** */
7207 +static int skb_ring_handler(struct sk_buff *skb,
7208 + u_char recv_packet,
7209 + u_char real_skb /* 1=skb 0=faked skb */) {
7210 + struct sock *skElement;
7212 + struct list_head *ptr;
7213 + struct ring_cluster *cluster_ptr;
7216 + uint64_t rdt = _rdtsc(), rdt1, rdt2;
7219 + if((!skb) /* Invalid skb */
7220 + || ((!enable_tx_capture) && (!recv_packet))) {
7222 + An outgoing packet is about to be sent out
7223 + but we decided not to handle transmitted
7229 +#if defined(RING_DEBUG)
7231 + printk("skb_ring_handler() [len=%d][dev=%s]\n", skb->len,
7232 + skb->dev->name == NULL ? "<NULL>" : skb->dev->name);
7240 + /* [1] Check unclustered sockets */
7241 + for (ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
7242 + struct ring_opt *pfr;
7243 + struct ring_element *entry;
7245 + entry = list_entry(ptr, struct ring_element, list);
7247 + read_lock(&ring_mgmt_lock);
7248 + skElement = entry->sk;
7249 + pfr = ring_sk(skElement);
7250 + read_unlock(&ring_mgmt_lock);
7253 + && (pfr->cluster_id == 0 /* No cluster */)
7254 + && (pfr->ring_slots != NULL)
7255 + && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
7256 + /* We've found the ring where the packet can be stored */
7257 + read_lock(&ring_mgmt_lock);
7258 + add_skb_to_ring(skb, pfr, recv_packet, real_skb);
7259 + read_unlock(&ring_mgmt_lock);
7261 + rc = 1; /* Ring found: we've done our job */
7265 + /* [2] Check socket clusters */
7266 + cluster_ptr = ring_cluster_list;
7268 + while(cluster_ptr != NULL) {
7269 + struct ring_opt *pfr;
7271 + if(cluster_ptr->num_cluster_elements > 0) {
7272 + u_int skb_hash = hash_skb(cluster_ptr, skb, recv_packet);
7274 + read_lock(&ring_mgmt_lock);
7275 + skElement = cluster_ptr->sk[skb_hash];
7276 + read_unlock(&ring_mgmt_lock);
7278 + if(skElement != NULL) {
7279 + pfr = ring_sk(skElement);
7282 + && (pfr->ring_slots != NULL)
7283 + && ((pfr->ring_netdev == skb->dev) || ((skb->dev->flags & IFF_SLAVE) && pfr->ring_netdev == skb->dev->master))) {
7284 + /* We've found the ring where the packet can be stored */
7285 + read_lock(&ring_mgmt_lock);
7286 + add_skb_to_ring(skb, pfr, recv_packet, real_skb);
7287 + read_unlock(&ring_mgmt_lock);
7289 + rc = 1; /* Ring found: we've done our job */
7294 + cluster_ptr = cluster_ptr->next;
7298 + rdt1 = _rdtsc()-rdt1;
7305 + if(transparent_mode) rc = 0;
7307 + if((rc != 0) && real_skb)
7308 + dev_kfree_skb(skb); /* Free the skb */
7311 + rdt2 = _rdtsc()-rdt2;
7312 + rdt = _rdtsc()-rdt;
7314 +#if defined(RING_DEBUG)
7315 + printk("# cycles: %d [lock costed %d %d%%][free costed %d %d%%]\n",
7316 + (int)rdt, rdt-rdt1,
7317 + (int)((float)((rdt-rdt1)*100)/(float)rdt),
7319 + (int)((float)(rdt2*100)/(float)rdt));
7323 + return(rc); /* 0 = packet not handled */
7326 +/* ********************************** */
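+
+/* Scratch skb used by buffer_ring_handler() below to wrap raw buffers
+   (it is handed to skb_ring_handler() with real_skb=0). */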
7328 +struct sk_buff skb;
7330 +static int buffer_ring_handler(struct net_device *dev,
7331 + char *data, int len) {
7333 +#if defined(RING_DEBUG)
7334 + printk("buffer_ring_handler: [dev=%s][len=%d]\n",
7335 + dev->name == NULL ? "<NULL>" : dev->name, len);
7338 + /* BD - API changed for time keeping */
7339 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
7340 + skb.dev = dev, skb.len = len, skb.data = data,
7341 + skb.data_len = len, skb.stamp.tv_sec = 0; /* Calculate the time */
7343 + skb.dev = dev, skb.len = len, skb.data = data,
7344 + skb.data_len = len, skb.tstamp.off_sec = 0; /* Calculate the time */
7347 + skb_ring_handler(&skb, 1, 0 /* fake skb */);
7352 +/* ********************************** */
7354 +static int ring_create(struct socket *sock, int protocol) {
7356 + struct ring_opt *pfr;
7359 +#if defined(RING_DEBUG)
7360 + printk("RING: ring_create()\n");
7363 +  /* Privileged operation: the caller must have CAP_NET_ADMIN */
7364 + if(!capable(CAP_NET_ADMIN))
7367 + if(sock->type != SOCK_RAW)
7368 + return -ESOCKTNOSUPPORT;
7370 + if(protocol != htons(ETH_P_ALL))
7371 + return -EPROTONOSUPPORT;
7373 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
7374 + MOD_INC_USE_COUNT;
7379 +  // BD: broke this out to keep it simpler and clearer as to what the
7381 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7382 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
7383 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
7385 + // BD: API changed in 2.6.12, ref:
7386 + // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
7387 + sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
7391 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1);
7397 + sock->ops = &ring_ops;
7398 + sock_init_data(sock, sk);
7399 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7400 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
7401 + sk_set_owner(sk, THIS_MODULE);
7406 + ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
7408 + if (!(pfr = ring_sk(sk))) {
7412 + memset(pfr, 0, sizeof(*pfr));
7413 + init_waitqueue_head(&pfr->ring_slots_waitqueue);
7414 + pfr->ring_index_lock = RW_LOCK_UNLOCKED;
7415 + atomic_set(&pfr->num_ring_slots_waiters, 0);
7419 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7420 + sk->sk_family = PF_RING;
7421 + sk->sk_destruct = ring_sock_destruct;
7423 + sk->family = PF_RING;
7424 + sk->destruct = ring_sock_destruct;
7425 + sk->num = protocol;
7430 +#if defined(RING_DEBUG)
7431 + printk("RING: ring_create() - created\n");
7436 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
7437 + MOD_DEC_USE_COUNT;
7442 +/* *********************************************** */
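+/*
+  Teardown order matters here: the /proc entry is removed before
+  taking ring_mgmt_lock (see the comment below), and the ring pages,
+  reserved one by one in packet_ring_bind(), must be unreserved with
+  ClearPageReserved() before free_pages() hands them back to the
+  allocator.
+*/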
7444 +static int ring_release(struct socket *sock)
7446 + struct sock *sk = sock->sk;
7447 + struct ring_opt *pfr = ring_sk(sk);
7451 +#if defined(RING_DEBUG)
7452 + printk("RING: called ring_release\n");
7455 +#if defined(RING_DEBUG)
7456 + printk("RING: ring_release entered\n");
7460 + The calls below must be placed outside the
7461 + write_lock_irq...write_unlock_irq block.
7464 + ring_proc_remove(ring_sk(sk));
7466 + write_lock_irq(&ring_mgmt_lock);
7470 + /* Free the ring buffer */
7471 + if(pfr->ring_memory) {
7472 + struct page *page, *page_end;
7474 + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
7475 + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
7476 + ClearPageReserved(page);
7478 + free_pages(pfr->ring_memory, pfr->order);
7481 + free_bitmask(&pfr->mac_bitmask);
7482 + free_bitmask(&pfr->vlan_bitmask);
7483 + free_bitmask(&pfr->ip_bitmask); free_bitmask(&pfr->twin_ip_bitmask);
7484 + free_bitmask(&pfr->port_bitmask); free_bitmask(&pfr->twin_port_bitmask);
7485 + free_bitmask(&pfr->proto_bitmask);
7487 + if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
7490 + ring_sk(sk) = NULL;
7492 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7493 + skb_queue_purge(&sk->sk_write_queue);
7497 + write_unlock_irq(&ring_mgmt_lock);
7499 +#if defined(RING_DEBUG)
7500 + printk("RING: ring_release leaving\n");
7506 +/* ********************************** */
7508 + * We create a ring for this socket and bind it to the specified device
7510 +static int packet_ring_bind(struct sock *sk, struct net_device *dev)
7512 + u_int the_slot_len;
7513 + u_int32_t tot_mem;
7514 + struct ring_opt *pfr = ring_sk(sk);
7515 + struct page *page, *page_end;
7517 + if(!dev) return(-1);
7519 +#if defined(RING_DEBUG)
7520 + printk("RING: packet_ring_bind(%s) called\n", dev->name);
+  /* **********************************************
+
+     *************************************
+     *                                   *
+     *           FlowSlotInfo            *
+     *                                   *
+     ************************************* <-+
+     *             FlowSlot              *   |
+     *************************************   |
+     *               ...                 *   +- num_slots
+     *************************************   |
+     *             FlowSlot              *   |
+     ************************************* <-+
+
+     ********************************************** */
7541 + the_slot_len = sizeof(u_char) /* flowSlot.slot_state */
7545 + + sizeof(struct pcap_pkthdr)
7546 + + bucket_len /* flowSlot.bucket */;
7548 + tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
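+  /*
+    Illustrative sizing (assumed numbers, not module defaults): with
+    the_slot_len = 1600 bytes and num_slots = 4096, tot_mem is about
+    6.5 MB, so with 4 KB pages the loop below selects order = 11
+    (4 KB << 11 = 8 MB).
+  */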
7551 + Calculate the value of the order parameter used later.
7552 + See http://www.linuxjournal.com/article.php?sid=1133
7554 + for(pfr->order = 0;(PAGE_SIZE << pfr->order) < tot_mem; pfr->order++) ;
7557 + We now try to allocate the memory as required. If we fail
7558 +     we try to allocate a smaller amount of memory (hence a
7561 + while((pfr->ring_memory = __get_free_pages(GFP_ATOMIC, pfr->order)) == 0)
7562 + if(pfr->order-- == 0)
7565 + if(pfr->order == 0) {
7566 + printk("RING: ERROR not enough memory for ring\n");
7569 +    printk("RING: successfully allocated %lu KB [tot_mem=%u][order=%ld]\n",
7570 +	   (PAGE_SIZE << pfr->order) >> 10, tot_mem, pfr->order);
7573 + tot_mem = PAGE_SIZE << pfr->order;
7574 + memset((char*)pfr->ring_memory, 0, tot_mem);
7576 + /* Now we need to reserve the pages */
7577 + page_end = virt_to_page(pfr->ring_memory + (PAGE_SIZE << pfr->order) - 1);
7578 + for(page = virt_to_page(pfr->ring_memory); page <= page_end; page++)
7579 + SetPageReserved(page);
7581 + pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
7582 + pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
7584 + pfr->slots_info->version = RING_FLOWSLOT_VERSION;
7585 + pfr->slots_info->slot_len = the_slot_len;
7586 + pfr->slots_info->data_len = bucket_len;
7587 + pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
7588 + pfr->slots_info->tot_mem = tot_mem;
7589 + pfr->slots_info->sample_rate = sample_rate;
7591 + printk("RING: allocated %d slots [slot_len=%d][tot_mem=%u]\n",
7592 + pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
7593 + pfr->slots_info->tot_mem);
7599 + for(i=0; i<pfr->slots_info->tot_slots; i++) {
7600 + unsigned long idx = i*pfr->slots_info->slot_len;
7601 + FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
7602 + slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
7607 + pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
7611 + Leave this statement here as last one. In fact when
7612 + the ring_netdev != NULL the socket is ready to be used.
7614 + pfr->ring_netdev = dev;
7619 +/* ************************************* */
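+/*
+  A hedged userspace sketch of binding the ring to an interface (the
+  device name travels in sa_data, exactly as ring_bind() expects;
+  "eth0" is just an example):
+
+    struct sockaddr sa;
+    sa.sa_family = PF_RING;
+    snprintf(sa.sa_data, sizeof(sa.sa_data), "eth0");
+    if(bind(fd, &sa, sizeof(sa)) < 0)
+      perror("bind(PF_RING)");
+*/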
7621 +/* Bind to a device */
7622 +static int ring_bind(struct socket *sock,
7623 + struct sockaddr *sa, int addr_len)
7625 + struct sock *sk=sock->sk;
7626 + struct net_device *dev = NULL;
7628 +#if defined(RING_DEBUG)
7629 + printk("RING: ring_bind() called\n");
7635 + if (addr_len != sizeof(struct sockaddr))
7637 + if (sa->sa_family != PF_RING)
7640 + /* Safety check: add trailing zero if missing */
7641 + sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
7643 +#if defined(RING_DEBUG)
7644 + printk("RING: searching device %s\n", sa->sa_data);
7647 + if((dev = __dev_get_by_name(sa->sa_data)) == NULL) {
7648 +#if defined(RING_DEBUG)
7649 + printk("RING: search failed\n");
7653 + return(packet_ring_bind(sk, dev));
7656 +/* ************************************* */
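+/*
+  Userspace then maps the ring with mmap(2). ring_mmap() below
+  requires a length that is a multiple of PAGE_SIZE and no larger
+  than slots_info->tot_mem; a sketch, assuming tot_mem was obtained
+  elsewhere (e.g. read back from FlowSlotInfo):
+
+    char *ring = mmap(NULL, tot_mem, PROT_READ | PROT_WRITE,
+                      MAP_SHARED, fd, 0);
+    if(ring == MAP_FAILED)
+      perror("mmap(PF_RING)");
+*/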
7658 +static int ring_mmap(struct file *file,
7659 + struct socket *sock,
7660 + struct vm_area_struct *vma)
7662 + struct sock *sk = sock->sk;
7663 + struct ring_opt *pfr = ring_sk(sk);
7664 + unsigned long size, start;
7668 +#if defined(RING_DEBUG)
7669 + printk("RING: ring_mmap() called\n");
7672 + if(pfr->ring_memory == 0) {
7673 +#if defined(RING_DEBUG)
7674 + printk("RING: ring_mmap() failed: mapping area to an unbound socket\n");
7679 + size = (unsigned long)(vma->vm_end-vma->vm_start);
7681 + if(size % PAGE_SIZE) {
7682 +#if defined(RING_DEBUG)
7683 + printk("RING: ring_mmap() failed: len is not multiple of PAGE_SIZE\n");
7688 + /* if userspace tries to mmap beyond end of our buffer, fail */
7689 + if(size > pfr->slots_info->tot_mem) {
7690 +#if defined(RING_DEBUG)
7691 +    printk("ring_mmap() failed: area too large [%lu > %u]\n", size, pfr->slots_info->tot_mem);
7696 + pagesToMap = size/PAGE_SIZE;
7698 +#if defined(RING_DEBUG)
7699 + printk("RING: ring_mmap() called. %d pages to map\n", pagesToMap);
7702 +#if defined(RING_DEBUG)
7703 + printk("RING: mmap [slot_len=%d][tot_slots=%d] for ring on device %s\n",
7704 + pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
7705 + pfr->ring_netdev->name);
7708 + /* we do not want to have this area swapped out, lock it */
7709 + vma->vm_flags |= VM_LOCKED;
7710 + start = vma->vm_start;
7712 + /* Ring slots start from page 1 (page 0 is reserved for FlowSlotInfo) */
7713 + ptr = (char*)(start+PAGE_SIZE);
7715 + if(remap_page_range(
7716 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7720 + __pa(pfr->ring_memory),
7721 + PAGE_SIZE*pagesToMap, vma->vm_page_prot)) {
7722 +#if defined(RING_DEBUG)
7723 + printk("remap_page_range() failed\n");
7728 +#if defined(RING_DEBUG)
7729 +  printk("ring_mmap(pagesToMap=%d): success.\n", pagesToMap);
7735 +/* ************************************* */
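+/*
+  Note that ring_recvmsg() copies no packet data: payloads are read
+  directly from the mmap()ed ring. Because wait_event_interruptible()
+  is invoked with a constant-true condition, the loop below in effect
+  busy-polls until at least MIN_QUEUED_PKTS packets are queued
+  (bounded by MAX_QUEUE_LOOPS once something arrives), and the call
+  returns the number of queued packets.
+*/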
7737 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7738 +static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
7739 + struct msghdr *msg, size_t len, int flags)
7741 + static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
7742 + int flags, struct scm_cookie *scm)
7746 + struct ring_opt *pfr = ring_sk(sock->sk);
7747 + u_int32_t queued_pkts, num_loops = 0;
7749 +#if defined(RING_DEBUG)
7750 + printk("ring_recvmsg called\n");
7753 + slot = get_remove_slot(pfr);
7755 + while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
7756 + wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
7758 +#if defined(RING_DEBUG)
7759 + printk("-> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
7760 + slot->slot_state, queued_pkts, num_loops);
7763 + if(queued_pkts > 0) {
7764 + if(num_loops++ > MAX_QUEUE_LOOPS)
7769 +#if defined(RING_DEBUG)
7771 + printk("ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
7772 + queued_pkts, num_loops);
7775 + return(queued_pkts);
7778 +/* ************************************* */
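+/*
+  Typical way for userspace to wait for traffic; POLLIN|POLLRDNORM is
+  signalled once the next removal slot holds a packet
+  (slot_state == 1):
+
+    struct pollfd pfd = { .fd = fd, .events = POLLIN };
+    poll(&pfd, 1, -1 /* block until a packet is queued */);
+*/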
7780 +unsigned int ring_poll(struct file * file,
7781 + struct socket *sock, poll_table *wait)
7784 + struct ring_opt *pfr = ring_sk(sock->sk);
7786 +#if defined(RING_DEBUG)
7787 + printk("poll called\n");
7790 + slot = get_remove_slot(pfr);
7792 + if((slot != NULL) && (slot->slot_state == 0))
7793 + poll_wait(file, &pfr->ring_slots_waitqueue, wait);
7795 +#if defined(RING_DEBUG)
7796 +  printk("poll returning %d\n", slot ? (int)slot->slot_state : -1); /* slot may be NULL */
7799 + if((slot != NULL) && (slot->slot_state == 1))
7800 + return(POLLIN | POLLRDNORM);
7805 +/* ************************************* */
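+/*
+  Cluster bookkeeping. A cluster groups up to CLUSTER_LEN sockets
+  under one cluster_id; for every packet skb_ring_handler() selects a
+  single member via hash_skb() (per-flow by default), so the sockets
+  in a cluster share the load of one traffic stream.
+*/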
7807 +int add_to_cluster_list(struct ring_cluster *el,
7808 + struct sock *sock) {
7810 + if(el->num_cluster_elements == CLUSTER_LEN)
7811 + return(-1); /* Cluster full */
7813 + ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster_id;
7814 + el->sk[el->num_cluster_elements] = sock;
7815 + el->num_cluster_elements++;
7819 +/* ************************************* */
7821 +int remove_from_cluster_list(struct ring_cluster *el,
7822 + struct sock *sock) {
7825 + for(i=0; i<CLUSTER_LEN; i++)
7826 + if(el->sk[i] == sock) {
7827 + el->num_cluster_elements--;
7829 + if(el->num_cluster_elements > 0) {
7830 + /* The cluster contains other elements */
7831 + for(j=i; j<CLUSTER_LEN-1; j++)
7832 + el->sk[j] = el->sk[j+1];
7834 + el->sk[CLUSTER_LEN-1] = NULL;
7836 + /* Empty cluster */
7837 + memset(el->sk, 0, sizeof(el->sk));
7843 + return(-1); /* Not found */
7846 +/* ************************************* */
7848 +static int remove_from_cluster(struct sock *sock,
7849 + struct ring_opt *pfr)
7851 + struct ring_cluster *el;
7853 +#if defined(RING_DEBUG)
7854 + printk("--> remove_from_cluster(%d)\n", pfr->cluster_id);
7857 + if(pfr->cluster_id == 0 /* 0 = No Cluster */)
7858 +    return(0); /* Nothing to do */
7860 + el = ring_cluster_list;
7862 + while(el != NULL) {
7863 + if(el->cluster_id == pfr->cluster_id) {
7864 + return(remove_from_cluster_list(el, sock));
7869 + return(-EINVAL); /* Not found */
7872 +/* ************************************* */
7874 +static int add_to_cluster(struct sock *sock,
7875 + struct ring_opt *pfr,
7876 + u_short cluster_id)
7878 + struct ring_cluster *el;
7881 + printk("--> add_to_cluster(%d)\n", cluster_id);
7884 + if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
7886 + if(pfr->cluster_id != 0)
7887 + remove_from_cluster(sock, pfr);
7889 + el = ring_cluster_list;
7891 + while(el != NULL) {
7892 + if(el->cluster_id == cluster_id) {
7893 + return(add_to_cluster_list(el, sock));
7898 + /* There's no existing cluster. We need to create one */
7899 + if((el = kmalloc(sizeof(struct ring_cluster), GFP_KERNEL)) == NULL)
7902 + el->cluster_id = cluster_id;
7903 + el->num_cluster_elements = 1;
7904 + el->hashing_mode = cluster_per_flow; /* Default */
7905 + el->hashing_id = 0;
7907 + memset(el->sk, 0, sizeof(el->sk));
7909 + el->next = ring_cluster_list;
7910 + ring_cluster_list = el;
7911 + pfr->cluster_id = cluster_id;
7913 + return(0); /* 0 = OK */
7916 +/* ************************************* */
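+/*
+  The PF_RING-specific options below are driven with plain
+  setsockopt(2). A hedged sketch for joining cluster 5 (SOL_SOCKET is
+  an assumption; options this switch does not recognize simply fall
+  through to sock_setsockopt()):
+
+    u_int cluster_id = 5;
+    setsockopt(fd, SOL_SOCKET, SO_ADD_TO_CLUSTER,
+               &cluster_id, sizeof(cluster_id));
+*/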
7918 +/* Code taken/inspired from core/sock.c */
7919 +static int ring_setsockopt(struct socket *sock,
7920 + int level, int optname,
7921 + char *optval, int optlen)
7923 + struct ring_opt *pfr = ring_sk(sock->sk);
7924 + int val, found, ret = 0;
7925 + u_int cluster_id, do_enable;
7926 +  char devName[IFNAMSIZ], bloom_filter[256], aho_pattern[256]; /* IFNAMSIZ: 8 bytes was too small for long interface names */
7928 + if(pfr == NULL) return(-EINVAL);
7930 + if (get_user(val, (int *)optval))
7937 + case SO_ATTACH_FILTER:
7939 + if (optlen == sizeof(struct sock_fprog)) {
7940 + unsigned int fsize;
7941 + struct sock_fprog fprog;
7942 + struct sk_filter *filter;
7949 + Do not call copy_from_user within a held
7950 +      spinlock (e.g. ring_mgmt_lock) as this caused
7951 + problems when certain debugging was enabled under
7952 + 2.6.5 -- including hard lockups of the machine.
7954 + if(copy_from_user(&fprog, optval, sizeof(fprog)))
7957 + fsize = sizeof(struct sock_filter) * fprog.len;
7958 + filter = kmalloc(fsize, GFP_KERNEL);
7960 + if(filter == NULL) {
7965 + if(copy_from_user(filter->insns, fprog.filter, fsize))
7968 + filter->len = fprog.len;
7970 + if(sk_chk_filter(filter->insns, filter->len) != 0) {
7971 + /* Bad filter specified */
7973 + pfr->bpfFilter = NULL;
7977 + /* get the lock, set the filter, release the lock */
7978 + write_lock(&ring_mgmt_lock);
7979 + pfr->bpfFilter = filter;
7980 + write_unlock(&ring_mgmt_lock);
7985 + case SO_DETACH_FILTER:
7986 + write_lock(&ring_mgmt_lock);
7988 + if(pfr->bpfFilter != NULL) {
7989 + kfree(pfr->bpfFilter);
7990 + pfr->bpfFilter = NULL;
7991 + write_unlock(&ring_mgmt_lock);
7997 + case SO_ADD_TO_CLUSTER:
7998 +      if (optlen != sizeof(val))
8001 + if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
8004 + write_lock(&ring_mgmt_lock);
8005 + ret = add_to_cluster(sock->sk, pfr, cluster_id);
8006 + write_unlock(&ring_mgmt_lock);
8009 + case SO_REMOVE_FROM_CLUSTER:
8010 + write_lock(&ring_mgmt_lock);
8011 + ret = remove_from_cluster(sock->sk, pfr);
8012 + write_unlock(&ring_mgmt_lock);
8015 + case SO_SET_REFLECTOR:
8016 + if(optlen >= (sizeof(devName)-1))
8020 + if(copy_from_user(devName, optval, optlen))
8024 + devName[optlen] = '\0';
8026 +#if defined(RING_DEBUG)
8027 + printk("+++ SO_SET_REFLECTOR(%s)\n", devName);
8030 + write_lock(&ring_mgmt_lock);
8031 + pfr->reflector_dev = dev_get_by_name(devName);
8032 + write_unlock(&ring_mgmt_lock);
8034 +#if defined(RING_DEBUG)
8035 + if(pfr->reflector_dev != NULL)
8036 +      printk("SO_SET_REFLECTOR(%s): succeeded\n", devName);
8038 + printk("SO_SET_REFLECTOR(%s): device unknown\n", devName);
8042 + case SO_SET_BLOOM:
8043 + if(optlen >= (sizeof(bloom_filter)-1))
8047 + if(copy_from_user(bloom_filter, optval, optlen))
8051 + bloom_filter[optlen] = '\0';
8053 + write_lock(&ring_mgmt_lock);
8054 + handle_bloom_filter_rule(pfr, bloom_filter);
8055 + write_unlock(&ring_mgmt_lock);
8058 + case SO_SET_STRING:
8059 + if(optlen >= (sizeof(aho_pattern)-1))
8063 + if(copy_from_user(aho_pattern, optval, optlen))
8067 + aho_pattern[optlen] = '\0';
8069 + write_lock(&ring_mgmt_lock);
8070 + if(pfr->acsm != NULL) acsmFree2(pfr->acsm);
8073 + if((pfr->acsm = acsmNew2()) != NULL) {
8074 + int nc=1 /* case sensitive */, i = 0;
8076 + pfr->acsm->acsmFormat = ACF_BANDED;
8077 + acsmAddPattern2(pfr->acsm, (unsigned char*)aho_pattern,
8078 + (int)strlen(aho_pattern), nc, 0, 0,(void*)aho_pattern, i);
8079 + acsmCompile2(pfr->acsm);
8082 + pfr->acsm = kmalloc (10, GFP_KERNEL); /* TEST */
8085 + write_unlock(&ring_mgmt_lock);
8088 + case SO_TOGGLE_BLOOM_STATE:
8089 + if(optlen >= (sizeof(bloom_filter)-1))
8093 +      if(copy_from_user(&do_enable, optval, sizeof(do_enable))) /* copy just sizeof(do_enable); optlen may be larger */
8097 + write_lock(&ring_mgmt_lock);
8099 + pfr->bitmask_enabled = 1;
8101 + pfr->bitmask_enabled = 0;
8102 + write_unlock(&ring_mgmt_lock);
8103 + printk("SO_TOGGLE_BLOOM_STATE: bloom bitmask %s\n",
8104 + pfr->bitmask_enabled ? "enabled" : "disabled");
8107 + case SO_RESET_BLOOM_FILTERS:
8108 + if(optlen >= (sizeof(bloom_filter)-1))
8112 +      if(copy_from_user(&do_enable, optval, sizeof(do_enable))) /* copy just sizeof(do_enable); optlen may be larger */
8116 + write_lock(&ring_mgmt_lock);
8117 + reset_bloom_filters(pfr);
8118 + write_unlock(&ring_mgmt_lock);
8129 + return(sock_setsockopt(sock, level, optname, optval, optlen));
8132 +/* ************************************* */
8134 +static int ring_ioctl(struct socket *sock,
8135 + unsigned int cmd, unsigned long arg)
8140 + case SIOCGIFFLAGS:
8141 + case SIOCSIFFLAGS:
8143 + case SIOCGIFMETRIC:
8144 + case SIOCSIFMETRIC:
8150 + case SIOCGIFHWADDR:
8151 + case SIOCSIFHWADDR:
8154 + case SIOCSIFSLAVE:
8155 + case SIOCGIFSLAVE:
8156 + case SIOCGIFINDEX:
8158 + case SIOCGIFCOUNT:
8159 + case SIOCSIFHWBROADCAST:
8160 + return(inet_dgram_ops.ioctl(sock, cmd, arg));
8164 + return -ENOIOCTLCMD;
8170 +/* ************************************* */
8172 +static struct proto_ops ring_ops = {
8173 + .family = PF_RING,
8174 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
8175 + .owner = THIS_MODULE,
8178 + /* Operations that make no sense on ring sockets. */
8179 + .connect = sock_no_connect,
8180 + .socketpair = sock_no_socketpair,
8181 + .accept = sock_no_accept,
8182 + .getname = sock_no_getname,
8183 + .listen = sock_no_listen,
8184 + .shutdown = sock_no_shutdown,
8185 + .sendpage = sock_no_sendpage,
8186 + .sendmsg = sock_no_sendmsg,
8187 + .getsockopt = sock_no_getsockopt,
8189 + /* Now the operations that really occur. */
8190 + .release = ring_release,
8191 + .bind = ring_bind,
8192 + .mmap = ring_mmap,
8193 + .poll = ring_poll,
8194 + .setsockopt = ring_setsockopt,
8195 + .ioctl = ring_ioctl,
8196 + .recvmsg = ring_recvmsg,
8199 +/* ************************************ */
8201 +static struct net_proto_family ring_family_ops = {
8202 + .family = PF_RING,
8203 + .create = ring_create,
8204 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
8205 + .owner = THIS_MODULE,
8209 +// BD: API changed in 2.6.12, ref:
8210 +// http://svn.clkao.org/svnweb/linux/revision/?rev=28201
8211 +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
8212 +static struct proto ring_proto = {
8213 + .name = "PF_RING",
8214 + .owner = THIS_MODULE,
8215 + .obj_size = sizeof(struct sock),
8219 +/* ************************************ */
8221 +static void __exit ring_exit(void)
8223 + struct list_head *ptr;
8224 + struct ring_element *entry;
8226 + for(ptr = ring_table.next; ptr != &ring_table; ptr = ptr->next) {
8227 + entry = list_entry(ptr, struct ring_element, list);
8231 + while(ring_cluster_list != NULL) {
8232 + struct ring_cluster *next = ring_cluster_list->next;
8233 + kfree(ring_cluster_list);
8234 + ring_cluster_list = next;
8237 + set_skb_ring_handler(NULL);
8238 + set_buffer_ring_handler(NULL);
8239 + sock_unregister(PF_RING);
8241 + printk("PF_RING shut down.\n");
8244 +/* ************************************ */
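+/*
+  Module bring-up: register the PF_RING socket family, then install
+  the two hooks (skb and raw-buffer) that the patched kernel invokes
+  for captured packets; if the buffer hook cannot be installed,
+  everything is rolled back. Assuming bucket_len, num_slots and
+  sample_rate are exposed as module parameters (only their values are
+  printed below), loading might look like:
+
+    # insmod ring.ko bucket_len=128 num_slots=4096 sample_rate=1
+*/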
8246 +static int __init ring_init(void)
8248 + printk("Welcome to PF_RING %s\n(C) 2004-07 L.Deri <deri@ntop.org>\n",
8251 + INIT_LIST_HEAD(&ring_table);
8252 + ring_cluster_list = NULL;
8254 + sock_register(&ring_family_ops);
8256 + set_skb_ring_handler(skb_ring_handler);
8257 + set_buffer_ring_handler(buffer_ring_handler);
8259 + if(get_buffer_ring_handler() != buffer_ring_handler) {
8260 + printk("PF_RING: set_buffer_ring_handler FAILED\n");
8262 + set_skb_ring_handler(NULL);
8263 + set_buffer_ring_handler(NULL);
8264 + sock_unregister(PF_RING);
8267 + printk("PF_RING: bucket length %d bytes\n", bucket_len);
8268 + printk("PF_RING: ring slots %d\n", num_slots);
8269 + printk("PF_RING: sample rate %d [1=no sampling]\n", sample_rate);
8270 + printk("PF_RING: capture TX %s\n",
8271 + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
8272 + printk("PF_RING: transparent mode %s\n",
8273 + transparent_mode ? "Yes" : "No");
8275 + printk("PF_RING initialized correctly.\n");
8282 +module_init(ring_init);
8283 +module_exit(ring_exit);
8284 +MODULE_LICENSE("GPL");
8286 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
8287 +MODULE_ALIAS_NETPROTO(PF_RING);