1 diff --unified --recursive --new-file linux-2.6.30/include/linux/ring.h linux-2.6.30-1-686-smp-PF_RING/include/linux/ring.h
2 --- linux-2.6.30/include/linux/ring.h 1970-01-01 01:00:00.000000000 +0100
3 +++ linux-2.6.30-1-686-smp-PF_RING/include/linux/ring.h 2009-07-21 04:40:31.308485480 +0200
6 + * Definitions for packet ring
8 + * 2004-09 Luca Deri <deri@ntop.org>
14 +#define INCLUDE_MAC_INFO
16 +#ifdef INCLUDE_MAC_INFO
17 +#define SKB_DISPLACEMENT 14 /* Include MAC address information */
19 +#define SKB_DISPLACEMENT 0 /* Do NOT include MAC address information */
23 +#define RING_MAGIC_VALUE 0x88
24 +#define RING_FLOWSLOT_VERSION 9
26 +#define DEFAULT_BUCKET_LEN 128
27 +#define MAX_NUM_DEVICES 256
30 +#define RING_VERSION "3.9.5"
31 +#define RING_VERSION_NUM 0x030905
34 +#define SO_ADD_TO_CLUSTER 99
35 +#define SO_REMOVE_FROM_CLUSTER 100
36 +#define SO_SET_REFLECTOR 101
37 +#define SO_SET_STRING 102
38 +#define SO_ADD_FILTERING_RULE 103
39 +#define SO_REMOVE_FILTERING_RULE 104
40 +#define SO_TOGGLE_FILTER_POLICY 105
41 +#define SO_SET_SAMPLING_RATE 106
42 +#define SO_ACTIVATE_RING 107
43 +#define SO_RING_BUCKET_LEN 108
44 +#define SO_SET_CHANNEL_ID 109
45 +#define SO_PURGE_IDLE_HASH_RULES 110 /* inactivity (sec) */
46 +#define SO_SET_APPL_NAME 111
49 +#define SO_GET_RING_VERSION 120
50 +#define SO_GET_FILTERING_RULE_STATS 121
51 +#define SO_GET_HASH_FILTERING_RULE_STATS 122
52 +#define SO_GET_MAPPED_DNA_DEVICE 123
55 +#define SO_MAP_DNA_DEVICE 130
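These options are consumed by the module's setsockopt() hook on PF_RING sockets. A minimal userland sketch, assuming a setsockopt level of 0 (a value this header does not pin down) and a loaded pf_ring module:

#include <stdio.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>

#define PF_RING           27   /* as defined later in this header */
#define SO_ADD_TO_CLUSTER 99
#define SO_ACTIVATE_RING  107

int main(void) {
  u_short cluster_id = 1;
  char dummy = 0;
  int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));

  if (fd < 0) { perror("socket (pf_ring loaded?)"); return 1; }

  /* Join cluster #1 so several sockets share one logical ring */
  if (setsockopt(fd, 0, SO_ADD_TO_CLUSTER, &cluster_id, sizeof(cluster_id)) < 0)
    perror("SO_ADD_TO_CLUSTER");

  /* Start capturing on this ring */
  if (setsockopt(fd, 0, SO_ACTIVATE_RING, &dummy, sizeof(dummy)) < 0)
    perror("SO_ACTIVATE_RING");

  return 0;
}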
57 +/* **************** regexp.h ******************* */
60 +http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h ,
61 +which contains a version of this library, says:
64 + * NSUBEXP must be at least 10, and no greater than 117 or the parser
65 + * will not work properly.
68 +However, it looks rather like this library is limited to 10. If you think
69 +otherwise, let us know.
73 +typedef struct regexp {
74 + char *startp[NSUBEXP];
75 + char *endp[NSUBEXP];
76 + char regstart; /* Internal use only. */
77 + char reganch; /* Internal use only. */
78 + char *regmust; /* Internal use only. */
79 + int regmlen; /* Internal use only. */
80 + char program[1]; /* Unwarranted chumminess with compiler. */
83 +regexp * regcomp(char *exp, int *patternsize);
84 +int regexec(regexp *prog, char *string);
85 +void regsub(regexp *prog, char *source, char *dest);
86 +void regerror(char *s);
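A minimal in-kernel sketch of this API (not part of the patch). Note that this regcomp() is the embedded variant above, which reports the size of the compiled program through patternsize, not the POSIX function; that the result is kfree()-able is an assumption.

static int payload_matches(char *payload)
{
  static char pattern[] = "GET /[a-z]+";
  int patternsize, match = 0;
  regexp *re = regcomp(pattern, &patternsize);

  if (re != NULL) {
    match = regexec(re, payload);  /* 1 = match, 0 = no match */
    kfree(re);                     /* assumption: kmalloc()'d by regcomp() */
  }
  return match;
}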
89 + * The first byte of the regexp internal "program" is actually this magic
90 + * number; the start node begins in the second byte.
94 +/* *********************************** */
96 +struct pkt_aggregation_info {
97 + u_int32_t num_pkts, num_bytes;
98 + struct timeval first_seen, last_seen;
102 + Note that as offsets *can* be negative,
103 + please do not change them to unsigned
106 + int16_t eth_offset; /* This offset *must* be added to all offsets below */
107 + int16_t vlan_offset;
110 + int16_t payload_offset;
113 +struct pkt_parsing_info {
114 + /* Core fields (also used by NetFlow) */
115 + u_int16_t eth_type; /* Ethernet type */
116 + u_int16_t vlan_id; /* VLAN Id or NO_VLAN */
117 + u_int8_t l3_proto, ipv4_tos; /* Layer 3 protocol/TOS */
118 + u_int32_t ipv4_src, ipv4_dst; /* IPv4 src/dst IP addresses */
119 + u_int16_t l4_src_port, l4_dst_port; /* Layer 4 src/dst ports */
120 + u_int8_t tcp_flags; /* TCP flags (0 if not available) */
122 + u_int16_t last_matched_plugin_id; /* If > 0, identifies the plugin that matched the packet */
124 + struct pkt_offset offset; /* Offsets of L3/L4/payload elements */
125 + struct pkt_aggregation_info aggregation; /* Future or plugin use */
129 +struct pfring_pkthdr {
130 + struct timeval ts; /* time stamp */
131 + u_int32_t caplen; /* length of portion present */
132 + u_int32_t len; /* length this packet (off wire) */
133 + struct pkt_parsing_info parsed_pkt; /* packet parsing info */
134 + u_int16_t parsed_header_len; /* Extra parsing data before packet */
137 +/* *********************************** */
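For illustration (a userland sketch, not in the patch), a consumer can read the pre-parsed 5-tuple straight from this header and skip dissecting the packet a second time:

void print_flow(struct pfring_pkthdr *hdr)
{
  struct pkt_parsing_info *p = &hdr->parsed_pkt;

  /* IPv4 addresses and ports are stored as plain integers here */
  printf("l3_proto=%u %08x:%u -> %08x:%u vlan=%u\n",
         p->l3_proto,
         p->ipv4_src, p->l4_src_port,
         p->ipv4_dst, p->l4_dst_port,
         p->vlan_id);
}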
139 +#define MAX_PLUGIN_ID 64
140 +#define MAX_PLUGIN_FIELDS 32
142 +/* ************************************************* */
145 + u_int8_t proto; /* Use 0 for 'any' protocol */
146 + u_int16_t vlan_id; /* Use '0' for any vlan */
147 + u_int32_t host_low, host_high; /* Use '0' for any host. This is applied to both source
148 + and destination. */
149 + u_int16_t port_low, port_high; /* All ports between port_low...port_high
150 + 0 means 'any' port. This is applied to both source
151 + and destination. This means that
152 + (proto, sip, sport, dip, dport) matches the rule if
153 + one of "sip & sport", "sip & dport", "dip & sport"
155 +} filtering_rule_core_fields;
157 +/* ************************************************* */
159 +#define FILTER_PLUGIN_DATA_LEN 256
162 + char payload_pattern[32]; /* If strlen(payload_pattern) > 0, the packet payload
163 + must match the specified pattern */
164 + u_int16_t filter_plugin_id; /* If > 0 identifies a plugin to which the datastructure
165 + below will be passed for matching */
166 + char filter_plugin_data[FILTER_PLUGIN_DATA_LEN];
167 + /* Opaque datastructure that is interpreted by the
168 + specified plugin and that specifies a filtering
169 + criteria to be checked for match. Usually this data
170 + is recast into a more meaningful datastructure
172 +} filtering_rule_extended_fields;
174 +/* ************************************************* */
177 + /* Plugin Action */
178 + u_int16_t plugin_id; /* ('0'=no plugin) id of the plugin associated with this rule */
179 +} filtering_rule_plugin_action;
182 + forward_packet_and_stop_rule_evaluation = 0,
183 + dont_forward_packet_and_stop_rule_evaluation,
184 + execute_action_and_continue_rule_evaluation,
185 + forward_packet_add_rule_and_stop_rule_evaluation
186 +} rule_action_behaviour;
189 + forward_packet = 100,
190 + dont_forward_packet,
191 + use_rule_forward_policy
192 +} packet_action_behaviour;
195 + u_int16_t rule_id; /* Rules are processed in order from lowest to highest id */
196 + rule_action_behaviour rule_action; /* What to do in case of match */
197 + u_int8_t balance_id, balance_pool; /* If balance_pool > 0, then pass the packet above only if the
198 + (hash(proto, sip, sport, dip, dport) % balance_pool)
200 + filtering_rule_core_fields core_fields;
201 + filtering_rule_extended_fields extended_fields;
202 + filtering_rule_plugin_action plugin_action;
203 + unsigned long jiffies_last_match; /* Jiffies of the last rule match (updated by pf_ring) */
206 +/* *********************************** */
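A hedged userland sketch of installing one of these rules, reusing the fd from the earlier socket sketch (setsockopt level 0 again assumed): drop all TCP traffic on port 80, evaluated at position 5.

filtering_rule rule;

memset(&rule, 0, sizeof(rule));
rule.rule_id     = 5;   /* rules run in ascending rule_id order */
rule.rule_action = dont_forward_packet_and_stop_rule_evaluation;
rule.core_fields.proto    = 6;   /* TCP */
rule.core_fields.port_low = rule.core_fields.port_high = 80;

if (setsockopt(fd, 0, SO_ADD_FILTERING_RULE, &rule, sizeof(rule)) < 0)
  perror("SO_ADD_FILTERING_RULE");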
208 +/* Hash size used for precise packet matching */
209 +#define DEFAULT_RING_HASH_SIZE 4096
212 + * The hashtable contains only perfect matches: no
213 + * wildcards or the like are accepted.
218 + u_int32_t host_peer_a, host_peer_b;
219 + u_int16_t port_peer_a, port_peer_b;
221 + rule_action_behaviour rule_action; /* What to do in case of match */
222 + filtering_rule_plugin_action plugin_action;
223 + unsigned long jiffies_last_match; /* Jiffies of the last rule match (updated by pf_ring) */
224 +} hash_filtering_rule;
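Sketch of an exact-match entry for a single flow, using only the fields shown above; that addresses are given in host byte order, and that SO_ADD_FILTERING_RULE distinguishes the two rule types by payload size, are assumptions.

hash_filtering_rule hrule;

memset(&hrule, 0, sizeof(hrule));
hrule.host_peer_a = ntohl(inet_addr("10.0.0.1"));
hrule.host_peer_b = ntohl(inet_addr("10.0.0.2"));
hrule.port_peer_a = 1234;
hrule.port_peer_b = 80;
hrule.rule_action = forward_packet_and_stop_rule_evaluation;

if (setsockopt(fd, 0, SO_ADD_FILTERING_RULE, &hrule, sizeof(hrule)) < 0)
  perror("hash rule");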
226 +/* ************************************************* */
228 +typedef struct _filtering_hash_bucket {
229 + hash_filtering_rule rule;
230 + void *plugin_data_ptr; /* ptr to a *contiguous* memory area
231 + allocated by the plugin */
232 + u_int16_t plugin_data_ptr_len;
233 + struct _filtering_hash_bucket *next;
234 +} filtering_hash_bucket;
236 +/* *********************************** */
238 +#define RING_MIN_SLOT_SIZE (60+sizeof(struct pfring_pkthdr))
239 +#define RING_MAX_SLOT_SIZE (1514+sizeof(struct pfring_pkthdr))
242 +#define min(a,b) (((a) < (b)) ? (a) : (b))
245 +/* *********************************** */
246 +/* False sharing reference: http://en.wikipedia.org/wiki/False_sharing */
248 +typedef struct flowSlotInfo {
249 + u_int16_t version, sample_rate;
250 + u_int32_t tot_slots, slot_len, data_len, tot_mem;
251 + u_int64_t tot_pkts, tot_lost, tot_insert, tot_read;
252 + u_int32_t insert_idx;
253 + u_int8_t padding[72]; /* Used to avoid false sharing */
254 + u_int32_t remove_idx;
255 + u_int32_t padding2[31]; /* Used to avoid false sharing */
258 +/* *********************************** */
260 +typedef struct flowSlot {
262 + u_char magic; /* It must always be zero */
264 + u_char slot_state; /* 0=empty, 1=full */
265 + u_char bucket; /* bucket[bucketLen] */
268 +/* *********************************** */
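Putting flowSlotInfo and flowSlot together, a userland consumer loop over the mmap()ed area might look like the sketch below. The slot memory layout and wrap-around arithmetic are assumptions inferred from the fields above; FlowSlot is the assumed typedef name for struct flowSlot, and process_packet() is a hypothetical callback.

FlowSlotInfo *info  = (FlowSlotInfo *) ring_memory;
char         *slots = (char *) ring_memory + sizeof(FlowSlotInfo);

while (info->tot_read < info->tot_insert) {
  FlowSlot *slot = (FlowSlot *) &slots[info->remove_idx * info->slot_len];

  if (slot->slot_state == 1) {                  /* 1 = full */
    process_packet((struct pfring_pkthdr *) &slot->bucket);
    slot->slot_state = 0;                       /* hand the slot back */
    info->remove_idx = (info->remove_idx + 1) % info->tot_slots;
    info->tot_read++;
  }
}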
272 +FlowSlotInfo* getRingPtr(void);
273 +int allocateRing(char *deviceName, u_int numSlots,
274 + u_int bucketLen, u_int sampleRate);
275 +unsigned int pollRing(struct file *fp, struct poll_table_struct * wait);
276 +void deallocateRing(void);
278 +/* ************************* */
280 +#endif /* __KERNEL__ */
282 +/* *********************************** */
284 +#define PF_RING 27 /* Packet Ring */
285 +#define SOCK_RING PF_RING
288 +#define SIORINGPOLL 0x8888
290 +/* ************************************************* */
292 +typedef int (*dna_wait_packet)(void *adapter, int mode);
295 + add_device_mapping = 0, remove_device_mapping
296 +} dna_device_operation;
299 + intel_e1000 = 0, intel_igb, intel_ixgbe
303 + unsigned long packet_memory; /* Invalid in userland */
304 + u_int packet_memory_num_slots;
305 + u_int packet_memory_slot_len;
306 + u_int packet_memory_tot_len;
307 + void *descr_packet_memory; /* Invalid in userland */
308 + u_int descr_packet_memory_num_slots;
309 + u_int descr_packet_memory_slot_len;
310 + u_int descr_packet_memory_tot_len;
312 + char *phys_card_memory; /* Invalid in userland */
313 + u_int phys_card_memory_len;
314 + struct net_device *netdev; /* Invalid in userland */
315 + dna_device_model device_model;
317 + wait_queue_head_t *packet_waitqueue;
319 + void *packet_waitqueue;
321 + u_int8_t *interrupt_received, in_use;
323 + dna_wait_packet wait_packet_function_ptr;
327 + dna_device_operation operation;
328 + char device_name[8];
329 + int32_t channel_id;
330 +} dna_device_mapping;
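A hedged userland sketch of requesting a DNA mapping through SO_MAP_DNA_DEVICE (setsockopt level 0 once more assumed):

dna_device_mapping mapping;

memset(&mapping, 0, sizeof(mapping));
mapping.operation  = add_device_mapping;
mapping.channel_id = 0;
strncpy(mapping.device_name, "eth1", sizeof(mapping.device_name) - 1);

if (setsockopt(fd, 0, SO_MAP_DNA_DEVICE, &mapping, sizeof(mapping)) < 0)
  perror("SO_MAP_DNA_DEVICE");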
332 +/* ************************************************* */
337 + cluster_per_flow = 0,
338 + cluster_round_robin
341 +#define CLUSTER_LEN 8
344 + * A ring cluster is used to group together rings used by various applications
345 + * so that they look, from the PF_RING point of view, as a single ring.
346 + * This means that developers can use clusters for sharing packets across
347 + * applications using various policies as specified in the hashing_mode
350 +struct ring_cluster {
351 + u_short cluster_id; /* 0 = no cluster */
352 + u_short num_cluster_elements;
353 + enum cluster_type hashing_mode;
354 + u_short hashing_id;
355 + struct sock *sk[CLUSTER_LEN];
359 + * Linked-list of ring clusters.
362 + struct ring_cluster cluster;
363 + struct list_head list;
364 +} ring_cluster_element;
368 + struct list_head list;
371 +/* ************************************************* */
374 + * Linked-list of ring sockets.
376 +struct ring_element {
377 + struct list_head list;
381 +/* ************************************************* */
383 +struct ring_opt; /* Forward declaration */
385 +typedef int (*do_handle_filtering_hash_bucket)(struct ring_opt *pfr,
386 + filtering_hash_bucket* rule,
389 +/* ************************************************* */
391 +#define RING_ANY_CHANNEL -1
397 + u_int8_t ring_active;
398 + struct net_device *ring_netdev;
401 + char *appl_name; /* String that identifies the application bound to the socket */
403 + /* Direct NIC Access */
404 + u_int8_t mmap_count;
405 + dna_device *dna_device;
408 + u_short cluster_id; /* 0 = no cluster */
411 + int32_t channel_id; /* -1 = any channel */
414 + struct net_device *reflector_dev; /* Reflector device */
416 + /* Packet buffers */
417 + unsigned long order;
420 + void * ring_memory;
421 + u_int32_t bucket_len;
422 + FlowSlotInfo *slots_info; /* Points to ring_memory */
423 + char *ring_slots; /* Points to ring_memory+sizeof(FlowSlotInfo) */
425 + /* Packet Sampling */
426 + u_int32_t pktToSample, sample_rate;
429 + struct sk_filter *bpfFilter;
431 + /* Filtering Rules */
432 + filtering_hash_bucket **filtering_hash;
433 + u_int16_t num_filtering_rules;
434 + u_int8_t rules_default_accept_policy; /* 1=default policy is accept, drop otherwise */
435 + struct list_head rules;
438 + atomic_t num_ring_users;
439 + wait_queue_head_t ring_slots_waitqueue;
440 + rwlock_t ring_index_lock, ring_rules_lock;
442 + /* Indexes (Internal) */
443 + u_int insert_page_id, insert_slot_id;
445 + /* Function pointer */
446 + do_handle_filtering_hash_bucket handle_hash_rule;
449 +/* **************************************** */
452 + * Linked-list of device rings
455 + struct ring_opt *the_ring;
456 + struct list_head list;
457 +} device_ring_list_element;
459 +/* **************************************** */
462 + filtering_rule rule;
464 + struct list_head list;
466 + /* Plugin action */
467 + void *plugin_data_ptr; /* ptr to a *contiguous* memory area allocated by the plugin */
468 +} filtering_rule_element;
470 +struct parse_buffer {
475 +/* **************************************** */
478 +/* Execute an action (e.g. update rule stats) */
479 +typedef int (*plugin_handle_skb)(struct ring_opt *the_ring,
480 + filtering_rule_element *rule, /* In case the match is on the list */
481 + filtering_hash_bucket *hash_bucket, /* In case the match is on the hash */
482 + struct pfring_pkthdr *hdr,
483 + struct sk_buff *skb,
484 + u_int16_t filter_plugin_id,
485 + struct parse_buffer **filter_rule_memory_storage,
486 + packet_action_behaviour *behaviour);
487 +/* Return 1/0 in case of match/no match for the given skb */
488 +typedef int (*plugin_filter_skb)(struct ring_opt *the_ring,
489 + filtering_rule_element *rule,
490 + struct pfring_pkthdr *hdr,
491 + struct sk_buff *skb,
492 + struct parse_buffer **filter_rule_memory_storage);
493 +/* Get stats about the rule */
494 +typedef int (*plugin_get_stats)(struct ring_opt *pfr,
495 + filtering_rule_element *rule,
496 + filtering_hash_bucket *hash_bucket,
497 + u_char* stats_buffer, u_int stats_buffer_len);
499 +/* Called when a ring is disposed */
500 +typedef void (*plugin_free_ring_mem)(filtering_rule_element *rule);
502 +struct pfring_plugin_registration {
503 + u_int16_t plugin_id;
504 + char name[16]; /* Unique plugin name (e.g. sip, udp) */
505 + char description[64]; /* Short plugin description */
506 + plugin_filter_skb pfring_plugin_filter_skb; /* Filter skb: 1=match, 0=no match */
507 + plugin_handle_skb pfring_plugin_handle_skb;
508 + plugin_get_stats pfring_plugin_get_stats;
509 + plugin_free_ring_mem pfring_plugin_free_ring_mem;
512 +typedef int (*register_pfring_plugin)(struct pfring_plugin_registration
514 +typedef int (*unregister_pfring_plugin)(u_int16_t pfring_plugin_id);
515 +typedef u_int (*read_device_pfring_free_slots)(int ifindex);
516 +typedef void (*handle_ring_dna_device)(dna_device_operation operation,
517 + unsigned long packet_memory,
518 + u_int packet_memory_num_slots,
519 + u_int packet_memory_slot_len,
520 + u_int packet_memory_tot_len,
521 + void *descr_packet_memory,
522 + u_int descr_packet_memory_num_slots,
523 + u_int descr_packet_memory_slot_len,
524 + u_int descr_packet_memory_tot_len,
526 + void *phys_card_memory,
527 + u_int phys_card_memory_len,
528 + struct net_device *netdev,
529 + dna_device_model device_model,
530 + wait_queue_head_t *packet_waitqueue,
531 + u_int8_t *interrupt_received,
533 + dna_wait_packet wait_packet_function_ptr);
535 +extern register_pfring_plugin get_register_pfring_plugin(void);
536 +extern unregister_pfring_plugin get_unregister_pfring_plugin(void);
537 +extern read_device_pfring_free_slots get_read_device_pfring_free_slots(void);
539 +extern void set_register_pfring_plugin(register_pfring_plugin the_handler);
540 +extern void set_unregister_pfring_plugin(unregister_pfring_plugin the_handler);
541 +extern void set_read_device_pfring_free_slots(read_device_pfring_free_slots the_handler);
543 +extern int do_register_pfring_plugin(struct pfring_plugin_registration *reg);
544 +extern int do_unregister_pfring_plugin(u_int16_t pfring_plugin_id);
545 +extern int do_read_device_pfring_free_slots(int deviceidx);
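Taken together, a filtering plugin is a separate module that fills in a pfring_plugin_registration and hands it to do_register_pfring_plugin() at init time. A stub sketch, where dummy_filter_skb and the plugin id are made-up examples:

static int dummy_filter_skb(struct ring_opt *the_ring,
                            filtering_rule_element *rule,
                            struct pfring_pkthdr *hdr,
                            struct sk_buff *skb,
                            struct parse_buffer **mem)
{
  return 1;  /* 1 = every packet matches, 0 = no match */
}

static struct pfring_plugin_registration reg = {
  .plugin_id                = 7,        /* unique, below MAX_PLUGIN_ID */
  .name                     = "dummy",
  .description              = "match-everything demo",
  .pfring_plugin_filter_skb = dummy_filter_skb,
};

static int __init dummy_plugin_init(void)
{
  return do_register_pfring_plugin(&reg);
}

static void __exit dummy_plugin_exit(void)
{
  do_unregister_pfring_plugin(reg.plugin_id);
}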
547 +extern handle_ring_dna_device get_ring_dna_device_handler(void);
548 +extern void set_ring_dna_device_handler(handle_ring_dna_device
549 + the_dna_device_handler);
550 +extern void do_ring_dna_device_handler(dna_device_operation operation,
551 + unsigned long packet_memory,
552 + u_int packet_memory_num_slots,
553 + u_int packet_memory_slot_len,
554 + u_int packet_memory_tot_len,
555 + void *descr_packet_memory,
556 + u_int descr_packet_memory_num_slots,
557 + u_int descr_packet_memory_slot_len,
558 + u_int descr_packet_memory_tot_len,
560 + void *phys_card_memory,
561 + u_int phys_card_memory_len,
562 + struct net_device *netdev,
563 + dna_device_model device_model,
564 + wait_queue_head_t *packet_waitqueue,
565 + u_int8_t *interrupt_received,
567 + dna_wait_packet wait_packet_function_ptr);
569 +typedef int (*handle_ring_skb)(struct sk_buff *skb, u_char recv_packet,
570 + u_char real_skb, short channel_id);
571 +extern handle_ring_skb get_skb_ring_handler(void);
572 +extern void set_skb_ring_handler(handle_ring_skb the_handler);
573 +extern void do_skb_ring_handler(struct sk_buff *skb,
574 + u_char recv_packet, u_char real_skb);
576 +typedef int (*handle_ring_buffer)(struct net_device *dev,
577 + char *data, int len);
578 +extern handle_ring_buffer get_buffer_ring_handler(void);
579 +extern void set_buffer_ring_handler(handle_ring_buffer the_handler);
580 +extern int do_buffer_ring_handler(struct net_device *dev,
581 + char *data, int len);
583 +typedef int (*handle_add_hdr_to_ring)(struct ring_opt *pfr,
584 + struct pfring_pkthdr *hdr);
585 +extern handle_add_hdr_to_ring get_add_hdr_to_ring(void);
586 +extern void set_add_hdr_to_ring(handle_add_hdr_to_ring the_handler);
587 +extern int do_add_hdr_to_ring(struct ring_opt *pfr, struct pfring_pkthdr *hdr);
589 +#endif /* __KERNEL__ */
592 +/* *********************************** */
594 +#endif /* __RING_H */
595 diff --unified --recursive --new-file linux-2.6.30/net/core/dev.c linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c
596 --- linux-2.6.30/net/core/dev.c 2009-06-10 05:05:27.000000000 +0200
597 +++ linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c 2009-07-21 04:40:31.365770966 +0200
598 @@ -129,6 +129,196 @@
600 #include "net-sysfs.h"
602 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
604 +/* #define RING_DEBUG */
606 +#include <linux/ring.h>
607 +#include <linux/version.h>
609 +/* ************************************************ */
611 +static handle_ring_skb ring_handler = NULL;
613 +handle_ring_skb get_skb_ring_handler() { return(ring_handler); }
615 +void set_skb_ring_handler(handle_ring_skb the_handler) {
616 + ring_handler = the_handler;
619 +void do_skb_ring_handler(struct sk_buff *skb,
620 + u_char recv_packet, u_char real_skb) {
622 + ring_handler(skb, recv_packet, real_skb, -1 /* Unknown channel */);
625 +/* ************************************************ */
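The get/set/do triple above is the glue between the static kernel and the loadable ring module: dev.c owns the function pointer, the module installs it. A minimal sketch of the module side (my_ring_handler is a hypothetical name):

static int my_ring_handler(struct sk_buff *skb, u_char recv_packet,
                           u_char real_skb, short channel_id)
{
  /* copy the skb into the matching ring(s); return 1 once consumed */
  return 0;
}

static int __init ring_init(void)
{
  set_skb_ring_handler(my_ring_handler);
  return 0;
}

static void __exit ring_exit(void)
{
  set_skb_ring_handler(NULL);   /* detach before the module text goes away */
}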
627 +static handle_ring_buffer buffer_ring_handler = NULL;
629 +handle_ring_buffer get_buffer_ring_handler() { return(buffer_ring_handler); }
631 +void set_buffer_ring_handler(handle_ring_buffer the_handler) {
632 + buffer_ring_handler = the_handler;
635 +int do_buffer_ring_handler(struct net_device *dev, char *data, int len) {
636 + if(buffer_ring_handler) {
637 + buffer_ring_handler(dev, data, len);
643 +/* ******************* */
645 +static handle_add_hdr_to_ring buffer_add_hdr_to_ring = NULL;
647 +handle_add_hdr_to_ring get_add_hdr_to_ring() { return(buffer_add_hdr_to_ring); }
649 +void set_add_hdr_to_ring(handle_add_hdr_to_ring the_handler) {
650 + buffer_add_hdr_to_ring = the_handler;
653 +int do_add_hdr_to_ring(struct ring_opt *pfr, struct pfring_pkthdr *hdr) {
654 + if(buffer_add_hdr_to_ring) {
655 + buffer_add_hdr_to_ring(pfr, hdr);
661 +/* ************************************************ */
663 +static register_pfring_plugin pfring_registration = NULL;
665 +register_pfring_plugin get_register_pfring_plugin() { return(pfring_registration); }
667 +void set_register_pfring_plugin(register_pfring_plugin the_handler) {
668 + pfring_registration = the_handler;
671 +int do_register_pfring_plugin(struct pfring_plugin_registration *reg) {
672 + if(pfring_registration) {
673 + pfring_registration(reg);
679 +/* ************************************************ */
681 +static unregister_pfring_plugin pfring_unregistration = NULL;
683 +unregister_pfring_plugin get_unregister_pfring_plugin() { return(pfring_unregistration); }
685 +void set_unregister_pfring_plugin(unregister_pfring_plugin the_handler) {
686 + pfring_unregistration = the_handler;
689 +int do_unregister_pfring_plugin(u_int16_t pfring_plugin_id) {
690 + if(pfring_unregistration) {
691 + pfring_unregistration(pfring_plugin_id);
697 +/* ************************************************ */
699 +static handle_ring_dna_device ring_dna_device_handler = NULL;
701 +handle_ring_dna_device get_ring_dna_device_handler() { return(ring_dna_device_handler); }
703 +void set_ring_dna_device_handler(handle_ring_dna_device the_dna_device_handler) {
704 + ring_dna_device_handler = the_dna_device_handler;
707 +void do_ring_dna_device_handler(dna_device_operation operation,
708 + unsigned long packet_memory,
709 + u_int packet_memory_num_slots,
710 + u_int packet_memory_slot_len,
711 + u_int packet_memory_tot_len,
712 + void *descr_packet_memory,
713 + u_int descr_packet_memory_num_slots,
714 + u_int descr_packet_memory_slot_len,
715 + u_int descr_packet_memory_tot_len,
717 + void *phys_card_memory,
718 + u_int phys_card_memory_len,
719 + struct net_device *netdev,
720 + dna_device_model device_model,
721 + wait_queue_head_t *packet_waitqueue,
722 + u_int8_t *interrupt_received,
724 + dna_wait_packet wait_packet_function_ptr) {
725 + if(ring_dna_device_handler)
726 + ring_dna_device_handler(operation,
728 + packet_memory_num_slots,
729 + packet_memory_slot_len,
730 + packet_memory_tot_len,
731 + descr_packet_memory,
732 + descr_packet_memory_num_slots,
733 + descr_packet_memory_slot_len,
734 + descr_packet_memory_tot_len, channel_id,
735 + phys_card_memory, phys_card_memory_len,
736 + netdev, device_model, packet_waitqueue,
737 + interrupt_received, adapter_ptr,
738 + wait_packet_function_ptr);
741 +/* ************************************************ */
743 +static read_device_pfring_free_slots pfring_free_device_slots = NULL;
745 +read_device_pfring_free_slots get_read_device_pfring_free_slots() { return(pfring_free_device_slots); }
747 +void set_read_device_pfring_free_slots(read_device_pfring_free_slots the_handler) {
748 + pfring_free_device_slots = the_handler;
751 +int do_read_device_pfring_free_slots(int deviceidx) {
752 + if(pfring_free_device_slots) {
753 + return(pfring_free_device_slots(deviceidx));
758 +/* ************************************************ */
760 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
761 +EXPORT_SYMBOL(get_skb_ring_handler);
762 +EXPORT_SYMBOL(set_skb_ring_handler);
763 +EXPORT_SYMBOL(do_skb_ring_handler);
765 +EXPORT_SYMBOL(get_buffer_ring_handler);
766 +EXPORT_SYMBOL(set_buffer_ring_handler);
767 +EXPORT_SYMBOL(do_buffer_ring_handler);
769 +EXPORT_SYMBOL(get_add_hdr_to_ring);
770 +EXPORT_SYMBOL(set_add_hdr_to_ring);
771 +EXPORT_SYMBOL(do_add_hdr_to_ring);
773 +EXPORT_SYMBOL(get_register_pfring_plugin);
774 +EXPORT_SYMBOL(set_register_pfring_plugin);
775 +EXPORT_SYMBOL(do_register_pfring_plugin);
777 +EXPORT_SYMBOL(get_unregister_pfring_plugin);
778 +EXPORT_SYMBOL(set_unregister_pfring_plugin);
779 +EXPORT_SYMBOL(do_unregister_pfring_plugin);
781 +EXPORT_SYMBOL(get_ring_dna_device_handler);
782 +EXPORT_SYMBOL(set_ring_dna_device_handler);
783 +EXPORT_SYMBOL(do_ring_dna_device_handler);
785 +EXPORT_SYMBOL(get_read_device_pfring_free_slots);
786 +EXPORT_SYMBOL(set_read_device_pfring_free_slots);
787 +EXPORT_SYMBOL(do_read_device_pfring_free_slots);
792 /* Instead of increasing this, you should create a hash table. */
793 #define MAX_GRO_SKBS 8
795 @@ -1839,6 +2029,12 @@
797 spinlock_t *root_lock = qdisc_lock(q);
799 + /* This TX patch applies to all drivers */
800 + #if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
801 + if(ring_handler) ring_handler(skb, 0, 1, -1 /* Unknown channel */);
802 + #endif /* CONFIG_RING */
805 spin_lock(root_lock);
807 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
808 @@ -1936,6 +2132,16 @@
811 /* if netpoll wants it, pretend we never saw it */
812 +/* This RX patch applies only to non-NAPI drivers */
814 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
815 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
816 +if(ring_handler && ring_handler(skb, 1, 1, -1 /* Unknown channel */)) {
817 + /* The packet has been copied into a ring */
818 + return(NET_RX_SUCCESS);
821 +#endif /* CONFIG_RING */
825 @@ -2220,6 +2426,16 @@
826 struct net_device *orig_dev;
827 struct net_device *null_or_orig;
828 int ret = NET_RX_DROP;
830 + This RX patch applies to both non-NAPI drivers (as netif_receive_skb
831 + is eventually called by netif_rx) and NAPI drivers.
833 +#if defined (CONFIG_RING) || defined(CONFIG_RING_MODULE)
834 +if(ring_handler && ring_handler(skb, 1, 1, -1 /* Unknown channel */)) {
835 + /* The packet has been copied into a ring */
836 + return(NET_RX_SUCCESS);
838 +#endif /* CONFIG_RING */
841 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
842 diff --unified --recursive --new-file linux-2.6.30/net/core/dev.c.ORG linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c.ORG
843 --- linux-2.6.30/net/core/dev.c.ORG 1970-01-01 01:00:00.000000000 +0100
844 +++ linux-2.6.30-1-686-smp-PF_RING/net/core/dev.c.ORG 2009-07-21 04:40:31.319103951 +0200
847 + * NET3 Protocol independent device support routines.
849 + * This program is free software; you can redistribute it and/or
850 + * modify it under the terms of the GNU General Public License
851 + * as published by the Free Software Foundation; either version
852 + * 2 of the License, or (at your option) any later version.
854 + * Derived from the non IP parts of dev.c 1.0.19
855 + * Authors: Ross Biro
856 + * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
857 + * Mark Evans, <evansmp@uhura.aston.ac.uk>
859 + * Additional Authors:
860 + * Florian la Roche <rzsfl@rz.uni-sb.de>
861 + * Alan Cox <gw4pts@gw4pts.ampr.org>
862 + * David Hinds <dahinds@users.sourceforge.net>
863 + * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
864 + * Adam Sulmicki <adam@cfar.umd.edu>
865 + * Pekka Riikonen <priikone@poesidon.pspt.fi>
868 + * D.J. Barrow : Fixed bug where dev->refcnt gets set
869 + * to 2 if register_netdev gets called
870 + * before net_dev_init & also removed a
871 + * few lines of code in the process.
872 + * Alan Cox : device private ioctl copies fields back.
873 + * Alan Cox : Transmit queue code does relevant
874 + * stunts to keep the queue safe.
875 + * Alan Cox : Fixed double lock.
876 + * Alan Cox : Fixed promisc NULL pointer trap
877 + * ???????? : Support the full private ioctl range
878 + * Alan Cox : Moved ioctl permission check into
880 + * Tim Kordas : SIOCADDMULTI/SIOCDELMULTI
881 + * Alan Cox : 100 backlog just doesn't cut it when
882 + * you start doing multicast video 8)
883 + * Alan Cox : Rewrote net_bh and list manager.
884 + * Alan Cox : Fix ETH_P_ALL echoback lengths.
885 + * Alan Cox : Took out transmit every packet pass
886 + * Saved a few bytes in the ioctl handler
887 + * Alan Cox : Network driver sets packet type before
888 + * calling netif_rx. Saves a function
890 + * Alan Cox : Hashed net_bh()
891 + * Richard Kooijman: Timestamp fixes.
892 + * Alan Cox : Wrong field in SIOCGIFDSTADDR
893 + * Alan Cox : Device lock protection.
894 + * Alan Cox : Fixed nasty side effect of device close
896 + * Rudi Cilibrasi : Pass the right thing to
897 + * set_mac_address()
898 + * Dave Miller : 32bit quantity for the device lock to
899 + * make it work out on a Sparc.
900 + * Bjorn Ekwall : Added KERNELD hack.
901 + * Alan Cox : Cleaned up the backlog initialise.
902 + * Craig Metz : SIOCGIFCONF fix if space for under
904 + * Thomas Bogendoerfer : Return ENODEV for dev_open, if there
905 + * is no device open function.
906 + * Andi Kleen : Fix error reporting for SIOCGIFCONF
907 + * Michael Chastain : Fix signed/unsigned for SIOCGIFCONF
908 + * Cyrus Durgin : Cleaned for KMOD
909 + * Adam Sulmicki : Bug Fix : Network Device Unload
910 + * A network device unload needs to purge
911 + * the backlog queue.
912 + * Paul Rusty Russell : SIOCSIFNAME
913 + * Pekka Riikonen : Netdev boot-time settings code
914 + * Andrew Morton : Make unregister_netdevice wait
915 + * indefinitely on dev->refcnt
916 + * J Hadi Salim : - Backlog queue sampling
917 + * - netif_rx() feedback
920 +#include <asm/uaccess.h>
921 +#include <asm/system.h>
922 +#include <linux/bitops.h>
923 +#include <linux/capability.h>
924 +#include <linux/cpu.h>
925 +#include <linux/types.h>
926 +#include <linux/kernel.h>
927 +#include <linux/sched.h>
928 +#include <linux/mutex.h>
929 +#include <linux/string.h>
930 +#include <linux/mm.h>
931 +#include <linux/socket.h>
932 +#include <linux/sockios.h>
933 +#include <linux/errno.h>
934 +#include <linux/interrupt.h>
935 +#include <linux/if_ether.h>
936 +#include <linux/netdevice.h>
937 +#include <linux/etherdevice.h>
938 +#include <linux/ethtool.h>
939 +#include <linux/notifier.h>
940 +#include <linux/skbuff.h>
941 +#include <net/net_namespace.h>
942 +#include <net/sock.h>
943 +#include <linux/rtnetlink.h>
944 +#include <linux/proc_fs.h>
945 +#include <linux/seq_file.h>
946 +#include <linux/stat.h>
947 +#include <linux/if_bridge.h>
948 +#include <linux/if_macvlan.h>
949 +#include <net/dst.h>
950 +#include <net/pkt_sched.h>
951 +#include <net/checksum.h>
952 +#include <linux/highmem.h>
953 +#include <linux/init.h>
954 +#include <linux/kmod.h>
955 +#include <linux/module.h>
956 +#include <linux/netpoll.h>
957 +#include <linux/rcupdate.h>
958 +#include <linux/delay.h>
959 +#include <net/wext.h>
960 +#include <net/iw_handler.h>
961 +#include <asm/current.h>
962 +#include <linux/audit.h>
963 +#include <linux/dmaengine.h>
964 +#include <linux/err.h>
965 +#include <linux/ctype.h>
966 +#include <linux/if_arp.h>
967 +#include <linux/if_vlan.h>
968 +#include <linux/ip.h>
970 +#include <linux/ipv6.h>
971 +#include <linux/in.h>
972 +#include <linux/jhash.h>
973 +#include <linux/random.h>
975 +#include "net-sysfs.h"
977 +/* Instead of increasing this, you should create a hash table. */
978 +#define MAX_GRO_SKBS 8
980 +/* This should be increased if a protocol with a bigger head is added. */
981 +#define GRO_MAX_HEAD (MAX_HEADER + 128)
984 + * The list of packet types we will receive (as opposed to discard)
985 + * and the routines to invoke.
987 + * Why 16. Because with 16 the only overlap we get on a hash of the
988 + * low nibble of the protocol value is RARP/SNAP/X.25.
990 + * NOTE: That is no longer true with the addition of VLAN tags. Not
991 + * sure which should go first, but I bet it won't make much
992 + * difference if we are running VLANs. The good news is that
993 + * this protocol won't be in the list unless compiled in, so
994 + * the average user (w/out VLANs) will not be adversely affected.
1011 +#define PTYPE_HASH_SIZE (16)
1012 +#define PTYPE_HASH_MASK (PTYPE_HASH_SIZE - 1)
1014 +static DEFINE_SPINLOCK(ptype_lock);
1015 +static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
1016 +static struct list_head ptype_all __read_mostly; /* Taps */
1019 + * The @dev_base_head list is protected by @dev_base_lock and the rtnl
1022 + * Pure readers hold dev_base_lock for reading.
1024 + * Writers must hold the rtnl semaphore while they loop through the
1025 + * dev_base_head list, and hold dev_base_lock for writing when they do the
1026 + * actual updates. This allows pure readers to access the list even
1027 + * while a writer is preparing to update it.
1029 + * To put it another way, dev_base_lock is held for writing only to
1030 + * protect against pure readers; the rtnl semaphore provides the
1031 + * protection against other writers.
1033 + * See, for example usages, register_netdevice() and
1034 + * unregister_netdevice(), which must be called with the rtnl
1037 +DEFINE_RWLOCK(dev_base_lock);
1039 +EXPORT_SYMBOL(dev_base_lock);
1041 +#define NETDEV_HASHBITS 8
1042 +#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)
1044 +static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
1046 + unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
1047 + return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
1050 +static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
1052 + return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
1055 +/* Device list insertion */
1056 +static int list_netdevice(struct net_device *dev)
1058 + struct net *net = dev_net(dev);
1062 + write_lock_bh(&dev_base_lock);
1063 + list_add_tail(&dev->dev_list, &net->dev_base_head);
1064 + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
1065 + hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
1066 + write_unlock_bh(&dev_base_lock);
1070 +/* Device list removal */
1071 +static void unlist_netdevice(struct net_device *dev)
1075 + /* Unlink dev from the device chain */
1076 + write_lock_bh(&dev_base_lock);
1077 + list_del(&dev->dev_list);
1078 + hlist_del(&dev->name_hlist);
1079 + hlist_del(&dev->index_hlist);
1080 + write_unlock_bh(&dev_base_lock);
1084 + * Our notifier list
1087 +static RAW_NOTIFIER_HEAD(netdev_chain);
1090 + * Device drivers call our routines to queue packets here. We empty the
1091 + * queue in the local softnet handler.
1094 +DEFINE_PER_CPU(struct softnet_data, softnet_data);
1096 +#ifdef CONFIG_LOCKDEP
1098 + * register_netdevice() inits txq->_xmit_lock and sets lockdep class
1099 + * according to dev->type
1101 +static const unsigned short netdev_lock_type[] =
1102 + {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
1103 + ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
1104 + ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
1105 + ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
1106 + ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
1107 + ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
1108 + ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
1109 + ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
1110 + ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
1111 + ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
1112 + ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
1113 + ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
1114 + ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
1115 + ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
1116 + ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
1118 +static const char *netdev_lock_name[] =
1119 + {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
1120 + "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
1121 + "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
1122 + "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
1123 + "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
1124 + "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
1125 + "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
1126 + "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
1127 + "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
1128 + "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
1129 + "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
1130 + "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
1131 + "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
1132 + "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
1133 + "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
1135 +static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
1136 +static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
1138 +static inline unsigned short netdev_lock_pos(unsigned short dev_type)
1142 + for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
1143 + if (netdev_lock_type[i] == dev_type)
1145 + /* the last key is used by default */
1146 + return ARRAY_SIZE(netdev_lock_type) - 1;
1149 +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
1150 + unsigned short dev_type)
1154 + i = netdev_lock_pos(dev_type);
1155 + lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
1156 + netdev_lock_name[i]);
1159 +static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
1163 + i = netdev_lock_pos(dev->type);
1164 + lockdep_set_class_and_name(&dev->addr_list_lock,
1165 + &netdev_addr_lock_key[i],
1166 + netdev_lock_name[i]);
1169 +static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
1170 + unsigned short dev_type)
1173 +static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
1178 +/*******************************************************************************
1180 + Protocol management and registration routines
1182 +*******************************************************************************/
1185 + * Add a protocol ID to the list. Now that the input handler is
1186 + * smarter we can dispense with all the messy stuff that used to be
1189 + * BEWARE!!! Protocol handlers, mangling input packets,
1190 + * MUST BE last in hash buckets and checking protocol handlers
1191 + * MUST start from promiscuous ptype_all chain in net_bh.
1192 + * It is true now, do not change it.
1193 + * Explanation follows: if protocol handler, mangling packet, will
1194 + * be the first on list, it is not able to sense, that packet
1195 + * is cloned and should be copied-on-write, so that it will
1196 + * change it and subsequent readers will get broken packet.
1201 + * dev_add_pack - add packet handler
1202 + * @pt: packet type declaration
1204 + * Add a protocol handler to the networking stack. The passed &packet_type
1205 + * is linked into kernel lists and may not be freed until it has been
1206 + * removed from the kernel lists.
1208 + * This call does not sleep therefore it can not
1209 + * guarantee all CPU's that are in middle of receiving packets
1210 + * will see the new packet type (until the next received packet).
1213 +void dev_add_pack(struct packet_type *pt)
1217 + spin_lock_bh(&ptype_lock);
1218 + if (pt->type == htons(ETH_P_ALL))
1219 + list_add_rcu(&pt->list, &ptype_all);
1221 + hash = ntohs(pt->type) & PTYPE_HASH_MASK;
1222 + list_add_rcu(&pt->list, &ptype_base[hash]);
1224 + spin_unlock_bh(&ptype_lock);
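For reference, a typical caller (not part of this patch) registers a tap for every Ethernet frame like so:

static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
                   struct packet_type *pt, struct net_device *orig_dev)
{
  printk(KERN_DEBUG "tap: %u bytes on %s\n", skb->len, dev->name);
  kfree_skb(skb);
  return NET_RX_SUCCESS;
}

static struct packet_type tap_pt = {
  .type = __constant_htons(ETH_P_ALL),   /* goes on the ptype_all chain */
  .func = tap_rcv,
};

/* module init: dev_add_pack(&tap_pt);  module exit: dev_remove_pack(&tap_pt); */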
1228 + * __dev_remove_pack - remove packet handler
1229 + * @pt: packet type declaration
1231 + * Remove a protocol handler that was previously added to the kernel
1232 + * protocol handlers by dev_add_pack(). The passed &packet_type is removed
1233 + * from the kernel lists and can be freed or reused once this function
1236 + * The packet type might still be in use by receivers
1237 + * and must not be freed until after all the CPU's have gone
1238 + * through a quiescent state.
1240 +void __dev_remove_pack(struct packet_type *pt)
1242 + struct list_head *head;
1243 + struct packet_type *pt1;
1245 + spin_lock_bh(&ptype_lock);
1247 + if (pt->type == htons(ETH_P_ALL))
1248 + head = &ptype_all;
1250 + head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
1252 + list_for_each_entry(pt1, head, list) {
1254 + list_del_rcu(&pt->list);
1259 + printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
1261 + spin_unlock_bh(&ptype_lock);
1264 + * dev_remove_pack - remove packet handler
1265 + * @pt: packet type declaration
1267 + * Remove a protocol handler that was previously added to the kernel
1268 + * protocol handlers by dev_add_pack(). The passed &packet_type is removed
1269 + * from the kernel lists and can be freed or reused once this function
1272 + * This call sleeps to guarantee that no CPU is looking at the packet
1273 + * type after return.
1275 +void dev_remove_pack(struct packet_type *pt)
1277 + __dev_remove_pack(pt);
1279 + synchronize_net();
1282 +/******************************************************************************
1284 + Device Boot-time Settings Routines
1286 +*******************************************************************************/
1288 +/* Boot time configuration table */
1289 +static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
1292 + * netdev_boot_setup_add - add new setup entry
1293 + * @name: name of the device
1294 + * @map: configured settings for the device
1296 + * Adds new setup entry to the dev_boot_setup list. The function
1297 + * returns 0 on error and 1 on success. This is a generic routine to
1300 +static int netdev_boot_setup_add(char *name, struct ifmap *map)
1302 + struct netdev_boot_setup *s;
1305 + s = dev_boot_setup;
1306 + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
1307 + if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
1308 + memset(s[i].name, 0, sizeof(s[i].name));
1309 + strlcpy(s[i].name, name, IFNAMSIZ);
1310 + memcpy(&s[i].map, map, sizeof(s[i].map));
1315 + return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
1319 + * netdev_boot_setup_check - check boot time settings
1320 + * @dev: the netdevice
1322 + * Check boot time settings for the device.
1323 + * The found settings are set for the device to be used
1324 + * later in the device probing.
1325 + * Returns 0 if no settings found, 1 if they are.
1327 +int netdev_boot_setup_check(struct net_device *dev)
1329 + struct netdev_boot_setup *s = dev_boot_setup;
1332 + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
1333 + if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
1334 + !strcmp(dev->name, s[i].name)) {
1335 + dev->irq = s[i].map.irq;
1336 + dev->base_addr = s[i].map.base_addr;
1337 + dev->mem_start = s[i].map.mem_start;
1338 + dev->mem_end = s[i].map.mem_end;
1347 + * netdev_boot_base - get address from boot time settings
1348 + * @prefix: prefix for network device
1349 + * @unit: id for network device
1351 + * Check boot time settings for the base address of device.
1352 + * The found settings are set for the device to be used
1353 + * later in the device probing.
1354 + * Returns 0 if no settings found.
1356 +unsigned long netdev_boot_base(const char *prefix, int unit)
1358 + const struct netdev_boot_setup *s = dev_boot_setup;
1359 + char name[IFNAMSIZ];
1362 + sprintf(name, "%s%d", prefix, unit);
1365 + * If device already registered then return base of 1
1366 + * to indicate not to probe for this interface
1368 + if (__dev_get_by_name(&init_net, name))
1371 + for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
1372 + if (!strcmp(name, s[i].name))
1373 + return s[i].map.base_addr;
1378 + * Saves at boot time configured settings for any netdevice.
1380 +int __init netdev_boot_setup(char *str)
1385 + str = get_options(str, ARRAY_SIZE(ints), ints);
1386 + if (!str || !*str)
1389 + /* Save settings */
1390 + memset(&map, 0, sizeof(map));
1392 + map.irq = ints[1];
1394 + map.base_addr = ints[2];
1396 + map.mem_start = ints[3];
1398 + map.mem_end = ints[4];
1400 + /* Add new entry to the list */
1401 + return netdev_boot_setup_add(str, &map);
1404 +__setup("netdev=", netdev_boot_setup);
1406 +/*******************************************************************************
1408 + Device Interface Subroutines
1410 +*******************************************************************************/
1413 + * __dev_get_by_name - find a device by its name
1414 + * @net: the applicable net namespace
1415 + * @name: name to find
1417 + * Find an interface by name. Must be called under RTNL semaphore
1418 + * or @dev_base_lock. If the name is found a pointer to the device
1419 + * is returned. If the name is not found then %NULL is returned. The
1420 + * reference counters are not incremented so the caller must be
1421 + * careful with locks.
1424 +struct net_device *__dev_get_by_name(struct net *net, const char *name)
1426 + struct hlist_node *p;
1428 + hlist_for_each(p, dev_name_hash(net, name)) {
1429 + struct net_device *dev
1430 + = hlist_entry(p, struct net_device, name_hlist);
1431 + if (!strncmp(dev->name, name, IFNAMSIZ))
1438 + * dev_get_by_name - find a device by its name
1439 + * @net: the applicable net namespace
1440 + * @name: name to find
1442 + * Find an interface by name. This can be called from any
1443 + * context and does its own locking. The returned handle has
1444 + * the usage count incremented and the caller must use dev_put() to
1445 + * release it when it is no longer needed. %NULL is returned if no
1446 + * matching device is found.
1449 +struct net_device *dev_get_by_name(struct net *net, const char *name)
1451 + struct net_device *dev;
1453 + read_lock(&dev_base_lock);
1454 + dev = __dev_get_by_name(net, name);
1457 + read_unlock(&dev_base_lock);
1462 + * __dev_get_by_index - find a device by its ifindex
1463 + * @net: the applicable net namespace
1464 + * @ifindex: index of device
1466 + * Search for an interface by index. Returns %NULL if the device
1467 + * is not found or a pointer to the device. The device has not
1468 + * had its reference counter increased so the caller must be careful
1469 + * about locking. The caller must hold either the RTNL semaphore
1470 + * or @dev_base_lock.
1473 +struct net_device *__dev_get_by_index(struct net *net, int ifindex)
1475 + struct hlist_node *p;
1477 + hlist_for_each(p, dev_index_hash(net, ifindex)) {
1478 + struct net_device *dev
1479 + = hlist_entry(p, struct net_device, index_hlist);
1480 + if (dev->ifindex == ifindex)
1488 + * dev_get_by_index - find a device by its ifindex
1489 + * @net: the applicable net namespace
1490 + * @ifindex: index of device
1492 + * Search for an interface by index. Returns NULL if the device
1493 + * is not found or a pointer to the device. The device returned has
1494 + * had a reference added and the pointer is safe until the user calls
1495 + * dev_put to indicate they have finished with it.
1498 +struct net_device *dev_get_by_index(struct net *net, int ifindex)
1500 + struct net_device *dev;
1502 + read_lock(&dev_base_lock);
1503 + dev = __dev_get_by_index(net, ifindex);
1506 + read_unlock(&dev_base_lock);
1511 + * dev_getbyhwaddr - find a device by its hardware address
1512 + * @net: the applicable net namespace
1513 + * @type: media type of device
1514 + * @ha: hardware address
1516 + * Search for an interface by MAC address. Returns NULL if the device
1517 + * is not found or a pointer to the device. The caller must hold the
1518 + * rtnl semaphore. The returned device has not had its ref count increased
1519 + * and the caller must therefore be careful about locking
1522 + * If the API was consistent this would be __dev_get_by_hwaddr
1525 +struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
1527 + struct net_device *dev;
1531 + for_each_netdev(net, dev)
1532 + if (dev->type == type &&
1533 + !memcmp(dev->dev_addr, ha, dev->addr_len))
1539 +EXPORT_SYMBOL(dev_getbyhwaddr);
1541 +struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
1543 + struct net_device *dev;
1546 + for_each_netdev(net, dev)
1547 + if (dev->type == type)
1553 +EXPORT_SYMBOL(__dev_getfirstbyhwtype);
1555 +struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
1557 + struct net_device *dev;
1560 + dev = __dev_getfirstbyhwtype(net, type);
1567 +EXPORT_SYMBOL(dev_getfirstbyhwtype);
1570 + * dev_get_by_flags - find any device with given flags
1571 + * @net: the applicable net namespace
1572 + * @if_flags: IFF_* values
1573 + * @mask: bitmask of bits in if_flags to check
1575 + * Search for any interface with the given flags. Returns NULL if a device
1576 + * is not found or a pointer to the device. The device returned has
1577 + * had a reference added and the pointer is safe until the user calls
1578 + * dev_put to indicate they have finished with it.
1581 +struct net_device * dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
1583 + struct net_device *dev, *ret;
1586 + read_lock(&dev_base_lock);
1587 + for_each_netdev(net, dev) {
1588 + if (((dev->flags ^ if_flags) & mask) == 0) {
1594 + read_unlock(&dev_base_lock);
1599 + * dev_valid_name - check if name is okay for network device
1600 + * @name: name string
1602 + * Network device names need to be valid file names
1603 + * to allow sysfs to work. We also disallow any kind of
1606 +int dev_valid_name(const char *name)
1608 + if (*name == '\0')
1610 + if (strlen(name) >= IFNAMSIZ)
1612 + if (!strcmp(name, ".") || !strcmp(name, ".."))
1616 + if (*name == '/' || isspace(*name))
1624 + * __dev_alloc_name - allocate a name for a device
1625 + * @net: network namespace to allocate the device name in
1626 + * @name: name format string
1627 + * @buf: scratch buffer and result name string
1629 + * Passed a format string - eg "lt%d" it will try and find a suitable
1630 + * id. It scans list of devices to build up a free map, then chooses
1631 + * the first empty slot. The caller must hold the dev_base or rtnl lock
1632 + * while allocating the name and adding the device in order to avoid
1634 + * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1635 + * Returns the number of the unit assigned or a negative errno code.
1638 +static int __dev_alloc_name(struct net *net, const char *name, char *buf)
1642 + const int max_netdevices = 8*PAGE_SIZE;
1643 + unsigned long *inuse;
1644 + struct net_device *d;
1646 + p = strnchr(name, IFNAMSIZ-1, '%');
1649 + * Verify the string as this thing may have come from
1650 + * the user. There must be either one "%d" and no other "%"
1653 + if (p[1] != 'd' || strchr(p + 2, '%'))
1656 + /* Use one page as a bit array of possible slots */
1657 + inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
1661 + for_each_netdev(net, d) {
1662 + if (!sscanf(d->name, name, &i))
1664 + if (i < 0 || i >= max_netdevices)
1667 + /* avoid cases where sscanf is not exact inverse of printf */
1668 + snprintf(buf, IFNAMSIZ, name, i);
1669 + if (!strncmp(buf, d->name, IFNAMSIZ))
1670 + set_bit(i, inuse);
1673 + i = find_first_zero_bit(inuse, max_netdevices);
1674 + free_page((unsigned long) inuse);
1677 + snprintf(buf, IFNAMSIZ, name, i);
1678 + if (!__dev_get_by_name(net, buf))
1681 + /* It is possible to run out of possible slots
1682 + * when the name is long and there isn't enough space left
1683 + * for the digits, or if all bits are used.
1689 + * dev_alloc_name - allocate a name for a device
1691 + * @name: name format string
1693 + * Passed a format string - eg "lt%d" it will try and find a suitable
1694 + * id. It scans list of devices to build up a free map, then chooses
1695 + * the first empty slot. The caller must hold the dev_base or rtnl lock
1696 + * while allocating the name and adding the device in order to avoid
1698 + * Limited to bits_per_byte * page size devices (ie 32K on most platforms).
1699 + * Returns the number of the unit assigned or a negative errno code.
1702 +int dev_alloc_name(struct net_device *dev, const char *name)
1704 + char buf[IFNAMSIZ];
1708 + BUG_ON(!dev_net(dev));
1709 + net = dev_net(dev);
1710 + ret = __dev_alloc_name(net, name, buf);
1712 + strlcpy(dev->name, buf, IFNAMSIZ);
1718 + * dev_change_name - change name of a device
1720 + * @newname: name (or format string) must be at least IFNAMSIZ
1722 + * Change name of a device, can pass format strings "eth%d".
1723 + * for wildcarding.
1725 +int dev_change_name(struct net_device *dev, const char *newname)
1727 + char oldname[IFNAMSIZ];
1733 + BUG_ON(!dev_net(dev));
1735 + net = dev_net(dev);
1736 + if (dev->flags & IFF_UP)
1739 + if (!dev_valid_name(newname))
1742 + if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
1745 + memcpy(oldname, dev->name, IFNAMSIZ);
1747 + if (strchr(newname, '%')) {
1748 + err = dev_alloc_name(dev, newname);
1752 + else if (__dev_get_by_name(net, newname))
1755 + strlcpy(dev->name, newname, IFNAMSIZ);
1758 + /* For now only devices in the initial network namespace
1761 + if (net == &init_net) {
1762 + ret = device_rename(&dev->dev, dev->name);
1764 + memcpy(dev->name, oldname, IFNAMSIZ);
1769 + write_lock_bh(&dev_base_lock);
1770 + hlist_del(&dev->name_hlist);
1771 + hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
1772 + write_unlock_bh(&dev_base_lock);
1774 + ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
1775 + ret = notifier_to_errno(ret);
1780 + "%s: name change rollback failed: %d.\n",
1784 + memcpy(dev->name, oldname, IFNAMSIZ);
1793 + * dev_set_alias - change ifalias of a device
1795 + * @alias: name up to IFALIASZ
1796 + * @len: limit of bytes to copy from info
1798 + * Set ifalias for a device,
1800 +int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
1804 + if (len >= IFALIASZ)
1808 + if (dev->ifalias) {
1809 + kfree(dev->ifalias);
1810 + dev->ifalias = NULL;
1815 + dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
1816 + if (!dev->ifalias)
1819 + strlcpy(dev->ifalias, alias, len+1);
1825 + * netdev_features_change - device changes features
1826 + * @dev: device to cause notification
1828 + * Called to indicate a device has changed features.
1830 +void netdev_features_change(struct net_device *dev)
1832 + call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
1834 +EXPORT_SYMBOL(netdev_features_change);
1837 + * netdev_state_change - device changes state
1838 + * @dev: device to cause notification
1840 + * Called to indicate a device has changed state. This function calls
1841 + * the notifier chains for netdev_chain and sends a NEWLINK message
1842 + * to the routing socket.
1844 +void netdev_state_change(struct net_device *dev)
1846 + if (dev->flags & IFF_UP) {
1847 + call_netdevice_notifiers(NETDEV_CHANGE, dev);
1848 + rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
1852 +void netdev_bonding_change(struct net_device *dev)
1854 + call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
1856 +EXPORT_SYMBOL(netdev_bonding_change);
1859 + * dev_load - load a network module
1860 + * @net: the applicable net namespace
1861 + * @name: name of interface
1863 + * If a network interface is not present and the process has suitable
1864 + * privileges this function loads the module. If module loading is not
1865 + * available in this kernel then it becomes a nop.
1868 +void dev_load(struct net *net, const char *name)
1870 + struct net_device *dev;
1872 + read_lock(&dev_base_lock);
1873 + dev = __dev_get_by_name(net, name);
1874 + read_unlock(&dev_base_lock);
1876 + if (!dev && capable(CAP_SYS_MODULE))
1877 + request_module("%s", name);
1881 + * dev_open - prepare an interface for use.
1882 + * @dev: device to open
1884 + * Takes a device from down to up state. The device's private open
1885 + * function is invoked and then the multicast lists are loaded. Finally
1886 + * the device is moved into the up state and a %NETDEV_UP message is
1887 + * sent to the netdev notifier chain.
1889 + * Calling this function on an active interface is a nop. On a failure
1890 + * a negative errno code is returned.
1892 +int dev_open(struct net_device *dev)
1894 + const struct net_device_ops *ops = dev->netdev_ops;
1900 + * Is it already up?
1903 + if (dev->flags & IFF_UP)
1907 + * Is it even present?
1909 + if (!netif_device_present(dev))
1913 + * Call device private open method
1915 + set_bit(__LINK_STATE_START, &dev->state);
1917 + if (ops->ndo_validate_addr)
1918 + ret = ops->ndo_validate_addr(dev);
1920 + if (!ret && ops->ndo_open)
1921 + ret = ops->ndo_open(dev);
1924 + * If it went open OK then:
1928 + clear_bit(__LINK_STATE_START, &dev->state);
1933 + dev->flags |= IFF_UP;
1938 + net_dmaengine_get();
1941 + * Initialize multicasting status
1943 + dev_set_rx_mode(dev);
1946 + * Wakeup transmit queue engine
1948 + dev_activate(dev);
1951 + * ... and announce new interface.
1953 + call_netdevice_notifiers(NETDEV_UP, dev);
1960 + * dev_close - shutdown an interface.
1961 + * @dev: device to shutdown
1963 + * This function moves an active device into down state. A
1964 + * %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1965 + * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1968 +int dev_close(struct net_device *dev)
1970 + const struct net_device_ops *ops = dev->netdev_ops;
1975 + if (!(dev->flags & IFF_UP))
1979 + * Tell people we are going down, so that they can
1980 + * prepare to death, when device is still operating.
1982 + call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
1984 + clear_bit(__LINK_STATE_START, &dev->state);
1986 + /* Synchronize to scheduled poll. We cannot touch poll list,
1987 + * it can be even on different cpu. So just clear netif_running().
1989 + * dev->stop() will invoke napi_disable() on all of its
1990 + * napi_struct instances on this device.
1992 + smp_mb__after_clear_bit(); /* Commit netif_running(). */
1994 + dev_deactivate(dev);
1997 + * Call the device specific close. This cannot fail.
1998 + * Only if device is UP
2000 + * We allow it to be called even after a DETACH hot-plug
2003 + if (ops->ndo_stop)
2004 + ops->ndo_stop(dev);
2007 + * Device is now down.
2010 + dev->flags &= ~IFF_UP;
2013 + * Tell people we are down
2015 + call_netdevice_notifiers(NETDEV_DOWN, dev);
2018 + * Shutdown NET_DMA
2020 + net_dmaengine_put();
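As a usage illustration (editorial, not part of the patch), a caller holding the RTNL lock could cycle an interface through these two helpers; example_cycle_device and its error handling are invented for the sketch:

static int example_cycle_device(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_open(dev);		/* nop if the device is already IFF_UP */
	if (!err)
		err = dev_close(dev);	/* emits NETDEV_GOING_DOWN, then NETDEV_DOWN */
	rtnl_unlock();
	return err;
}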
2027 + * dev_disable_lro - disable Large Receive Offload on a device
2030 + * Disable Large Receive Offload (LRO) on a net device. Must be
2031 + * called under RTNL. This is needed if received packets may be
2032 + * forwarded to another interface.
2034 +void dev_disable_lro(struct net_device *dev)
2036 + if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
2037 + dev->ethtool_ops->set_flags) {
2038 + u32 flags = dev->ethtool_ops->get_flags(dev);
2039 + if (flags & ETH_FLAG_LRO) {
2040 + flags &= ~ETH_FLAG_LRO;
2041 + dev->ethtool_ops->set_flags(dev, flags);
2044 + WARN_ON(dev->features & NETIF_F_LRO);
2046 +EXPORT_SYMBOL(dev_disable_lro);
2049 +static int dev_boot_phase = 1;
2052 + * Device change register/unregister. These are not inline or static
2053 + * as we export them to the world.
2057 + * register_netdevice_notifier - register a network notifier block
2060 + * Register a notifier to be called when network device events occur.
2061 + * The notifier passed is linked into the kernel structures and must
2062 + * not be reused until it has been unregistered. A negative errno code
2063 + * is returned on a failure.
2065 + * When registered, all registration and up events are replayed
2066 + * to the new notifier to give it a race-free
2067 + * view of the network device list.
2070 +int register_netdevice_notifier(struct notifier_block *nb)
2072 + struct net_device *dev;
2073 + struct net_device *last;
2078 + err = raw_notifier_chain_register(&netdev_chain, nb);
2081 + if (dev_boot_phase)
2083 + for_each_net(net) {
2084 + for_each_netdev(net, dev) {
2085 + err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
2086 + err = notifier_to_errno(err);
2090 + if (!(dev->flags & IFF_UP))
2093 + nb->notifier_call(nb, NETDEV_UP, dev);
2103 + for_each_net(net) {
2104 + for_each_netdev(net, dev) {
2108 + if (dev->flags & IFF_UP) {
2109 + nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
2110 + nb->notifier_call(nb, NETDEV_DOWN, dev);
2112 + nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
2116 + raw_notifier_chain_unregister(&netdev_chain, nb);
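For illustration, a minimal notifier client might look like the sketch below; the example_* names are hypothetical, and note that in this kernel the notifier's ptr argument is the struct net_device itself:

static int example_netdev_event(struct notifier_block *nb,
				unsigned long event, void *ptr)
{
	struct net_device *dev = ptr;

	if (event == NETDEV_UP || event == NETDEV_DOWN)
		printk(KERN_INFO "example: %s went %s\n", dev->name,
		       event == NETDEV_UP ? "up" : "down");
	return NOTIFY_DONE;
}

static struct notifier_block example_nb = {
	.notifier_call = example_netdev_event,
};

/* typically from module init: register_netdevice_notifier(&example_nb); */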
2121 + * unregister_netdevice_notifier - unregister a network notifier block
2124 + * Unregister a notifier previously registered by
2125 + * register_netdevice_notifier(). The notifier is unlinked from the
2126 + * kernel structures and may then be reused. A negative errno code
2127 + * is returned on a failure.
2130 +int unregister_netdevice_notifier(struct notifier_block *nb)
2135 + err = raw_notifier_chain_unregister(&netdev_chain, nb);
2141 + * call_netdevice_notifiers - call all network notifier blocks
2142 + * @val: value passed unmodified to notifier function
2143 + * @dev: net_device pointer passed unmodified to notifier function
2145 + * Call all network notifier blocks. Parameters and return value
2146 + * are as for raw_notifier_call_chain().
2149 +int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
2151 + return raw_notifier_call_chain(&netdev_chain, val, dev);
2154 +/* When > 0 there are consumers of rx skb time stamps */
2155 +static atomic_t netstamp_needed = ATOMIC_INIT(0);
2157 +void net_enable_timestamp(void)
2159 + atomic_inc(&netstamp_needed);
2162 +void net_disable_timestamp(void)
2164 + atomic_dec(&netstamp_needed);
2167 +static inline void net_timestamp(struct sk_buff *skb)
2169 + if (atomic_read(&netstamp_needed))
2170 + __net_timestamp(skb);
2172 + skb->tstamp.tv64 = 0;
2176 + * Support routine. Sends outgoing frames to any network
2177 + * taps currently in use.
2180 +static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
2182 + struct packet_type *ptype;
2184 +#ifdef CONFIG_NET_CLS_ACT
2185 + if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
2186 + net_timestamp(skb);
2188 + net_timestamp(skb);
2192 + list_for_each_entry_rcu(ptype, &ptype_all, list) {
2193 + /* Never send packets back to the socket
2194 + * they originated from - MvS (miquels@drinkel.ow.org)
2196 + if ((ptype->dev == dev || !ptype->dev) &&
2197 + (ptype->af_packet_priv == NULL ||
2198 + (struct sock *)ptype->af_packet_priv != skb->sk)) {
2199 + struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
2203 + /* skb->nh should be correctly
2204 + set by sender, so that the second statement is
2205 + just protection against buggy protocols.
2207 + skb_reset_mac_header(skb2);
2209 + if (skb_network_header(skb2) < skb2->data ||
2210 + skb2->network_header > skb2->tail) {
2211 + if (net_ratelimit())
2212 + printk(KERN_CRIT "protocol %04x is "
2213 + "buggy, dev %s\n",
2214 + skb2->protocol, dev->name);
2215 + skb_reset_network_header(skb2);
2218 + skb2->transport_header = skb2->network_header;
2219 + skb2->pkt_type = PACKET_OUTGOING;
2220 + ptype->func(skb2, skb->dev, ptype, skb->dev);
2223 + rcu_read_unlock();
2227 +static inline void __netif_reschedule(struct Qdisc *q)
2229 + struct softnet_data *sd;
2230 + unsigned long flags;
2232 + local_irq_save(flags);
2233 + sd = &__get_cpu_var(softnet_data);
2234 + q->next_sched = sd->output_queue;
2235 + sd->output_queue = q;
2236 + raise_softirq_irqoff(NET_TX_SOFTIRQ);
2237 + local_irq_restore(flags);
2240 +void __netif_schedule(struct Qdisc *q)
2242 + if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
2243 + __netif_reschedule(q);
2245 +EXPORT_SYMBOL(__netif_schedule);
2247 +void dev_kfree_skb_irq(struct sk_buff *skb)
2249 + if (atomic_dec_and_test(&skb->users)) {
2250 + struct softnet_data *sd;
2251 + unsigned long flags;
2253 + local_irq_save(flags);
2254 + sd = &__get_cpu_var(softnet_data);
2255 + skb->next = sd->completion_queue;
2256 + sd->completion_queue = skb;
2257 + raise_softirq_irqoff(NET_TX_SOFTIRQ);
2258 + local_irq_restore(flags);
2261 +EXPORT_SYMBOL(dev_kfree_skb_irq);
2263 +void dev_kfree_skb_any(struct sk_buff *skb)
2265 + if (in_irq() || irqs_disabled())
2266 + dev_kfree_skb_irq(skb);
2268 + dev_kfree_skb(skb);
2270 +EXPORT_SYMBOL(dev_kfree_skb_any);
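A sketch of the intended caller (editorial; the ISR and the skb-as-cookie convention are invented for illustration): a driver's TX-done interrupt frees a transmitted skb with the _irq variant, deferring the actual free to net_tx_action() via the completion queue:

static irqreturn_t example_tx_done_isr(int irq, void *dev_id)
{
	struct sk_buff *skb = dev_id;	/* hypothetical: skb stashed as the IRQ cookie */

	dev_kfree_skb_irq(skb);		/* safe in hard-IRQ context; freed in NET_TX softirq */
	return IRQ_HANDLED;
}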
2274 + * netif_device_detach - mark device as removed
2275 + * @dev: network device
2277 + * Mark device as removed from the system and therefore no longer available.
2279 +void netif_device_detach(struct net_device *dev)
2281 + if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
2282 + netif_running(dev)) {
2283 + netif_tx_stop_all_queues(dev);
2286 +EXPORT_SYMBOL(netif_device_detach);
2289 + * netif_device_attach - mark device as attached
2290 + * @dev: network device
2292 + * Mark device as attached to the system and restart it if needed.
2294 +void netif_device_attach(struct net_device *dev)
2296 + if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
2297 + netif_running(dev)) {
2298 + netif_tx_wake_all_queues(dev);
2299 + __netdev_watchdog_up(dev);
2302 +EXPORT_SYMBOL(netif_device_attach);
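These two form the usual suspend/resume pair; a hedged sketch with hypothetical example_* driver callbacks (hardware handling reduced to comments):

static int example_suspend(struct net_device *dev)
{
	netif_device_detach(dev);	/* stops all TX queues if the device was running */
	/* power the hardware down here */
	return 0;
}

static int example_resume(struct net_device *dev)
{
	/* restore hardware state here */
	netif_device_attach(dev);	/* wakes the queues and restarts the watchdog */
	return 0;
}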
2304 +static bool can_checksum_protocol(unsigned long features, __be16 protocol)
2306 + return ((features & NETIF_F_GEN_CSUM) ||
2307 + ((features & NETIF_F_IP_CSUM) &&
2308 + protocol == htons(ETH_P_IP)) ||
2309 + ((features & NETIF_F_IPV6_CSUM) &&
2310 + protocol == htons(ETH_P_IPV6)) ||
2311 + ((features & NETIF_F_FCOE_CRC) &&
2312 + protocol == htons(ETH_P_FCOE)));
2315 +static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
2317 + if (can_checksum_protocol(dev->features, skb->protocol))
2320 + if (skb->protocol == htons(ETH_P_8021Q)) {
2321 + struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
2322 + if (can_checksum_protocol(dev->features & dev->vlan_features,
2323 + veh->h_vlan_encapsulated_proto))
2331 + * Invalidate hardware checksum when packet is to be mangled, and
2332 + * complete checksum manually on outgoing path.
2334 +int skb_checksum_help(struct sk_buff *skb)
2337 + int ret = 0, offset;
2339 + if (skb->ip_summed == CHECKSUM_COMPLETE)
2340 + goto out_set_summed;
2342 + if (unlikely(skb_shinfo(skb)->gso_size)) {
2343 + /* Let GSO fix up the checksum. */
2344 + goto out_set_summed;
2347 + offset = skb->csum_start - skb_headroom(skb);
2348 + BUG_ON(offset >= skb_headlen(skb));
2349 + csum = skb_checksum(skb, offset, skb->len - offset, 0);
2351 + offset += skb->csum_offset;
2352 + BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
2354 + if (skb_cloned(skb) &&
2355 + !skb_clone_writable(skb, offset + sizeof(__sum16))) {
2356 + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
2361 + *(__sum16 *)(skb->data + offset) = csum_fold(csum);
2363 + skb->ip_summed = CHECKSUM_NONE;
2369 + * skb_gso_segment - Perform segmentation on skb.
2370 + * @skb: buffer to segment
2371 + * @features: features for the output path (see dev->features)
2373 + * This function segments the given skb and returns a list of segments.
2375 + * It may return NULL if the skb requires no segmentation. This is
2376 + * only possible when GSO is used for verifying header integrity.
2378 +struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
2380 + struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
2381 + struct packet_type *ptype;
2382 + __be16 type = skb->protocol;
2385 + skb_reset_mac_header(skb);
2386 + skb->mac_len = skb->network_header - skb->mac_header;
2387 + __skb_pull(skb, skb->mac_len);
2389 + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2390 + struct net_device *dev = skb->dev;
2391 + struct ethtool_drvinfo info = {};
2393 + if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
2394 + dev->ethtool_ops->get_drvinfo(dev, &info);
2396 + WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
2398 + info.driver, dev ? dev->features : 0L,
2399 + skb->sk ? skb->sk->sk_route_caps : 0L,
2400 + skb->len, skb->data_len, skb->ip_summed);
2402 + if (skb_header_cloned(skb) &&
2403 + (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
2404 + return ERR_PTR(err);
2408 + list_for_each_entry_rcu(ptype,
2409 + &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
2410 + if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
2411 + if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
2412 + err = ptype->gso_send_check(skb);
2413 + segs = ERR_PTR(err);
2414 + if (err || skb_gso_ok(skb, features))
2416 + __skb_push(skb, (skb->data -
2417 + skb_network_header(skb)));
2419 + segs = ptype->gso_segment(skb, features);
2423 + rcu_read_unlock();
2425 + __skb_push(skb, skb->data - skb_mac_header(skb));
2430 +EXPORT_SYMBOL(skb_gso_segment);
2432 +/* Take action when hardware reception checksum errors are detected. */
2434 +void netdev_rx_csum_fault(struct net_device *dev)
2436 + if (net_ratelimit()) {
2437 + printk(KERN_ERR "%s: hw csum failure.\n",
2438 + dev ? dev->name : "<unknown>");
2442 +EXPORT_SYMBOL(netdev_rx_csum_fault);
2445 +/* Actually, we should eliminate this check as soon as we know, that:
2446 + * 1. IOMMU is present and allows to map all the memory.
2447 + * 2. No high memory really exists on this machine.
2450 +static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
2452 +#ifdef CONFIG_HIGHMEM
2455 + if (dev->features & NETIF_F_HIGHDMA)
2458 + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
2459 + if (PageHighMem(skb_shinfo(skb)->frags[i].page))
2466 +struct dev_gso_cb {
2467 + void (*destructor)(struct sk_buff *skb);
2470 +#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
2472 +static void dev_gso_skb_destructor(struct sk_buff *skb)
2474 + struct dev_gso_cb *cb;
2477 + struct sk_buff *nskb = skb->next;
2479 + skb->next = nskb->next;
2480 + nskb->next = NULL;
2482 + } while (skb->next);
2484 + cb = DEV_GSO_CB(skb);
2485 + if (cb->destructor)
2486 + cb->destructor(skb);
2490 + * dev_gso_segment - Perform emulated hardware segmentation on skb.
2491 + * @skb: buffer to segment
2493 + * This function segments the given skb and stores the list of segments
2496 +static int dev_gso_segment(struct sk_buff *skb)
2498 + struct net_device *dev = skb->dev;
2499 + struct sk_buff *segs;
2500 + int features = dev->features & ~(illegal_highdma(dev, skb) ?
2503 + segs = skb_gso_segment(skb, features);
2505 + /* Verifying header integrity only. */
2510 + return PTR_ERR(segs);
2513 + DEV_GSO_CB(skb)->destructor = skb->destructor;
2514 + skb->destructor = dev_gso_skb_destructor;
2519 +int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
2520 + struct netdev_queue *txq)
2522 + const struct net_device_ops *ops = dev->netdev_ops;
2525 + if (likely(!skb->next)) {
2526 + if (!list_empty(&ptype_all))
2527 + dev_queue_xmit_nit(skb, dev);
2529 + if (netif_needs_gso(dev, skb)) {
2530 + if (unlikely(dev_gso_segment(skb)))
2531 + goto out_kfree_skb;
2536 + rc = ops->ndo_start_xmit(skb, dev);
2538 + * TODO: if skb_orphan() was called by
2539 + * dev->hard_start_xmit() (for example, the unmodified
2540 + * igb driver does that; bnx2 doesn't), then
2541 + * skb_tx_software_timestamp() will be unable to send
2542 + * back the time stamp.
2544 + * How can this be prevented? Always create another
2545 + * reference to the socket before calling
2546 + * dev->hard_start_xmit()? Prevent that skb_orphan()
2547 + * does anything in dev->hard_start_xmit() by clearing
2548 + * the skb destructor before the call and restoring it
2549 + * afterwards, then doing the skb_orphan() ourselves?
2556 + struct sk_buff *nskb = skb->next;
2558 + skb->next = nskb->next;
2559 + nskb->next = NULL;
2560 + rc = ops->ndo_start_xmit(nskb, dev);
2561 + if (unlikely(rc)) {
2562 + nskb->next = skb->next;
2566 + if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
2567 + return NETDEV_TX_BUSY;
2568 + } while (skb->next);
2570 + skb->destructor = DEV_GSO_CB(skb)->destructor;
2577 +static u32 skb_tx_hashrnd;
2579 +u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
2583 + if (skb_rx_queue_recorded(skb))
2584 + return skb_get_rx_queue(skb) % dev->real_num_tx_queues;
2586 + if (skb->sk && skb->sk->sk_hash)
2587 + hash = skb->sk->sk_hash;
2589 + hash = skb->protocol;
2591 + hash = jhash_1word(hash, skb_tx_hashrnd);
2593 + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
2595 +EXPORT_SYMBOL(skb_tx_hash);
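Editorial worked example of the multiply-shift above: rather than computing hash % n, the 32-bit hash is scaled by ((u64)hash * real_num_tx_queues) >> 32, which always lands in [0, real_num_tx_queues). For instance, hash = 0x9e3779b9 (2654435769) on a 4-queue device yields (2654435769 * 4) >> 32 = 2, i.e. the packet is mapped to queue 2.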
2597 +static struct netdev_queue *dev_pick_tx(struct net_device *dev,
2598 + struct sk_buff *skb)
2600 + const struct net_device_ops *ops = dev->netdev_ops;
2601 + u16 queue_index = 0;
2603 + if (ops->ndo_select_queue)
2604 + queue_index = ops->ndo_select_queue(dev, skb);
2605 + else if (dev->real_num_tx_queues > 1)
2606 + queue_index = skb_tx_hash(dev, skb);
2608 + skb_set_queue_mapping(skb, queue_index);
2609 + return netdev_get_tx_queue(dev, queue_index);
2613 + * dev_queue_xmit - transmit a buffer
2614 + * @skb: buffer to transmit
2616 + * Queue a buffer for transmission to a network device. The caller must
2617 + * have set the device and priority and built the buffer before calling
2618 + * this function. The function can be called from an interrupt.
2620 + * A negative errno code is returned on a failure. A success does not
2621 + * guarantee the frame will be transmitted as it may be dropped due
2622 + * to congestion or traffic shaping.
2624 + * -----------------------------------------------------------------------------------
2625 + * I notice this method can also return errors from the queue disciplines,
2626 + * including NET_XMIT_DROP, which is a positive value. So, errors can also be positive.
2629 + * Regardless of the return value, the skb is consumed, so it is currently
2630 + * difficult to retry a send to this method. (You can bump the ref count
2631 + * before sending to hold a reference for retry if you are careful.)
2633 + * When calling this method, interrupts MUST be enabled. This is because
2634 + * the BH enable code must have IRQs enabled so that it will not deadlock.
2637 +int dev_queue_xmit(struct sk_buff *skb)
2639 + struct net_device *dev = skb->dev;
2640 + struct netdev_queue *txq;
2644 + /* GSO will handle the following emulations directly. */
2645 + if (netif_needs_gso(dev, skb))
2648 + if (skb_shinfo(skb)->frag_list &&
2649 + !(dev->features & NETIF_F_FRAGLIST) &&
2650 + __skb_linearize(skb))
2651 + goto out_kfree_skb;
2653 + /* Fragmented skb is linearized if device does not support SG,
2654 + * or if at least one of fragments is in highmem and device
2655 + * does not support DMA from it.
2657 + if (skb_shinfo(skb)->nr_frags &&
2658 + (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
2659 + __skb_linearize(skb))
2660 + goto out_kfree_skb;
2662 + /* If packet is not checksummed and device does not support
2663 + * checksumming for this protocol, complete checksumming here.
2665 + if (skb->ip_summed == CHECKSUM_PARTIAL) {
2666 + skb_set_transport_header(skb, skb->csum_start -
2667 + skb_headroom(skb));
2668 + if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
2669 + goto out_kfree_skb;
2673 + /* Disable soft irqs for various locks below. Also
2674 + * stops preemption for RCU.
2676 + rcu_read_lock_bh();
2678 + txq = dev_pick_tx(dev, skb);
2679 + q = rcu_dereference(txq->qdisc);
2681 +#ifdef CONFIG_NET_CLS_ACT
2682 + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
2685 + spinlock_t *root_lock = qdisc_lock(q);
2687 + spin_lock(root_lock);
2689 + if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
2691 + rc = NET_XMIT_DROP;
2693 + rc = qdisc_enqueue_root(skb, q);
2696 + spin_unlock(root_lock);
2701 + /* The device has no queue. Common case for software devices:
2702 + loopback, all the sorts of tunnels...
2704 + Really, it is unlikely that netif_tx_lock protection is necessary
2705 + here. (e.g. loopback and IP tunnels are clean, ignoring statistics
2707 + However, it is possible that they rely on protection
2710 + Check this and take the lock; it is not prone to deadlocks.
2711 + Taking the noqueue qdisc's lock is even simpler 8)
2713 + if (dev->flags & IFF_UP) {
2714 + int cpu = smp_processor_id(); /* ok because BHs are off */
2716 + if (txq->xmit_lock_owner != cpu) {
2718 + HARD_TX_LOCK(dev, txq, cpu);
2720 + if (!netif_tx_queue_stopped(txq)) {
2722 + if (!dev_hard_start_xmit(skb, dev, txq)) {
2723 + HARD_TX_UNLOCK(dev, txq);
2727 + HARD_TX_UNLOCK(dev, txq);
2728 + if (net_ratelimit())
2729 + printk(KERN_CRIT "Virtual device %s asks to "
2730 + "queue packet!\n", dev->name);
2732 + /* Recursion is detected! It is possible,
2733 + * unfortunately */
2734 + if (net_ratelimit())
2735 + printk(KERN_CRIT "Dead loop on virtual device "
2736 + "%s, fix it urgently!\n", dev->name);
2741 + rcu_read_unlock_bh();
2747 + rcu_read_unlock_bh();
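A minimal caller sketch (editorial; example_xmit, the raw payload copy, and the omitted protocol/priority setup are assumptions) honouring the contract documented above, in particular that the skb is consumed even on error:

static int example_xmit(struct net_device *dev, const void *data,
			unsigned int len)
{
	struct sk_buff *skb = alloc_skb(LL_RESERVED_SPACE(dev) + len,
					GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	memcpy(skb_put(skb, len), data, len);
	skb->dev = dev;			/* real callers also set protocol/priority */
	return dev_queue_xmit(skb);	/* consumes skb; do not touch it afterwards */
}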
2752 +/*=======================================================================
2754 + =======================================================================*/
2756 +int netdev_max_backlog __read_mostly = 1000;
2757 +int netdev_budget __read_mostly = 300;
2758 +int weight_p __read_mostly = 64; /* old backlog weight */
2760 +DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
2764 + * netif_rx - post buffer to the network code
2765 + * @skb: buffer to post
2767 + * This function receives a packet from a device driver and queues it for
2768 + * the upper (protocol) levels to process. It always succeeds. The buffer
2769 + * may be dropped during processing for congestion control or by the
2770 + * protocol layers.
2773 + * NET_RX_SUCCESS (no congestion)
2774 + * NET_RX_DROP (packet was dropped)
2778 +int netif_rx(struct sk_buff *skb)
2780 + struct softnet_data *queue;
2781 + unsigned long flags;
2783 + /* if netpoll wants it, pretend we never saw it */
2784 + if (netpoll_rx(skb))
2785 + return NET_RX_DROP;
2787 + if (!skb->tstamp.tv64)
2788 + net_timestamp(skb);
2791 + * The code is rearranged so that the path is shortest
2792 + * when the CPU is congested but still operating.
2794 + local_irq_save(flags);
2795 + queue = &__get_cpu_var(softnet_data);
2797 + __get_cpu_var(netdev_rx_stat).total++;
2798 + if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
2799 + if (queue->input_pkt_queue.qlen) {
2801 + __skb_queue_tail(&queue->input_pkt_queue, skb);
2802 + local_irq_restore(flags);
2803 + return NET_RX_SUCCESS;
2806 + napi_schedule(&queue->backlog);
2810 + __get_cpu_var(netdev_rx_stat).dropped++;
2811 + local_irq_restore(flags);
2814 + return NET_RX_DROP;
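For contrast with the NAPI path further down, a non-NAPI driver would hand frames to the stack roughly like this sketch (editorial; example_rx and the flat data buffer are assumptions):

static void example_rx(struct net_device *dev, const void *data,
		       unsigned int len)
{
	struct sk_buff *skb = netdev_alloc_skb(dev, len + NET_IP_ALIGN);

	if (!skb)
		return;				/* frame is silently dropped */
	skb_reserve(skb, NET_IP_ALIGN);		/* align the IP header */
	memcpy(skb_put(skb, len), data, len);
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);				/* queues on this CPU's backlog */
}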
2817 +int netif_rx_ni(struct sk_buff *skb)
2821 + preempt_disable();
2822 + err = netif_rx(skb);
2823 + if (local_softirq_pending())
2830 +EXPORT_SYMBOL(netif_rx_ni);
2832 +static void net_tx_action(struct softirq_action *h)
2834 + struct softnet_data *sd = &__get_cpu_var(softnet_data);
2836 + if (sd->completion_queue) {
2837 + struct sk_buff *clist;
2839 + local_irq_disable();
2840 + clist = sd->completion_queue;
2841 + sd->completion_queue = NULL;
2842 + local_irq_enable();
2845 + struct sk_buff *skb = clist;
2846 + clist = clist->next;
2848 + WARN_ON(atomic_read(&skb->users));
2853 + if (sd->output_queue) {
2854 + struct Qdisc *head;
2856 + local_irq_disable();
2857 + head = sd->output_queue;
2858 + sd->output_queue = NULL;
2859 + local_irq_enable();
2862 + struct Qdisc *q = head;
2863 + spinlock_t *root_lock;
2865 + head = head->next_sched;
2867 + root_lock = qdisc_lock(q);
2868 + if (spin_trylock(root_lock)) {
2869 + smp_mb__before_clear_bit();
2870 + clear_bit(__QDISC_STATE_SCHED,
2873 + spin_unlock(root_lock);
2875 + if (!test_bit(__QDISC_STATE_DEACTIVATED,
2877 + __netif_reschedule(q);
2879 + smp_mb__before_clear_bit();
2880 + clear_bit(__QDISC_STATE_SCHED,
2888 +static inline int deliver_skb(struct sk_buff *skb,
2889 + struct packet_type *pt_prev,
2890 + struct net_device *orig_dev)
2892 + atomic_inc(&skb->users);
2893 + return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2896 +#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
2897 +/* These hooks defined here for ATM */
2899 +struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2900 + unsigned char *addr);
2901 +void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
2904 + * If bridge module is loaded call bridging hook.
2905 + * returns NULL if packet was consumed.
2907 +struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2908 + struct sk_buff *skb) __read_mostly;
2909 +static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2910 + struct packet_type **pt_prev, int *ret,
2911 + struct net_device *orig_dev)
2913 + struct net_bridge_port *port;
2915 + if (skb->pkt_type == PACKET_LOOPBACK ||
2916 + (port = rcu_dereference(skb->dev->br_port)) == NULL)
2920 + *ret = deliver_skb(skb, *pt_prev, orig_dev);
2924 + return br_handle_frame_hook(port, skb);
2927 +#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
2930 +#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2931 +struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2932 +EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2934 +static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2935 + struct packet_type **pt_prev,
2937 + struct net_device *orig_dev)
2939 + if (skb->dev->macvlan_port == NULL)
2943 + *ret = deliver_skb(skb, *pt_prev, orig_dev);
2946 + return macvlan_handle_frame_hook(skb);
2949 +#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2952 +#ifdef CONFIG_NET_CLS_ACT
2953 +/* TODO: Maybe we should just force sch_ingress to be compiled in
2954 + * when CONFIG_NET_CLS_ACT is? Otherwise we execute some useless
2955 + * instructions (a compare and two extra stores) when the ingress
2956 + * scheduler is off but CONFIG_NET_CLS_ACT is on.
2957 + * NOTE: This doesn't remove any functionality; without the ingress
2958 + * scheduler, you just can't add policies on ingress.
2961 +static int ing_filter(struct sk_buff *skb)
2963 + struct net_device *dev = skb->dev;
2964 + u32 ttl = G_TC_RTTL(skb->tc_verd);
2965 + struct netdev_queue *rxq;
2966 + int result = TC_ACT_OK;
2969 + if (MAX_RED_LOOP < ttl++) {
2970 + printk(KERN_WARNING
2971 + "Redir loop detected Dropping packet (%d->%d)\n",
2972 + skb->iif, dev->ifindex);
2973 + return TC_ACT_SHOT;
2976 + skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2977 + skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2979 + rxq = &dev->rx_queue;
2982 + if (q != &noop_qdisc) {
2983 + spin_lock(qdisc_lock(q));
2984 + if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2985 + result = qdisc_enqueue_root(skb, q);
2986 + spin_unlock(qdisc_lock(q));
2992 +static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2993 + struct packet_type **pt_prev,
2994 + int *ret, struct net_device *orig_dev)
2996 + if (skb->dev->rx_queue.qdisc == &noop_qdisc)
3000 + *ret = deliver_skb(skb, *pt_prev, orig_dev);
3003 + /* Huh? Why does turning on AF_PACKET affect this? */
3004 + skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
3007 + switch (ing_filter(skb)) {
3009 + case TC_ACT_STOLEN:
3021 + * netif_nit_deliver - deliver received packets to network taps
3024 + * This function is used to deliver incoming packets to network
3025 + * taps. It should be used when the normal netif_receive_skb path
3026 + * is bypassed, for example because of VLAN acceleration.
3028 +void netif_nit_deliver(struct sk_buff *skb)
3030 + struct packet_type *ptype;
3032 + if (list_empty(&ptype_all))
3035 + skb_reset_network_header(skb);
3036 + skb_reset_transport_header(skb);
3037 + skb->mac_len = skb->network_header - skb->mac_header;
3040 + list_for_each_entry_rcu(ptype, &ptype_all, list) {
3041 + if (!ptype->dev || ptype->dev == skb->dev)
3042 + deliver_skb(skb, ptype, skb->dev);
3044 + rcu_read_unlock();
3048 + * netif_receive_skb - process receive buffer from network
3049 + * @skb: buffer to process
3051 + * netif_receive_skb() is the main receive data processing function.
3052 + * It always succeeds. The buffer may be dropped during processing
3053 + * for congestion control or by the protocol layers.
3055 + * This function may only be called from softirq context and interrupts
3056 + * should be enabled.
3058 + * Return values (usually ignored):
3059 + * NET_RX_SUCCESS: no congestion
3060 + * NET_RX_DROP: packet was dropped
3062 +int netif_receive_skb(struct sk_buff *skb)
3064 + struct packet_type *ptype, *pt_prev;
3065 + struct net_device *orig_dev;
3066 + struct net_device *null_or_orig;
3067 + int ret = NET_RX_DROP;
3070 + if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
3071 + return NET_RX_SUCCESS;
3073 + /* if we've gotten here through NAPI, check netpoll */
3074 + if (netpoll_receive_skb(skb))
3075 + return NET_RX_DROP;
3077 + if (!skb->tstamp.tv64)
3078 + net_timestamp(skb);
3081 + skb->iif = skb->dev->ifindex;
3083 + null_or_orig = NULL;
3084 + orig_dev = skb->dev;
3085 + if (orig_dev->master) {
3086 + if (skb_bond_should_drop(skb))
3087 + null_or_orig = orig_dev; /* deliver only exact match */
3089 + skb->dev = orig_dev->master;
3092 + __get_cpu_var(netdev_rx_stat).total++;
3094 + skb_reset_network_header(skb);
3095 + skb_reset_transport_header(skb);
3096 + skb->mac_len = skb->network_header - skb->mac_header;
3102 +#ifdef CONFIG_NET_CLS_ACT
3103 + if (skb->tc_verd & TC_NCLS) {
3104 + skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
3109 + list_for_each_entry_rcu(ptype, &ptype_all, list) {
3110 + if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
3111 + ptype->dev == orig_dev) {
3113 + ret = deliver_skb(skb, pt_prev, orig_dev);
3118 +#ifdef CONFIG_NET_CLS_ACT
3119 + skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
3125 + skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
3128 + skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
3134 + type = skb->protocol;
3135 + list_for_each_entry_rcu(ptype,
3136 + &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
3137 + if (ptype->type == type &&
3138 + (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
3139 + ptype->dev == orig_dev)) {
3141 + ret = deliver_skb(skb, pt_prev, orig_dev);
3147 + ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
3150 + /* Jamal, now you will not be able to escape explaining
3151 + * to me how you were going to use this. :-)
3153 + ret = NET_RX_DROP;
3157 + rcu_read_unlock();
3161 +/* Network device is going away, flush any packets still pending */
3162 +static void flush_backlog(void *arg)
3164 + struct net_device *dev = arg;
3165 + struct softnet_data *queue = &__get_cpu_var(softnet_data);
3166 + struct sk_buff *skb, *tmp;
3168 + skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
3169 + if (skb->dev == dev) {
3170 + __skb_unlink(skb, &queue->input_pkt_queue);
3175 +static int napi_gro_complete(struct sk_buff *skb)
3177 + struct packet_type *ptype;
3178 + __be16 type = skb->protocol;
3179 + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3180 + int err = -ENOENT;
3182 + if (NAPI_GRO_CB(skb)->count == 1) {
3183 + skb_shinfo(skb)->gso_size = 0;
3188 + list_for_each_entry_rcu(ptype, head, list) {
3189 + if (ptype->type != type || ptype->dev || !ptype->gro_complete)
3192 + err = ptype->gro_complete(skb);
3195 + rcu_read_unlock();
3198 + WARN_ON(&ptype->list == head);
3200 + return NET_RX_SUCCESS;
3204 + return netif_receive_skb(skb);
3207 +void napi_gro_flush(struct napi_struct *napi)
3209 + struct sk_buff *skb, *next;
3211 + for (skb = napi->gro_list; skb; skb = next) {
3214 + napi_gro_complete(skb);
3217 + napi->gro_count = 0;
3218 + napi->gro_list = NULL;
3220 +EXPORT_SYMBOL(napi_gro_flush);
3222 +void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
3224 + unsigned int offset = skb_gro_offset(skb);
3227 + if (hlen <= skb_headlen(skb))
3228 + return skb->data + offset;
3230 + if (unlikely(!skb_shinfo(skb)->nr_frags ||
3231 + skb_shinfo(skb)->frags[0].size <=
3232 + hlen - skb_headlen(skb) ||
3233 + PageHighMem(skb_shinfo(skb)->frags[0].page)))
3234 + return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
3236 + return page_address(skb_shinfo(skb)->frags[0].page) +
3237 + skb_shinfo(skb)->frags[0].page_offset +
3238 + offset - skb_headlen(skb);
3240 +EXPORT_SYMBOL(skb_gro_header);
3242 +int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3244 + struct sk_buff **pp = NULL;
3245 + struct packet_type *ptype;
3246 + __be16 type = skb->protocol;
3247 + struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
3252 + if (!(skb->dev->features & NETIF_F_GRO))
3255 + if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
3259 + list_for_each_entry_rcu(ptype, head, list) {
3260 + if (ptype->type != type || ptype->dev || !ptype->gro_receive)
3263 + skb_set_network_header(skb, skb_gro_offset(skb));
3264 + mac_len = skb->network_header - skb->mac_header;
3265 + skb->mac_len = mac_len;
3266 + NAPI_GRO_CB(skb)->same_flow = 0;
3267 + NAPI_GRO_CB(skb)->flush = 0;
3268 + NAPI_GRO_CB(skb)->free = 0;
3270 + pp = ptype->gro_receive(&napi->gro_list, skb);
3273 + rcu_read_unlock();
3275 + if (&ptype->list == head)
3278 + same_flow = NAPI_GRO_CB(skb)->same_flow;
3279 + ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
3282 + struct sk_buff *nskb = *pp;
3285 + nskb->next = NULL;
3286 + napi_gro_complete(nskb);
3287 + napi->gro_count--;
3293 + if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
3296 + napi->gro_count++;
3297 + NAPI_GRO_CB(skb)->count = 1;
3298 + skb_shinfo(skb)->gso_size = skb_gro_len(skb);
3299 + skb->next = napi->gro_list;
3300 + napi->gro_list = skb;
3304 + if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
3305 + if (napi->gro_list == skb)
3306 + napi->gro_list = skb->next;
3317 +EXPORT_SYMBOL(dev_gro_receive);
3319 +static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3321 + struct sk_buff *p;
3323 + if (netpoll_rx_on(skb))
3324 + return GRO_NORMAL;
3326 + for (p = napi->gro_list; p; p = p->next) {
3327 + NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
3328 + && !compare_ether_header(skb_mac_header(p),
3329 + skb_gro_mac_header(skb));
3330 + NAPI_GRO_CB(p)->flush = 0;
3333 + return dev_gro_receive(napi, skb);
3336 +int napi_skb_finish(int ret, struct sk_buff *skb)
3338 + int err = NET_RX_SUCCESS;
3342 + return netif_receive_skb(skb);
3345 + err = NET_RX_DROP;
3346 + /* fall through */
3348 + case GRO_MERGED_FREE:
3355 +EXPORT_SYMBOL(napi_skb_finish);
3357 +int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
3359 + skb_gro_reset_offset(skb);
3361 + return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
3363 +EXPORT_SYMBOL(napi_gro_receive);
3365 +void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
3367 + __skb_pull(skb, skb_headlen(skb));
3368 + skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
3372 +EXPORT_SYMBOL(napi_reuse_skb);
3374 +struct sk_buff *napi_fraginfo_skb(struct napi_struct *napi,
3375 + struct napi_gro_fraginfo *info)
3377 + struct net_device *dev = napi->dev;
3378 + struct sk_buff *skb = napi->skb;
3379 + struct ethhdr *eth;
3386 + skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
3390 + skb_reserve(skb, NET_IP_ALIGN);
3393 + BUG_ON(info->nr_frags > MAX_SKB_FRAGS);
3394 + frag = info->frags;
3396 + for (i = 0; i < info->nr_frags; i++) {
3397 + skb_fill_page_desc(skb, i, frag->page, frag->page_offset,
3401 + skb_shinfo(skb)->nr_frags = info->nr_frags;
3403 + skb->data_len = info->len;
3404 + skb->len += info->len;
3405 + skb->truesize += info->len;
3407 + skb_reset_mac_header(skb);
3408 + skb_gro_reset_offset(skb);
3410 + eth = skb_gro_header(skb, sizeof(*eth));
3412 + napi_reuse_skb(napi, skb);
3417 + skb_gro_pull(skb, sizeof(*eth));
3420 + * This works because the only protocols we care about don't require
3421 + * special handling. We'll fix it up properly at the end.
3423 + skb->protocol = eth->h_proto;
3425 + skb->ip_summed = info->ip_summed;
3426 + skb->csum = info->csum;
3431 +EXPORT_SYMBOL(napi_fraginfo_skb);
3433 +int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
3435 + int err = NET_RX_SUCCESS;
3440 + skb->protocol = eth_type_trans(skb, napi->dev);
3442 + if (ret == GRO_NORMAL)
3443 + return netif_receive_skb(skb);
3445 + skb_gro_pull(skb, -ETH_HLEN);
3449 + err = NET_RX_DROP;
3450 + /* fall through */
3452 + case GRO_MERGED_FREE:
3453 + napi_reuse_skb(napi, skb);
3459 +EXPORT_SYMBOL(napi_frags_finish);
3461 +int napi_gro_frags(struct napi_struct *napi, struct napi_gro_fraginfo *info)
3463 + struct sk_buff *skb = napi_fraginfo_skb(napi, info);
3466 + return NET_RX_DROP;
3468 + return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
3470 +EXPORT_SYMBOL(napi_gro_frags);
3472 +static int process_backlog(struct napi_struct *napi, int quota)
3475 + struct softnet_data *queue = &__get_cpu_var(softnet_data);
3476 + unsigned long start_time = jiffies;
3478 + napi->weight = weight_p;
3480 + struct sk_buff *skb;
3482 + local_irq_disable();
3483 + skb = __skb_dequeue(&queue->input_pkt_queue);
3485 + __napi_complete(napi);
3486 + local_irq_enable();
3489 + local_irq_enable();
3491 + netif_receive_skb(skb);
3492 + } while (++work < quota && jiffies == start_time);
3498 + * __napi_schedule - schedule for receive
3499 + * @n: entry to schedule
3501 + * The entry's receive function will be scheduled to run
3503 +void __napi_schedule(struct napi_struct *n)
3505 + unsigned long flags;
3507 + local_irq_save(flags);
3508 + list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
3509 + __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3510 + local_irq_restore(flags);
3512 +EXPORT_SYMBOL(__napi_schedule);
3514 +void __napi_complete(struct napi_struct *n)
3516 + BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
3517 + BUG_ON(n->gro_list);
3519 + list_del(&n->poll_list);
3520 + smp_mb__before_clear_bit();
3521 + clear_bit(NAPI_STATE_SCHED, &n->state);
3523 +EXPORT_SYMBOL(__napi_complete);
3525 +void napi_complete(struct napi_struct *n)
3527 + unsigned long flags;
3530 + * don't let napi dequeue from the cpu poll list
3531 + * just in case it's running on a different cpu
3533 + if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
3536 + napi_gro_flush(n);
3537 + local_irq_save(flags);
3538 + __napi_complete(n);
3539 + local_irq_restore(flags);
3541 +EXPORT_SYMBOL(napi_complete);
3543 +void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
3544 + int (*poll)(struct napi_struct *, int), int weight)
3546 + INIT_LIST_HEAD(&napi->poll_list);
3547 + napi->gro_count = 0;
3548 + napi->gro_list = NULL;
3550 + napi->poll = poll;
3551 + napi->weight = weight;
3552 + list_add(&napi->dev_list, &dev->napi_list);
3554 +#ifdef CONFIG_NETPOLL
3555 + spin_lock_init(&napi->poll_lock);
3556 + napi->poll_owner = -1;
3558 + set_bit(NAPI_STATE_SCHED, &napi->state);
3560 +EXPORT_SYMBOL(netif_napi_add);
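The driver side of this contract, sketched with hypothetical example_* helpers (not kernel APIs): poll up to budget packets, and call napi_complete() only when the budget was not exhausted:

static struct sk_buff *example_hw_next_rx(struct napi_struct *napi)
{
	return NULL;	/* hypothetical: would pull the next completed RX frame */
}

static int example_poll(struct napi_struct *napi, int budget)
{
	struct sk_buff *skb;
	int work = 0;

	while (work < budget && (skb = example_hw_next_rx(napi)) != NULL) {
		netif_receive_skb(skb);
		work++;
	}
	if (work < budget)
		napi_complete(napi);	/* then re-enable the device's RX interrupt */
	return work;
}

/* registered once at probe time, e.g.:
 *	netif_napi_add(dev, &priv->napi, example_poll, 64);
 */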
3562 +void netif_napi_del(struct napi_struct *napi)
3564 + struct sk_buff *skb, *next;
3566 + list_del_init(&napi->dev_list);
3567 + kfree_skb(napi->skb);
3569 + for (skb = napi->gro_list; skb; skb = next) {
3575 + napi->gro_list = NULL;
3576 + napi->gro_count = 0;
3578 +EXPORT_SYMBOL(netif_napi_del);
3581 +static void net_rx_action(struct softirq_action *h)
3583 + struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
3584 + unsigned long time_limit = jiffies + 2;
3585 + int budget = netdev_budget;
3588 + local_irq_disable();
3590 + while (!list_empty(list)) {
3591 + struct napi_struct *n;
3594 + /* If the softirq window is exhausted then punt.
3595 + * Allow this to run for 2 jiffies, which allows
3596 + * an average latency of 1.5/HZ.
3598 + if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
3599 + goto softnet_break;
3601 + local_irq_enable();
3603 + /* Even though interrupts have been re-enabled, this
3604 + * access is safe because interrupts can only add new
3605 + * entries to the tail of this list, and only ->poll()
3606 + * calls can remove this head entry from the list.
3608 + n = list_entry(list->next, struct napi_struct, poll_list);
3610 + have = netpoll_poll_lock(n);
3612 + weight = n->weight;
3614 + /* This NAPI_STATE_SCHED test is for avoiding a race
3615 + * with netpoll's poll_napi(). Only the entity which
3616 + * obtains the lock and sees NAPI_STATE_SCHED set will
3617 + * actually make the ->poll() call. Therefore we avoid
3618 + * accidentally calling ->poll() when NAPI is not scheduled.
3621 + if (test_bit(NAPI_STATE_SCHED, &n->state))
3622 + work = n->poll(n, weight);
3624 + WARN_ON_ONCE(work > weight);
3628 + local_irq_disable();
3630 + /* Drivers must not modify the NAPI state if they
3631 + * consume the entire weight. In such cases this code
3632 + * still "owns" the NAPI instance and therefore can
3633 + * move the instance around on the list at-will.
3635 + if (unlikely(work == weight)) {
3636 + if (unlikely(napi_disable_pending(n)))
3637 + __napi_complete(n);
3639 + list_move_tail(&n->poll_list, list);
3642 + netpoll_poll_unlock(have);
3645 + local_irq_enable();
3647 +#ifdef CONFIG_NET_DMA
3649 + * There may not be any more sk_buffs coming right now, so push
3650 + * any pending DMA copies to hardware
3652 + dma_issue_pending_all();
3658 + __get_cpu_var(netdev_rx_stat).time_squeeze++;
3659 + __raise_softirq_irqoff(NET_RX_SOFTIRQ);
3663 +static gifconf_func_t *gifconf_list[NPROTO];
3666 + * register_gifconf - register a SIOCGIF handler
3667 + * @family: Address family
3668 + * @gifconf: Function handler
3670 + * Register protocol dependent address dumping routines. The handler
3671 + * that is passed must not be freed or reused until it has been replaced
3672 + * by another handler.
3674 +int register_gifconf(unsigned int family, gifconf_func_t *gifconf)
3676 + if (family >= NPROTO)
3678 + gifconf_list[family] = gifconf;
3684 + * Map an interface index to its name (SIOCGIFNAME)
3688 + * We need this ioctl for efficient implementation of the
3689 + * if_indextoname() function required by the IPv6 API. Without
3690 + * it, we would have to search all the interfaces to find a match.
3694 +static int dev_ifname(struct net *net, struct ifreq __user *arg)
3696 + struct net_device *dev;
3700 + * Fetch the caller's info block.
3703 + if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3706 + read_lock(&dev_base_lock);
3707 + dev = __dev_get_by_index(net, ifr.ifr_ifindex);
3709 + read_unlock(&dev_base_lock);
3713 + strcpy(ifr.ifr_name, dev->name);
3714 + read_unlock(&dev_base_lock);
3716 + if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
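From userspace this is what if_indextoname() reduces to; a hedged sketch (the example_index_to_name name is invented, error handling trimmed):

#include <string.h>
#include <sys/ioctl.h>
#include <net/if.h>

static int example_index_to_name(int sock_fd, int ifindex,
				 char name[IFNAMSIZ])
{
	struct ifreq ifr;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = ifindex;
	if (ioctl(sock_fd, SIOCGIFNAME, &ifr) < 0)
		return -1;		/* no device with that index */
	memcpy(name, ifr.ifr_name, IFNAMSIZ);
	return 0;
}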
3722 + * Perform a SIOCGIFCONF call. This structure will change
3723 + * size eventually, and there is nothing I can do about it.
3724 + * Thus we will need a 'compatibility mode'.
3727 +static int dev_ifconf(struct net *net, char __user *arg)
3729 + struct ifconf ifc;
3730 + struct net_device *dev;
3737 + * Fetch the caller's info block.
3740 + if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
3743 + pos = ifc.ifc_buf;
3744 + len = ifc.ifc_len;
3747 + * Loop over the interfaces, and write an info block for each.
3751 + for_each_netdev(net, dev) {
3752 + for (i = 0; i < NPROTO; i++) {
3753 + if (gifconf_list[i]) {
3756 + done = gifconf_list[i](dev, NULL, 0);
3758 + done = gifconf_list[i](dev, pos + total,
3768 + * All done. Write the updated control block back to the caller.
3770 + ifc.ifc_len = total;
3773 + * Both BSD and Solaris return 0 here, so we do too.
3775 + return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
3778 +#ifdef CONFIG_PROC_FS
3780 + * This is invoked by the /proc filesystem handler to display a device
3783 +void *dev_seq_start(struct seq_file *seq, loff_t *pos)
3784 + __acquires(dev_base_lock)
3786 + struct net *net = seq_file_net(seq);
3788 + struct net_device *dev;
3790 + read_lock(&dev_base_lock);
3792 + return SEQ_START_TOKEN;
3795 + for_each_netdev(net, dev)
3796 + if (off++ == *pos)
3802 +void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3804 + struct net *net = seq_file_net(seq);
3806 + return v == SEQ_START_TOKEN ?
3807 + first_net_device(net) : next_net_device((struct net_device *)v);
3810 +void dev_seq_stop(struct seq_file *seq, void *v)
3811 + __releases(dev_base_lock)
3813 + read_unlock(&dev_base_lock);
3816 +static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
3818 + const struct net_device_stats *stats = dev_get_stats(dev);
3820 + seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
3821 + "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
3822 + dev->name, stats->rx_bytes, stats->rx_packets,
3824 + stats->rx_dropped + stats->rx_missed_errors,
3825 + stats->rx_fifo_errors,
3826 + stats->rx_length_errors + stats->rx_over_errors +
3827 + stats->rx_crc_errors + stats->rx_frame_errors,
3828 + stats->rx_compressed, stats->multicast,
3829 + stats->tx_bytes, stats->tx_packets,
3830 + stats->tx_errors, stats->tx_dropped,
3831 + stats->tx_fifo_errors, stats->collisions,
3832 + stats->tx_carrier_errors +
3833 + stats->tx_aborted_errors +
3834 + stats->tx_window_errors +
3835 + stats->tx_heartbeat_errors,
3836 + stats->tx_compressed);
3840 + * Called from the PROCfs module. This now uses the new arbitrary sized
3841 + * /proc/net interface to create /proc/net/dev
3843 +static int dev_seq_show(struct seq_file *seq, void *v)
3845 + if (v == SEQ_START_TOKEN)
3846 + seq_puts(seq, "Inter-| Receive "
3848 + " face |bytes packets errs drop fifo frame "
3849 + "compressed multicast|bytes packets errs "
3850 + "drop fifo colls carrier compressed\n");
3852 + dev_seq_printf_stats(seq, v);
3856 +static struct netif_rx_stats *softnet_get_online(loff_t *pos)
3858 + struct netif_rx_stats *rc = NULL;
3860 + while (*pos < nr_cpu_ids)
3861 + if (cpu_online(*pos)) {
3862 + rc = &per_cpu(netdev_rx_stat, *pos);
3869 +static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
3871 + return softnet_get_online(pos);
3874 +static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3877 + return softnet_get_online(pos);
3880 +static void softnet_seq_stop(struct seq_file *seq, void *v)
3884 +static int softnet_seq_show(struct seq_file *seq, void *v)
3886 + struct netif_rx_stats *s = v;
3888 + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
3889 + s->total, s->dropped, s->time_squeeze, 0,
3890 + 0, 0, 0, 0, /* was fastroute */
3891 + s->cpu_collision);
3895 +static const struct seq_operations dev_seq_ops = {
3896 + .start = dev_seq_start,
3897 + .next = dev_seq_next,
3898 + .stop = dev_seq_stop,
3899 + .show = dev_seq_show,
3902 +static int dev_seq_open(struct inode *inode, struct file *file)
3904 + return seq_open_net(inode, file, &dev_seq_ops,
3905 + sizeof(struct seq_net_private));
3908 +static const struct file_operations dev_seq_fops = {
3909 + .owner = THIS_MODULE,
3910 + .open = dev_seq_open,
3912 + .llseek = seq_lseek,
3913 + .release = seq_release_net,
3916 +static const struct seq_operations softnet_seq_ops = {
3917 + .start = softnet_seq_start,
3918 + .next = softnet_seq_next,
3919 + .stop = softnet_seq_stop,
3920 + .show = softnet_seq_show,
3923 +static int softnet_seq_open(struct inode *inode, struct file *file)
3925 + return seq_open(file, &softnet_seq_ops);
3928 +static const struct file_operations softnet_seq_fops = {
3929 + .owner = THIS_MODULE,
3930 + .open = softnet_seq_open,
3932 + .llseek = seq_lseek,
3933 + .release = seq_release,
3936 +static void *ptype_get_idx(loff_t pos)
3938 + struct packet_type *pt = NULL;
3942 + list_for_each_entry_rcu(pt, &ptype_all, list) {
3948 + for (t = 0; t < PTYPE_HASH_SIZE; t++) {
3949 + list_for_each_entry_rcu(pt, &ptype_base[t], list) {
3958 +static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
3962 + return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
3965 +static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
3967 + struct packet_type *pt;
3968 + struct list_head *nxt;
3972 + if (v == SEQ_START_TOKEN)
3973 + return ptype_get_idx(0);
3976 + nxt = pt->list.next;
3977 + if (pt->type == htons(ETH_P_ALL)) {
3978 + if (nxt != &ptype_all)
3981 + nxt = ptype_base[0].next;
3983 + hash = ntohs(pt->type) & PTYPE_HASH_MASK;
3985 + while (nxt == &ptype_base[hash]) {
3986 + if (++hash >= PTYPE_HASH_SIZE)
3988 + nxt = ptype_base[hash].next;
3991 + return list_entry(nxt, struct packet_type, list);
3994 +static void ptype_seq_stop(struct seq_file *seq, void *v)
3997 + rcu_read_unlock();
4000 +static int ptype_seq_show(struct seq_file *seq, void *v)
4002 + struct packet_type *pt = v;
4004 + if (v == SEQ_START_TOKEN)
4005 + seq_puts(seq, "Type Device Function\n");
4006 + else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
4007 + if (pt->type == htons(ETH_P_ALL))
4008 + seq_puts(seq, "ALL ");
4010 + seq_printf(seq, "%04x", ntohs(pt->type));
4012 + seq_printf(seq, " %-8s %pF\n",
4013 + pt->dev ? pt->dev->name : "", pt->func);
4019 +static const struct seq_operations ptype_seq_ops = {
4020 + .start = ptype_seq_start,
4021 + .next = ptype_seq_next,
4022 + .stop = ptype_seq_stop,
4023 + .show = ptype_seq_show,
4026 +static int ptype_seq_open(struct inode *inode, struct file *file)
4028 + return seq_open_net(inode, file, &ptype_seq_ops,
4029 + sizeof(struct seq_net_private));
4032 +static const struct file_operations ptype_seq_fops = {
4033 + .owner = THIS_MODULE,
4034 + .open = ptype_seq_open,
4036 + .llseek = seq_lseek,
4037 + .release = seq_release_net,
4041 +static int __net_init dev_proc_net_init(struct net *net)
4045 + if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
4047 + if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
4049 + if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
4052 + if (wext_proc_init(net))
4058 + proc_net_remove(net, "ptype");
4060 + proc_net_remove(net, "softnet_stat");
4062 + proc_net_remove(net, "dev");
4066 +static void __net_exit dev_proc_net_exit(struct net *net)
4068 + wext_proc_exit(net);
4070 + proc_net_remove(net, "ptype");
4071 + proc_net_remove(net, "softnet_stat");
4072 + proc_net_remove(net, "dev");
4075 +static struct pernet_operations __net_initdata dev_proc_ops = {
4076 + .init = dev_proc_net_init,
4077 + .exit = dev_proc_net_exit,
4080 +static int __init dev_proc_init(void)
4082 + return register_pernet_subsys(&dev_proc_ops);
4085 +#define dev_proc_init() 0
4086 +#endif /* CONFIG_PROC_FS */
4090 + * netdev_set_master - set up master/slave pair
4091 + * @slave: slave device
4092 + * @master: new master device
4094 + * Changes the master device of the slave. Pass %NULL to break the
4095 + * bonding. The caller must hold the RTNL semaphore. On a failure
4096 + * a negative errno code is returned. On success the reference counts
4097 + * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
4098 + * function returns zero.
4100 +int netdev_set_master(struct net_device *slave, struct net_device *master)
4102 + struct net_device *old = slave->master;
4112 + slave->master = master;
4114 + synchronize_net();
4120 + slave->flags |= IFF_SLAVE;
4122 + slave->flags &= ~IFF_SLAVE;
4124 + rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
4128 +static void dev_change_rx_flags(struct net_device *dev, int flags)
4130 + const struct net_device_ops *ops = dev->netdev_ops;
4132 + if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
4133 + ops->ndo_change_rx_flags(dev, flags);
4136 +static int __dev_set_promiscuity(struct net_device *dev, int inc)
4138 + unsigned short old_flags = dev->flags;
4144 + dev->flags |= IFF_PROMISC;
4145 + dev->promiscuity += inc;
4146 + if (dev->promiscuity == 0) {
4149 + * If inc causes overflow, untouch promisc and return error.
4152 + dev->flags &= ~IFF_PROMISC;
4154 + dev->promiscuity -= inc;
4155 + printk(KERN_WARNING "%s: promiscuity touches roof, "
4156 + "set promiscuity failed, promiscuity feature "
4157 + "of device might be broken.\n", dev->name);
4158 + return -EOVERFLOW;
4161 + if (dev->flags != old_flags) {
4162 + printk(KERN_INFO "device %s %s promiscuous mode\n",
4163 + dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
4165 + if (audit_enabled) {
4166 + current_uid_gid(&uid, &gid);
4167 + audit_log(current->audit_context, GFP_ATOMIC,
4168 + AUDIT_ANOM_PROMISCUOUS,
4169 + "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
4170 + dev->name, (dev->flags & IFF_PROMISC),
4171 + (old_flags & IFF_PROMISC),
4172 + audit_get_loginuid(current),
4174 + audit_get_sessionid(current));
4177 + dev_change_rx_flags(dev, IFF_PROMISC);
4183 + * dev_set_promiscuity - update promiscuity count on a device
4187 + * Add or remove promiscuity from a device. While the count in the device
4188 + * remains above zero the interface remains promiscuous. Once it hits zero
4189 + * the device reverts back to normal filtering operation. A negative inc
4190 + * value is used to drop promiscuity on the device.
4191 + * Return 0 if successful or a negative errno code on error.
4193 +int dev_set_promiscuity(struct net_device *dev, int inc)
4195 + unsigned short old_flags = dev->flags;
4198 + err = __dev_set_promiscuity(dev, inc);
4201 + if (dev->flags != old_flags)
4202 + dev_set_rx_mode(dev);
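Since promiscuity is a counter rather than a flag, every increment must eventually be balanced; a sketch of a hypothetical packet-tap client (names invented, caller holds RTNL):

static int example_tap_attach(struct net_device *dev)
{
	return dev_set_promiscuity(dev, 1);	/* count > 0: IFF_PROMISC stays set */
}

static void example_tap_detach(struct net_device *dev)
{
	dev_set_promiscuity(dev, -1);		/* at 0 the device leaves promisc mode */
}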
4207 + * dev_set_allmulti - update allmulti count on a device
4211 + * Add or remove reception of all multicast frames to a device. While the
4212 + * count in the device remains above zero the interface remains listening
4213 + * to all interfaces. Once it hits zero the device reverts back to normal
4214 + * filtering operation. A negative @inc value is used to drop the counter
4215 + * when releasing a resource needing all multicasts.
4216 + * Return 0 if successful or a negative errno code on error.
4219 +int dev_set_allmulti(struct net_device *dev, int inc)
4221 + unsigned short old_flags = dev->flags;
4225 + dev->flags |= IFF_ALLMULTI;
4226 + dev->allmulti += inc;
4227 + if (dev->allmulti == 0) {
4230 + * If inc causes overflow, untouch allmulti and return error.
4233 + dev->flags &= ~IFF_ALLMULTI;
4235 + dev->allmulti -= inc;
4236 + printk(KERN_WARNING "%s: allmulti touches roof, "
4237 + "set allmulti failed, allmulti feature of "
4238 + "device might be broken.\n", dev->name);
4239 + return -EOVERFLOW;
4242 + if (dev->flags ^ old_flags) {
4243 + dev_change_rx_flags(dev, IFF_ALLMULTI);
4244 + dev_set_rx_mode(dev);
4250 + * Upload unicast and multicast address lists to device and
4251 + * configure RX filtering. When the device doesn't support unicast
4252 + * filtering it is put in promiscuous mode while unicast addresses are added.
4255 +void __dev_set_rx_mode(struct net_device *dev)
4257 + const struct net_device_ops *ops = dev->netdev_ops;
4259 + /* dev_open will call this function so the list will stay sane. */
4260 + if (!(dev->flags&IFF_UP))
4263 + if (!netif_device_present(dev))
4266 + if (ops->ndo_set_rx_mode)
4267 + ops->ndo_set_rx_mode(dev);
4269 + /* Unicast addresses changes may only happen under the rtnl,
4270 + * therefore calling __dev_set_promiscuity here is safe.
4272 + if (dev->uc_count > 0 && !dev->uc_promisc) {
4273 + __dev_set_promiscuity(dev, 1);
4274 + dev->uc_promisc = 1;
4275 + } else if (dev->uc_count == 0 && dev->uc_promisc) {
4276 + __dev_set_promiscuity(dev, -1);
4277 + dev->uc_promisc = 0;
4280 + if (ops->ndo_set_multicast_list)
4281 + ops->ndo_set_multicast_list(dev);
4285 +void dev_set_rx_mode(struct net_device *dev)
4287 + netif_addr_lock_bh(dev);
4288 + __dev_set_rx_mode(dev);
4289 + netif_addr_unlock_bh(dev);
4292 +int __dev_addr_delete(struct dev_addr_list **list, int *count,
4293 + void *addr, int alen, int glbl)
4295 + struct dev_addr_list *da;
4297 + for (; (da = *list) != NULL; list = &da->next) {
4298 + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4299 + alen == da->da_addrlen) {
4301 + int old_glbl = da->da_gusers;
4302 + da->da_gusers = 0;
4303 + if (old_glbl == 0)
4306 + if (--da->da_users)
4318 +int __dev_addr_add(struct dev_addr_list **list, int *count,
4319 + void *addr, int alen, int glbl)
4321 + struct dev_addr_list *da;
4323 + for (da = *list; da != NULL; da = da->next) {
4324 + if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
4325 + da->da_addrlen == alen) {
4327 + int old_glbl = da->da_gusers;
4328 + da->da_gusers = 1;
4337 + da = kzalloc(sizeof(*da), GFP_ATOMIC);
4340 + memcpy(da->da_addr, addr, alen);
4341 + da->da_addrlen = alen;
4343 + da->da_gusers = glbl ? 1 : 0;
4351 + * dev_unicast_delete - Release secondary unicast address.
4353 + * @addr: address to delete
4354 + * @alen: length of @addr
4356 + * Release reference to a secondary unicast address and remove it
4357 + * from the device if the reference count drops to zero.
4359 + * The caller must hold the rtnl_mutex.
4361 +int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
4367 + netif_addr_lock_bh(dev);
4368 + err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
4370 + __dev_set_rx_mode(dev);
4371 + netif_addr_unlock_bh(dev);
4374 +EXPORT_SYMBOL(dev_unicast_delete);
4377 + * dev_unicast_add - add a secondary unicast address
4379 + * @addr: address to add
4380 + * @alen: length of @addr
4382 + * Add a secondary unicast address to the device or increase
4383 + * the reference count if it already exists.
4385 + * The caller must hold the rtnl_mutex.
4387 +int dev_unicast_add(struct net_device *dev, void *addr, int alen)
4393 + netif_addr_lock_bh(dev);
4394 + err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
4396 + __dev_set_rx_mode(dev);
4397 + netif_addr_unlock_bh(dev);
4400 +EXPORT_SYMBOL(dev_unicast_add);
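As an illustration (editorial; the locally administered MAC and example_* names are invented), a layered-device driver holding rtnl_mutex could claim an extra unicast address on its lower device like this:

static const u8 example_extra_mac[ETH_ALEN] = {
	0x02, 0x00, 0x00, 0x00, 0x00, 0x01	/* locally administered */
};

static int example_claim_secondary(struct net_device *lower)
{
	/* refcounted: adding the same address again just bumps da_users */
	return dev_unicast_add(lower, (void *)example_extra_mac, ETH_ALEN);
}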
4402 +int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
4403 + struct dev_addr_list **from, int *from_count)
4405 + struct dev_addr_list *da, *next;
4409 + while (da != NULL) {
4411 + if (!da->da_synced) {
4412 + err = __dev_addr_add(to, to_count,
4413 + da->da_addr, da->da_addrlen, 0);
4416 + da->da_synced = 1;
4418 + } else if (da->da_users == 1) {
4419 + __dev_addr_delete(to, to_count,
4420 + da->da_addr, da->da_addrlen, 0);
4421 + __dev_addr_delete(from, from_count,
4422 + da->da_addr, da->da_addrlen, 0);
4429 +void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
4430 + struct dev_addr_list **from, int *from_count)
4432 + struct dev_addr_list *da, *next;
4435 + while (da != NULL) {
4437 + if (da->da_synced) {
4438 + __dev_addr_delete(to, to_count,
4439 + da->da_addr, da->da_addrlen, 0);
4440 + da->da_synced = 0;
4441 + __dev_addr_delete(from, from_count,
4442 + da->da_addr, da->da_addrlen, 0);
4449 + * dev_unicast_sync - Synchronize device's unicast list to another device
4450 + * @to: destination device
4451 + * @from: source device
4453 + * Add newly added addresses to the destination device and release
4454 + * addresses that have no users left. The source device must be
4455 + * locked by netif_tx_lock_bh.
4457 + * This function is intended to be called from the dev->set_rx_mode
4458 + * function of layered software devices.
4460 +int dev_unicast_sync(struct net_device *to, struct net_device *from)
4464 + netif_addr_lock_bh(to);
4465 + err = __dev_addr_sync(&to->uc_list, &to->uc_count,
4466 + &from->uc_list, &from->uc_count);
4468 + __dev_set_rx_mode(to);
4469 + netif_addr_unlock_bh(to);
4472 +EXPORT_SYMBOL(dev_unicast_sync);
4475 + * dev_unicast_unsync - Remove synchronized addresses from the destination device
4476 + * @to: destination device
4477 + * @from: source device
4479 + * Remove all addresses that were added to the destination device by
4480 + * dev_unicast_sync(). This function is intended to be called from the
4481 + * dev->stop function of layered software devices.
4483 +void dev_unicast_unsync(struct net_device *to, struct net_device *from)
4485 + netif_addr_lock_bh(from);
4486 + netif_addr_lock(to);
4488 + __dev_addr_unsync(&to->uc_list, &to->uc_count,
4489 + &from->uc_list, &from->uc_count);
4490 + __dev_set_rx_mode(to);
4492 + netif_addr_unlock(to);
4493 + netif_addr_unlock_bh(from);
4495 +EXPORT_SYMBOL(dev_unicast_unsync);
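As an illustration (not from the patch itself): the kdoc above says dev_unicast_sync()/dev_unicast_unsync() are meant for layered software devices, which mirror their secondary unicast list onto the lower device from set_rx_mode and remove it again on stop. A minimal sketch, assuming a hypothetical stacked driver whose private data holds the lower device:

#include <linux/netdevice.h>

struct stacked_priv {
	struct net_device *lowerdev;	/* the real device underneath */
};

static void stacked_set_rx_mode(struct net_device *dev)
{
	struct stacked_priv *priv = netdev_priv(dev);

	/* Push newly added unicast addresses down; drop unused ones. */
	dev_unicast_sync(priv->lowerdev, dev);
}

static int stacked_stop(struct net_device *dev)
{
	struct stacked_priv *priv = netdev_priv(dev);

	/* Remove every address this device synced onto the lower one. */
	dev_unicast_unsync(priv->lowerdev, dev);
	netif_stop_queue(dev);
	return 0;
}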
4497 +static void __dev_addr_discard(struct dev_addr_list **list)
4499 + struct dev_addr_list *tmp;
4501 + while (*list != NULL) {
4503 + *list = tmp->next;
4504 + if (tmp->da_users > tmp->da_gusers)
4505 + printk("__dev_addr_discard: address leakage! "
4506 + "da_users=%d\n", tmp->da_users);
4511 +static void dev_addr_discard(struct net_device *dev)
4513 + netif_addr_lock_bh(dev);
4515 + __dev_addr_discard(&dev->uc_list);
4516 + dev->uc_count = 0;
4518 + __dev_addr_discard(&dev->mc_list);
4519 + dev->mc_count = 0;
4521 + netif_addr_unlock_bh(dev);
4525 + * dev_get_flags - get flags reported to userspace
4528 + * Get the combination of flag bits exported through APIs to userspace.
4530 +unsigned dev_get_flags(const struct net_device *dev)
4534 + flags = (dev->flags & ~(IFF_PROMISC |
4539 + (dev->gflags & (IFF_PROMISC |
4542 + if (netif_running(dev)) {
4543 + if (netif_oper_up(dev))
4544 + flags |= IFF_RUNNING;
4545 + if (netif_carrier_ok(dev))
4546 + flags |= IFF_LOWER_UP;
4547 + if (netif_dormant(dev))
4548 + flags |= IFF_DORMANT;
4555 + * dev_change_flags - change device settings
4557 + * @flags: device state flags
4559 + * Change settings on a device based on the passed state flags. The flags are
4560 + * in the userspace exported format.
4562 +int dev_change_flags(struct net_device *dev, unsigned flags)
4565 + int old_flags = dev->flags;
4570 + * Set the flags on our device.
4573 + dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
4574 + IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
4576 + (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
4580 + * Load in the correct multicast list now the flags have changed.
4583 + if ((old_flags ^ flags) & IFF_MULTICAST)
4584 + dev_change_rx_flags(dev, IFF_MULTICAST);
4586 + dev_set_rx_mode(dev);
4589 + * Have we downed the interface. We handle IFF_UP ourselves
4590 + * according to user attempts to set it, rather than blindly
4595 + if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
4596 + ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
4599 + dev_set_rx_mode(dev);
4602 + if (dev->flags & IFF_UP &&
4603 + ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
4605 + call_netdevice_notifiers(NETDEV_CHANGE, dev);
4607 + if ((flags ^ dev->gflags) & IFF_PROMISC) {
4608 + int inc = (flags & IFF_PROMISC) ? +1 : -1;
4609 + dev->gflags ^= IFF_PROMISC;
4610 + dev_set_promiscuity(dev, inc);
4613 + /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
4614 + is important. Some (broken) drivers set IFF_PROMISC, when
4615 +   IFF_ALLMULTI is requested, without asking us and without reporting it.
4617 + if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
4618 + int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
4619 + dev->gflags ^= IFF_ALLMULTI;
4620 + dev_set_allmulti(dev, inc);
4623 + /* Exclude state transition flags, already notified */
4624 + changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
4626 + rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
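A usage sketch, not part of the patch: because dev_change_flags() maintains the gflags reference counts for IFF_PROMISC/IFF_ALLMULTI seen above, in-kernel callers toggle these bits through it under the RTNL lock rather than writing dev->flags directly, mirroring what the SIOCSIFFLAGS ioctl does for user space. All names are illustrative:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

/* Turn on promiscuous mode via the reference-counted flags path. */
static int example_enable_promisc(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_PROMISC);
	rtnl_unlock();
	return err;
}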
4632 + * dev_set_mtu - Change maximum transfer unit
4634 + * @new_mtu: new transfer unit
4636 + * Change the maximum transfer size of the network device.
4638 +int dev_set_mtu(struct net_device *dev, int new_mtu)
4640 + const struct net_device_ops *ops = dev->netdev_ops;
4643 + if (new_mtu == dev->mtu)
4646 + /* MTU must be positive. */
4650 + if (!netif_device_present(dev))
4654 + if (ops->ndo_change_mtu)
4655 + err = ops->ndo_change_mtu(dev, new_mtu);
4657 + dev->mtu = new_mtu;
4659 + if (!err && dev->flags & IFF_UP)
4660 + call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
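Similarly (illustrative sketch, same headers as above): dev_set_mtu() is the in-kernel counterpart of SIOCSIFMTU and likewise runs under RTNL, so the NETDEV_CHANGEMTU notification stays serialized with other device changes. The MTU value 9000 is just an example:

static int example_set_jumbo_mtu(struct net_device *dev)
{
	int err;

	rtnl_lock();
	err = dev_set_mtu(dev, 9000);
	rtnl_unlock();
	return err;
}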
4665 + * dev_set_mac_address - Change Media Access Control Address
4667 + * @sa: new address
4669 + * Change the hardware (MAC) address of the device
4671 +int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
4673 + const struct net_device_ops *ops = dev->netdev_ops;
4676 + if (!ops->ndo_set_mac_address)
4677 + return -EOPNOTSUPP;
4678 + if (sa->sa_family != dev->type)
4680 + if (!netif_device_present(dev))
4682 + err = ops->ndo_set_mac_address(dev, sa);
4684 + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4689 + * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
4691 +static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
4694 + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4700 + case SIOCGIFFLAGS: /* Get interface flags */
4701 + ifr->ifr_flags = dev_get_flags(dev);
4704 + case SIOCGIFMETRIC: /* Get the metric on the interface
4705 + (currently unused) */
4706 + ifr->ifr_metric = 0;
4709 + case SIOCGIFMTU: /* Get the MTU of a device */
4710 + ifr->ifr_mtu = dev->mtu;
4713 + case SIOCGIFHWADDR:
4714 + if (!dev->addr_len)
4715 + memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
4717 + memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
4718 + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4719 + ifr->ifr_hwaddr.sa_family = dev->type;
4722 + case SIOCGIFSLAVE:
4727 + ifr->ifr_map.mem_start = dev->mem_start;
4728 + ifr->ifr_map.mem_end = dev->mem_end;
4729 + ifr->ifr_map.base_addr = dev->base_addr;
4730 + ifr->ifr_map.irq = dev->irq;
4731 + ifr->ifr_map.dma = dev->dma;
4732 + ifr->ifr_map.port = dev->if_port;
4735 + case SIOCGIFINDEX:
4736 + ifr->ifr_ifindex = dev->ifindex;
4739 + case SIOCGIFTXQLEN:
4740 + ifr->ifr_qlen = dev->tx_queue_len;
4744 + /* dev_ioctl() should ensure this case
4745 + * is never reached
4756 + * Perform the SIOCxIFxxx calls, inside rtnl_lock()
4758 +static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
4761 + struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
4762 + const struct net_device_ops *ops;
4767 + ops = dev->netdev_ops;
4770 + case SIOCSIFFLAGS: /* Set interface flags */
4771 + return dev_change_flags(dev, ifr->ifr_flags);
4773 + case SIOCSIFMETRIC: /* Set the metric on the interface
4774 + (currently unused) */
4775 + return -EOPNOTSUPP;
4777 + case SIOCSIFMTU: /* Set the MTU of a device */
4778 + return dev_set_mtu(dev, ifr->ifr_mtu);
4780 + case SIOCSIFHWADDR:
4781 + return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
4783 + case SIOCSIFHWBROADCAST:
4784 + if (ifr->ifr_hwaddr.sa_family != dev->type)
4786 + memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
4787 + min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
4788 + call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
4792 + if (ops->ndo_set_config) {
4793 + if (!netif_device_present(dev))
4795 + return ops->ndo_set_config(dev, &ifr->ifr_map);
4797 + return -EOPNOTSUPP;
4799 + case SIOCADDMULTI:
4800 + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4801 + ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4803 + if (!netif_device_present(dev))
4805 + return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
4806 + dev->addr_len, 1);
4808 + case SIOCDELMULTI:
4809 + if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
4810 + ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
4812 + if (!netif_device_present(dev))
4814 + return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
4815 + dev->addr_len, 1);
4817 + case SIOCSIFTXQLEN:
4818 + if (ifr->ifr_qlen < 0)
4820 + dev->tx_queue_len = ifr->ifr_qlen;
4824 + ifr->ifr_newname[IFNAMSIZ-1] = '\0';
4825 + return dev_change_name(dev, ifr->ifr_newname);
4828 + * Unknown or private ioctl
4832 + if ((cmd >= SIOCDEVPRIVATE &&
4833 + cmd <= SIOCDEVPRIVATE + 15) ||
4834 + cmd == SIOCBONDENSLAVE ||
4835 + cmd == SIOCBONDRELEASE ||
4836 + cmd == SIOCBONDSETHWADDR ||
4837 + cmd == SIOCBONDSLAVEINFOQUERY ||
4838 + cmd == SIOCBONDINFOQUERY ||
4839 + cmd == SIOCBONDCHANGEACTIVE ||
4840 + cmd == SIOCGMIIPHY ||
4841 + cmd == SIOCGMIIREG ||
4842 + cmd == SIOCSMIIREG ||
4843 + cmd == SIOCBRADDIF ||
4844 + cmd == SIOCBRDELIF ||
4845 + cmd == SIOCSHWTSTAMP ||
4846 + cmd == SIOCWANDEV) {
4847 + err = -EOPNOTSUPP;
4848 + if (ops->ndo_do_ioctl) {
4849 + if (netif_device_present(dev))
4850 + err = ops->ndo_do_ioctl(dev, ifr, cmd);
4862 + * This function handles all "interface"-type I/O control requests. The actual
4863 + * 'doing' part of this is dev_ifsioc above.
4867 + * dev_ioctl - network device ioctl
4868 + * @net: the applicable net namespace
4869 + * @cmd: command to issue
4870 + * @arg: pointer to a struct ifreq in user space
4872 + * Issue ioctl functions to devices. This is normally called by the
4873 + * user space syscall interfaces but can sometimes be useful for
4874 + * other purposes. The return value is the return from the syscall if
4875 + * positive or a negative errno code on error.
4878 +int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
4884 + /* One special case: SIOCGIFCONF takes ifconf argument
4885 + and requires shared lock, because it sleeps writing
4889 + if (cmd == SIOCGIFCONF) {
4891 + ret = dev_ifconf(net, (char __user *) arg);
4895 + if (cmd == SIOCGIFNAME)
4896 + return dev_ifname(net, (struct ifreq __user *)arg);
4898 + if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
4901 + ifr.ifr_name[IFNAMSIZ-1] = 0;
4903 + colon = strchr(ifr.ifr_name, ':');
4908 + * See which interface the caller is talking about.
4913 + * These ioctl calls:
4914 + * - can be done by all.
4915 + * - atomic and do not require locking.
4916 + * - return a value
4918 + case SIOCGIFFLAGS:
4919 + case SIOCGIFMETRIC:
4921 + case SIOCGIFHWADDR:
4922 + case SIOCGIFSLAVE:
4924 + case SIOCGIFINDEX:
4925 + case SIOCGIFTXQLEN:
4926 + dev_load(net, ifr.ifr_name);
4927 + read_lock(&dev_base_lock);
4928 + ret = dev_ifsioc_locked(net, &ifr, cmd);
4929 + read_unlock(&dev_base_lock);
4933 + if (copy_to_user(arg, &ifr,
4934 + sizeof(struct ifreq)))
4940 + dev_load(net, ifr.ifr_name);
4942 + ret = dev_ethtool(net, &ifr);
4947 + if (copy_to_user(arg, &ifr,
4948 + sizeof(struct ifreq)))
4954 + * These ioctl calls:
4955 + * - require superuser power.
4956 + * - require strict serialization.
4957 + * - return a value
4962 + if (!capable(CAP_NET_ADMIN))
4964 + dev_load(net, ifr.ifr_name);
4966 + ret = dev_ifsioc(net, &ifr, cmd);
4971 + if (copy_to_user(arg, &ifr,
4972 + sizeof(struct ifreq)))
4978 + * These ioctl calls:
4979 + * - require superuser power.
4980 + * - require strict serialization.
4981 + * - do not return a value
4983 + case SIOCSIFFLAGS:
4984 + case SIOCSIFMETRIC:
4987 + case SIOCSIFHWADDR:
4988 + case SIOCSIFSLAVE:
4989 + case SIOCADDMULTI:
4990 + case SIOCDELMULTI:
4991 + case SIOCSIFHWBROADCAST:
4992 + case SIOCSIFTXQLEN:
4994 + case SIOCBONDENSLAVE:
4995 + case SIOCBONDRELEASE:
4996 + case SIOCBONDSETHWADDR:
4997 + case SIOCBONDCHANGEACTIVE:
5000 + case SIOCSHWTSTAMP:
5001 + if (!capable(CAP_NET_ADMIN))
5003 + /* fall through */
5004 + case SIOCBONDSLAVEINFOQUERY:
5005 + case SIOCBONDINFOQUERY:
5006 + dev_load(net, ifr.ifr_name);
5008 + ret = dev_ifsioc(net, &ifr, cmd);
5013 + /* Get the per device memory space. We can add this but
5014 + * currently do not support it */
5016 + /* Set the per device memory buffer space.
5017 + * Not applicable in our case */
5022 + * Unknown or private ioctl.
5025 + if (cmd == SIOCWANDEV ||
5026 + (cmd >= SIOCDEVPRIVATE &&
5027 + cmd <= SIOCDEVPRIVATE + 15)) {
5028 + dev_load(net, ifr.ifr_name);
5030 + ret = dev_ifsioc(net, &ifr, cmd);
5032 + if (!ret && copy_to_user(arg, &ifr,
5033 + sizeof(struct ifreq)))
5037 + /* Take care of Wireless Extensions */
5038 + if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
5039 + return wext_handle_ioctl(net, &ifr, cmd, arg);
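From user space, the path above is entered through an ordinary socket ioctl; any socket will do. A self-contained sketch exercising two of the SIOCGIFxxx branches handled by dev_ifsioc_locked(); the interface name "eth0" is only an example:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>

int main(void)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);

	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
		printf("mtu: %d\n", ifr.ifr_mtu);
	if (ioctl(fd, SIOCGIFFLAGS, &ifr) == 0)
		printf("up: %s\n", (ifr.ifr_flags & IFF_UP) ? "yes" : "no");

	close(fd);
	return 0;
}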
5046 + * dev_new_index - allocate an ifindex
5047 + * @net: the applicable net namespace
5049 + * Returns a suitable unique value for a new device interface
5050 + * number. The caller must hold the rtnl semaphore or the
5051 + * dev_base_lock to be sure it remains unique.
5053 +static int dev_new_index(struct net *net)
5055 + static int ifindex;
5057 + if (++ifindex <= 0)
5059 + if (!__dev_get_by_index(net, ifindex))
5064 +/* Delayed registration/unregistration */
5065 +static LIST_HEAD(net_todo_list);
5067 +static void net_set_todo(struct net_device *dev)
5069 + list_add_tail(&dev->todo_list, &net_todo_list);
5072 +static void rollback_registered(struct net_device *dev)
5074 + BUG_ON(dev_boot_phase);
5077 + /* Some devices call without registering for initialization unwind. */
5078 + if (dev->reg_state == NETREG_UNINITIALIZED) {
5079 + printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
5080 + "was registered\n", dev->name, dev);
5086 + BUG_ON(dev->reg_state != NETREG_REGISTERED);
5088 + /* If device is running, close it first. */
5091 + /* And unlink it from device chain. */
5092 + unlist_netdevice(dev);
5094 + dev->reg_state = NETREG_UNREGISTERING;
5096 + synchronize_net();
5098 + /* Shutdown queueing discipline. */
5099 + dev_shutdown(dev);
5102 +	/* Notify protocols that we are about to destroy
5103 + this device. They should clean all the things.
5105 + call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5108 + * Flush the unicast and multicast chains
5110 + dev_addr_discard(dev);
5112 + if (dev->netdev_ops->ndo_uninit)
5113 + dev->netdev_ops->ndo_uninit(dev);
5115 + /* Notifier chain MUST detach us from master device. */
5116 + WARN_ON(dev->master);
5118 + /* Remove entries from kobject tree */
5119 + netdev_unregister_kobject(dev);
5121 + synchronize_net();
5126 +static void __netdev_init_queue_locks_one(struct net_device *dev,
5127 + struct netdev_queue *dev_queue,
5130 + spin_lock_init(&dev_queue->_xmit_lock);
5131 + netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
5132 + dev_queue->xmit_lock_owner = -1;
5135 +static void netdev_init_queue_locks(struct net_device *dev)
5137 + netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
5138 + __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
5141 +unsigned long netdev_fix_features(unsigned long features, const char *name)
5143 + /* Fix illegal SG+CSUM combinations. */
5144 + if ((features & NETIF_F_SG) &&
5145 + !(features & NETIF_F_ALL_CSUM)) {
5147 + printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
5148 + "checksum feature.\n", name);
5149 + features &= ~NETIF_F_SG;
5152 + /* TSO requires that SG is present as well. */
5153 + if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
5155 + printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
5156 + "SG feature.\n", name);
5157 + features &= ~NETIF_F_TSO;
5160 + if (features & NETIF_F_UFO) {
5161 + if (!(features & NETIF_F_GEN_CSUM)) {
5163 + printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
5164 + "since no NETIF_F_HW_CSUM feature.\n",
5166 + features &= ~NETIF_F_UFO;
5169 + if (!(features & NETIF_F_SG)) {
5171 + printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
5172 + "since no NETIF_F_SG feature.\n", name);
5173 + features &= ~NETIF_F_UFO;
5179 +EXPORT_SYMBOL(netdev_fix_features);
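To make the dependency rules above concrete (a hedged sketch, names assumed): a driver requesting TSO without scatter/gather gets TSO masked back off, with a notice in the log, because NETIF_F_SG is a prerequisite for NETIF_F_TSO:

/* Sanitize a feature word before applying it to the device. */
static void example_apply_features(struct net_device *dev)
{
	/* TSO requested, but NETIF_F_SG (its prerequisite) is missing. */
	unsigned long wanted = NETIF_F_TSO | NETIF_F_HW_CSUM;

	/* netdev_fix_features() clears NETIF_F_TSO here and logs a notice. */
	dev->features = netdev_fix_features(wanted, dev->name);
}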
5181 +/* Some devices need to (re-)set their netdev_ops inside
5182 + * ->init() or similar. If that happens, we have to setup
5183 + * the compat pointers again.
5185 +void netdev_resync_ops(struct net_device *dev)
5187 +#ifdef CONFIG_COMPAT_NET_DEV_OPS
5188 + const struct net_device_ops *ops = dev->netdev_ops;
5190 + dev->init = ops->ndo_init;
5191 + dev->uninit = ops->ndo_uninit;
5192 + dev->open = ops->ndo_open;
5193 + dev->change_rx_flags = ops->ndo_change_rx_flags;
5194 + dev->set_rx_mode = ops->ndo_set_rx_mode;
5195 + dev->set_multicast_list = ops->ndo_set_multicast_list;
5196 + dev->set_mac_address = ops->ndo_set_mac_address;
5197 + dev->validate_addr = ops->ndo_validate_addr;
5198 + dev->do_ioctl = ops->ndo_do_ioctl;
5199 + dev->set_config = ops->ndo_set_config;
5200 + dev->change_mtu = ops->ndo_change_mtu;
5201 + dev->neigh_setup = ops->ndo_neigh_setup;
5202 + dev->tx_timeout = ops->ndo_tx_timeout;
5203 + dev->get_stats = ops->ndo_get_stats;
5204 + dev->vlan_rx_register = ops->ndo_vlan_rx_register;
5205 + dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
5206 + dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
5207 +#ifdef CONFIG_NET_POLL_CONTROLLER
5208 + dev->poll_controller = ops->ndo_poll_controller;
5212 +EXPORT_SYMBOL(netdev_resync_ops);
5215 + * register_netdevice - register a network device
5216 + * @dev: device to register
5218 + * Take a completed network device structure and add it to the kernel
5219 + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5220 + * chain. 0 is returned on success. A negative errno code is returned
5221 + * on a failure to set up the device, or if the name is a duplicate.
5223 + * Callers must hold the rtnl semaphore. You may want
5224 + * register_netdev() instead of this.
5227 + * The locking appears insufficient to guarantee two parallel registers
5228 + * will not get the same name.
5231 +int register_netdevice(struct net_device *dev)
5233 + struct hlist_head *head;
5234 + struct hlist_node *p;
5236 + struct net *net = dev_net(dev);
5238 + BUG_ON(dev_boot_phase);
5243 + /* When net_device's are persistent, this will be fatal. */
5244 + BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
5247 + spin_lock_init(&dev->addr_list_lock);
5248 + netdev_set_addr_lockdep_class(dev);
5249 + netdev_init_queue_locks(dev);
5253 +#ifdef CONFIG_COMPAT_NET_DEV_OPS
5254 + /* Netdevice_ops API compatibility support.
5255 + * This is temporary until all network devices are converted.
5257 + if (dev->netdev_ops) {
5258 + netdev_resync_ops(dev);
5260 + char drivername[64];
5261 + pr_info("%s (%s): not using net_device_ops yet\n",
5262 + dev->name, netdev_drivername(dev, drivername, 64));
5264 + /* This works only because net_device_ops and the
5265 + compatibility structure are the same. */
5266 + dev->netdev_ops = (void *) &(dev->init);
5270 + /* Init, if this function is available */
5271 + if (dev->netdev_ops->ndo_init) {
5272 + ret = dev->netdev_ops->ndo_init(dev);
5280 + if (!dev_valid_name(dev->name)) {
5285 + dev->ifindex = dev_new_index(net);
5286 + if (dev->iflink == -1)
5287 + dev->iflink = dev->ifindex;
5289 + /* Check for existence of name */
5290 + head = dev_name_hash(net, dev->name);
5291 + hlist_for_each(p, head) {
5292 + struct net_device *d
5293 + = hlist_entry(p, struct net_device, name_hlist);
5294 + if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
5300 + /* Fix illegal checksum combinations */
5301 + if ((dev->features & NETIF_F_HW_CSUM) &&
5302 + (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5303 + printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
5305 + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
5308 + if ((dev->features & NETIF_F_NO_CSUM) &&
5309 + (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
5310 + printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
5312 + dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
5315 + dev->features = netdev_fix_features(dev->features, dev->name);
5317 + /* Enable software GSO if SG is supported. */
5318 + if (dev->features & NETIF_F_SG)
5319 + dev->features |= NETIF_F_GSO;
5321 + netdev_initialize_kobject(dev);
5322 + ret = netdev_register_kobject(dev);
5325 + dev->reg_state = NETREG_REGISTERED;
5328 + * Default initial state at registry is that the
5329 + * device is present.
5332 + set_bit(__LINK_STATE_PRESENT, &dev->state);
5334 + dev_init_scheduler(dev);
5336 + list_netdevice(dev);
5338 +	/* Notify protocols that a new device appeared. */
5339 + ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
5340 + ret = notifier_to_errno(ret);
5342 + rollback_registered(dev);
5343 + dev->reg_state = NETREG_UNREGISTERED;
5350 + if (dev->netdev_ops->ndo_uninit)
5351 + dev->netdev_ops->ndo_uninit(dev);
5356 + * init_dummy_netdev - init a dummy network device for NAPI
5357 + * @dev: device to init
5359 + * This takes a network device structure and initializes the minimum
5360 + * number of fields so it can be used to schedule NAPI polls without
5361 + * registering a full blown interface. This is to be used by drivers
5362 + * that need to tie several hardware interfaces to a single NAPI
5363 + * poll scheduler due to HW limitations.
5365 +int init_dummy_netdev(struct net_device *dev)
5367 + /* Clear everything. Note we don't initialize spinlocks
5368 +	 * as they aren't supposed to be taken by any of the
5369 + * NAPI code and this dummy netdev is supposed to be
5370 + * only ever used for NAPI polls
5372 + memset(dev, 0, sizeof(struct net_device));
5374 + /* make sure we BUG if trying to hit standard
5375 + * register/unregister code path
5377 + dev->reg_state = NETREG_DUMMY;
5379 + /* initialize the ref count */
5380 + atomic_set(&dev->refcnt, 1);
5382 + /* NAPI wants this */
5383 + INIT_LIST_HEAD(&dev->napi_list);
5385 + /* a dummy interface is started by default */
5386 + set_bit(__LINK_STATE_PRESENT, &dev->state);
5387 + set_bit(__LINK_STATE_START, &dev->state);
5391 +EXPORT_SYMBOL_GPL(init_dummy_netdev);
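A typical consumer of init_dummy_netdev(), sketched under assumptions (the names and the weight of 64 are illustrative): a driver whose hardware exposes several channels behind one registered netdev attaches its extra NAPI contexts to a never-registered dummy device:

#include <linux/netdevice.h>

static struct net_device backing_dev;	/* NAPI anchor only, never registered */
static struct napi_struct chan_napi;

static int example_poll(struct napi_struct *napi, int budget)
{
	int done = 0;
	/* ... process up to @budget packets, counting them in done ... */
	if (done < budget)
		napi_complete(napi);
	return done;
}

static void example_setup_napi(void)
{
	init_dummy_netdev(&backing_dev);
	netif_napi_add(&backing_dev, &chan_napi, example_poll, 64);
	napi_enable(&chan_napi);
}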
5395 + * register_netdev - register a network device
5396 + * @dev: device to register
5398 + * Take a completed network device structure and add it to the kernel
5399 + * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
5400 + * chain. 0 is returned on success. A negative errno code is returned
5401 + * on a failure to set up the device, or if the name is a duplicate.
5403 + * This is a wrapper around register_netdevice that takes the rtnl semaphore
5404 + * and expands the device name if you passed a format string to
5407 +int register_netdev(struct net_device *dev)
5414 + * If the name is a format string the caller wants us to do a
5415 + * name allocation.
5417 + if (strchr(dev->name, '%')) {
5418 + err = dev_alloc_name(dev, dev->name);
5423 + err = register_netdevice(dev);
5428 +EXPORT_SYMBOL(register_netdev);
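A minimal driver lifecycle sketch around these calls, assuming the usual alloc_netdev() single-queue wrapper for alloc_netdev_mq() and the standard ether_setup() initializer; every other name is illustrative:

#include <linux/etherdevice.h>
#include <linux/module.h>

static struct net_device *example_dev;

static int __init example_init(void)
{
	int err;

	/* "%d" in the name asks the core to pick a free unit number. */
	example_dev = alloc_netdev(0, "example%d", ether_setup);
	if (!example_dev)
		return -ENOMEM;

	err = register_netdev(example_dev);	/* takes the RTNL itself */
	if (err)
		free_netdev(example_dev);
	return err;
}

static void __exit example_exit(void)
{
	unregister_netdev(example_dev);	/* takes the RTNL itself */
	free_netdev(example_dev);	/* releases the last reference */
}

module_init(example_init);
module_exit(example_exit);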
5431 + * netdev_wait_allrefs - wait until all references are gone.
5433 + * This is called when unregistering network devices.
5435 + * Any protocol or device that holds a reference should register
5436 + * for netdevice notification, and cleanup and put back the
5437 + * reference if they receive an UNREGISTER event.
5438 + * We can get stuck here if buggy protocols don't correctly
5441 +static void netdev_wait_allrefs(struct net_device *dev)
5443 + unsigned long rebroadcast_time, warning_time;
5445 + rebroadcast_time = warning_time = jiffies;
5446 + while (atomic_read(&dev->refcnt) != 0) {
5447 + if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
5450 + /* Rebroadcast unregister notification */
5451 + call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5453 + if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
5455 + /* We must not have linkwatch events
5456 + * pending on unregister. If this
5457 + * happens, we simply run the queue
5458 + * unscheduled, resulting in a noop
5459 + * for this device.
5461 + linkwatch_run_queue();
5466 + rebroadcast_time = jiffies;
5471 + if (time_after(jiffies, warning_time + 10 * HZ)) {
5472 + printk(KERN_EMERG "unregister_netdevice: "
5473 + "waiting for %s to become free. Usage "
5475 + dev->name, atomic_read(&dev->refcnt));
5476 + warning_time = jiffies;
5481 +/* The sequence is:
5485 + * register_netdevice(x1);
5486 + * register_netdevice(x2);
5488 + * unregister_netdevice(y1);
5489 + * unregister_netdevice(y2);
5492 + * free_netdev(y1);
5493 + * free_netdev(y2);
5495 + * We are invoked by rtnl_unlock().
5496 + * This allows us to deal with problems:
5497 + * 1) We can delete sysfs objects which invoke hotplug
5498 + * without deadlocking with linkwatch via keventd.
5499 + * 2) Since we run with the RTNL semaphore not held, we can sleep
5500 + * safely in order to wait for the netdev refcnt to drop to zero.
5502 + * We must not return until all unregister events added during
5503 + * the interval the lock was held have been completed.
5505 +void netdev_run_todo(void)
5507 + struct list_head list;
5509 + /* Snapshot list, allow later requests */
5510 + list_replace_init(&net_todo_list, &list);
5514 + while (!list_empty(&list)) {
5515 + struct net_device *dev
5516 + = list_entry(list.next, struct net_device, todo_list);
5517 + list_del(&dev->todo_list);
5519 + if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
5520 + printk(KERN_ERR "network todo '%s' but state %d\n",
5521 + dev->name, dev->reg_state);
5526 + dev->reg_state = NETREG_UNREGISTERED;
5528 + on_each_cpu(flush_backlog, dev, 1);
5530 + netdev_wait_allrefs(dev);
5533 + BUG_ON(atomic_read(&dev->refcnt));
5534 + WARN_ON(dev->ip_ptr);
5535 + WARN_ON(dev->ip6_ptr);
5536 + WARN_ON(dev->dn_ptr);
5538 + if (dev->destructor)
5539 + dev->destructor(dev);
5541 + /* Free network device */
5542 + kobject_put(&dev->dev.kobj);
5547 + * dev_get_stats - get network device statistics
5548 + * @dev: device to get statistics from
5550 + * Get network statistics from device. The device driver may provide
5551 + * its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
5552 + * the internal statistics structure is used.
5554 +const struct net_device_stats *dev_get_stats(struct net_device *dev)
5556 + const struct net_device_ops *ops = dev->netdev_ops;
5558 + if (ops->ndo_get_stats)
5559 + return ops->ndo_get_stats(dev);
5561 + return &dev->stats;
5563 +EXPORT_SYMBOL(dev_get_stats);
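A hedged sketch of the two options above: a driver that keeps counters in private state supplies ndo_get_stats, while drivers content with dev->stats simply update it in their fast paths and provide nothing. Assumed names throughout:

struct example_priv {
	struct net_device_stats stats;	/* updated from the driver's RX/TX paths */
};

static struct net_device_stats *example_get_stats(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	return &priv->stats;	/* dev_get_stats() hands this back */
}

static const struct net_device_ops example_netdev_ops = {
	.ndo_get_stats	= example_get_stats,
};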
5565 +static void netdev_init_one_queue(struct net_device *dev,
5566 + struct netdev_queue *queue,
5572 +static void netdev_init_queues(struct net_device *dev)
5574 + netdev_init_one_queue(dev, &dev->rx_queue, NULL);
5575 + netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
5576 + spin_lock_init(&dev->tx_global_lock);
5580 + * alloc_netdev_mq - allocate network device
5581 + * @sizeof_priv: size of private data to allocate space for
5582 + * @name: device name format string
5583 + * @setup: callback to initialize device
5584 + * @queue_count: the number of subqueues to allocate
5586 + * Allocates a struct net_device with private data area for driver use
5587 + * and performs basic initialization. Also allocates subqueue structs
5588 + * for each queue on the device at the end of the netdevice.
5590 +struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
5591 + void (*setup)(struct net_device *), unsigned int queue_count)
5593 + struct netdev_queue *tx;
5594 + struct net_device *dev;
5595 + size_t alloc_size;
5598 + BUG_ON(strlen(name) >= sizeof(dev->name));
5600 + alloc_size = sizeof(struct net_device);
5601 + if (sizeof_priv) {
5602 + /* ensure 32-byte alignment of private area */
5603 + alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
5604 + alloc_size += sizeof_priv;
5606 + /* ensure 32-byte alignment of whole construct */
5607 + alloc_size += NETDEV_ALIGN_CONST;
5609 + p = kzalloc(alloc_size, GFP_KERNEL);
5611 + printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
5615 + tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
5617 + printk(KERN_ERR "alloc_netdev: Unable to allocate "
5623 + dev = (struct net_device *)
5624 + (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
5625 + dev->padded = (char *)dev - (char *)p;
5626 + dev_net_set(dev, &init_net);
5629 + dev->num_tx_queues = queue_count;
5630 + dev->real_num_tx_queues = queue_count;
5632 + dev->gso_max_size = GSO_MAX_SIZE;
5634 + netdev_init_queues(dev);
5636 + INIT_LIST_HEAD(&dev->napi_list);
5638 + strcpy(dev->name, name);
5641 +EXPORT_SYMBOL(alloc_netdev_mq);
5644 + * free_netdev - free network device
5647 + * This function does the last stage of destroying an allocated device
5648 + * interface. The reference to the device object is released.
5649 + * If this is the last reference then it will be freed.
5651 +void free_netdev(struct net_device *dev)
5653 + struct napi_struct *p, *n;
5655 + release_net(dev_net(dev));
5659 + list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
5660 + netif_napi_del(p);
5662 + /* Compatibility with error handling in drivers */
5663 + if (dev->reg_state == NETREG_UNINITIALIZED) {
5664 + kfree((char *)dev - dev->padded);
5668 + BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
5669 + dev->reg_state = NETREG_RELEASED;
5671 + /* will free via device release */
5672 + put_device(&dev->dev);
5676 + * synchronize_net - Synchronize with packet receive processing
5678 + * Wait for packets currently being received to be done.
5679 + * Does not block later packets from starting.
5681 +void synchronize_net(void)
5684 + synchronize_rcu();
5688 + * unregister_netdevice - remove device from the kernel
5691 + * This function shuts down a device interface and removes it
5692 + * from the kernel tables.
5694 + * Callers must hold the rtnl semaphore. You may want
5695 + * unregister_netdev() instead of this.
5698 +void unregister_netdevice(struct net_device *dev)
5702 + rollback_registered(dev);
5703 + /* Finish processing unregister after unlock */
5704 + net_set_todo(dev);
5708 + * unregister_netdev - remove device from the kernel
5711 + * This function shuts down a device interface and removes it
5712 + * from the kernel tables.
5714 + * This is just a wrapper for unregister_netdevice that takes
5715 + * the rtnl semaphore. In general you want to use this and not
5716 + * unregister_netdevice.
5718 +void unregister_netdev(struct net_device *dev)
5721 + unregister_netdevice(dev);
5725 +EXPORT_SYMBOL(unregister_netdev);
5728 + * dev_change_net_namespace - move device to a different network namespace
5730 + * @net: network namespace
5731 + * @pat: If not NULL name pattern to try if the current device name
5732 + * is already taken in the destination network namespace.
5734 + * This function shuts down a device interface and moves it
5735 + * to a new network namespace. On success 0 is returned, on
5736 + * a failure a negative errno code is returned.
5738 + * Callers must hold the rtnl semaphore.
5741 +int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
5743 + char buf[IFNAMSIZ];
5744 + const char *destname;
5749 + /* Don't allow namespace local devices to be moved. */
5751 + if (dev->features & NETIF_F_NETNS_LOCAL)
5754 +#ifdef CONFIG_SYSFS
5755 + /* Don't allow real devices to be moved when sysfs
5759 + if (dev->dev.parent)
5763 +	/* Ensure the device has been registered */
5765 + if (dev->reg_state != NETREG_REGISTERED)
5768 +	/* Get out if there is nothing to do */
5770 + if (net_eq(dev_net(dev), net))
5773 + /* Pick the destination device name, and ensure
5774 + * we can use it in the destination network namespace.
5777 + destname = dev->name;
5778 + if (__dev_get_by_name(net, destname)) {
5779 + /* We get here if we can't use the current device name */
5782 + if (!dev_valid_name(pat))
5784 + if (strchr(pat, '%')) {
5785 + if (__dev_alloc_name(net, pat, buf) < 0)
5790 + if (__dev_get_by_name(net, destname))
5795 +	 * And now a mini version of register_netdevice and unregister_netdevice.
5798 + /* If device is running close it first. */
5801 + /* And unlink it from device chain */
5803 + unlist_netdevice(dev);
5805 + synchronize_net();
5807 + /* Shutdown queueing discipline. */
5808 + dev_shutdown(dev);
5810 +	/* Notify protocols that we are about to destroy
5811 + this device. They should clean all the things.
5813 + call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
5816 + * Flush the unicast and multicast chains
5818 + dev_addr_discard(dev);
5820 + netdev_unregister_kobject(dev);
5822 + /* Actually switch the network namespace */
5823 + dev_net_set(dev, net);
5825 + /* Assign the new device name */
5826 + if (destname != dev->name)
5827 + strcpy(dev->name, destname);
5829 + /* If there is an ifindex conflict assign a new one */
5830 + if (__dev_get_by_index(net, dev->ifindex)) {
5831 + int iflink = (dev->iflink == dev->ifindex);
5832 + dev->ifindex = dev_new_index(net);
5834 + dev->iflink = dev->ifindex;
5837 + /* Fixup kobjects */
5838 + err = netdev_register_kobject(dev);
5841 + /* Add the device back in the hashes */
5842 + list_netdevice(dev);
5844 +	/* Notify protocols that a new device appeared. */
5845 + call_netdevice_notifiers(NETDEV_REGISTER, dev);
5847 + synchronize_net();
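A usage sketch (not from the patch): a caller moving a device between namespaces does so under RTNL and usually supplies a rename pattern so a name collision in the target namespace does not abort the move. target_net is an assumed, already-valid namespace reference:

static int example_move_dev(struct net_device *dev, struct net *target_net)
{
	int err;

	rtnl_lock();
	/* "eth%d" lets the kernel pick a fresh name on collision;
	 * NULL would make the move fail instead. */
	err = dev_change_net_namespace(dev, target_net, "eth%d");
	rtnl_unlock();
	return err;
}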
5853 +static int dev_cpu_callback(struct notifier_block *nfb,
5854 + unsigned long action,
5857 + struct sk_buff **list_skb;
5858 + struct Qdisc **list_net;
5859 + struct sk_buff *skb;
5860 + unsigned int cpu, oldcpu = (unsigned long)ocpu;
5861 + struct softnet_data *sd, *oldsd;
5863 + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
5866 + local_irq_disable();
5867 + cpu = smp_processor_id();
5868 + sd = &per_cpu(softnet_data, cpu);
5869 + oldsd = &per_cpu(softnet_data, oldcpu);
5871 + /* Find end of our completion_queue. */
5872 + list_skb = &sd->completion_queue;
5874 + list_skb = &(*list_skb)->next;
5875 + /* Append completion queue from offline CPU. */
5876 + *list_skb = oldsd->completion_queue;
5877 + oldsd->completion_queue = NULL;
5879 + /* Find end of our output_queue. */
5880 + list_net = &sd->output_queue;
5882 + list_net = &(*list_net)->next_sched;
5883 + /* Append output queue from offline CPU. */
5884 + *list_net = oldsd->output_queue;
5885 + oldsd->output_queue = NULL;
5887 + raise_softirq_irqoff(NET_TX_SOFTIRQ);
5888 + local_irq_enable();
5890 + /* Process offline CPU's input_pkt_queue */
5891 + while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
5899 + * netdev_increment_features - increment feature set by one
5900 + * @all: current feature set
5901 + * @one: new feature set
5902 + * @mask: mask feature set
5904 + * Computes a new feature set after adding a device with feature set
5905 + * @one to the master device with current feature set @all. Will not
5906 + * enable anything that is off in @mask. Returns the new feature set.
5908 +unsigned long netdev_increment_features(unsigned long all, unsigned long one,
5909 + unsigned long mask)
5911 + /* If device needs checksumming, downgrade to it. */
5912 + if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
5913 + all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
5914 + else if (mask & NETIF_F_ALL_CSUM) {
5915 + /* If one device supports v4/v6 checksumming, set for all. */
5916 + if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
5917 + !(all & NETIF_F_GEN_CSUM)) {
5918 + all &= ~NETIF_F_ALL_CSUM;
5919 + all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
5922 + /* If one device supports hw checksumming, set for all. */
5923 + if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
5924 + all &= ~NETIF_F_ALL_CSUM;
5925 + all |= NETIF_F_HW_CSUM;
5929 + one |= NETIF_F_ALL_CSUM;
5931 + one |= all & NETIF_F_ONE_FOR_ALL;
5932 + all &= one | NETIF_F_LLTX | NETIF_F_GSO;
5933 + all |= one & mask & NETIF_F_ONE_FOR_ALL;
5937 +EXPORT_SYMBOL(netdev_increment_features);
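A sketch of the intended caller pattern, with assumed names: a master device (bridge/bonding style) folds each lower device's features into its own advertised set, then sanitizes the result:

/* Recompute a master's features from its slaves. */
static void example_compute_features(struct net_device *master,
				     struct net_device **slaves, int nslaves)
{
	unsigned long features = master->features;
	int i;

	for (i = 0; i < nslaves; i++)
		features = netdev_increment_features(features,
						     slaves[i]->features,
						     NETIF_F_ONE_FOR_ALL);

	master->features = netdev_fix_features(features, master->name);
}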
5939 +static struct hlist_head *netdev_create_hash(void)
5942 + struct hlist_head *hash;
5944 + hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
5946 + for (i = 0; i < NETDEV_HASHENTRIES; i++)
5947 + INIT_HLIST_HEAD(&hash[i]);
5952 +/* Initialize per network namespace state */
5953 +static int __net_init netdev_init(struct net *net)
5955 + INIT_LIST_HEAD(&net->dev_base_head);
5957 + net->dev_name_head = netdev_create_hash();
5958 + if (net->dev_name_head == NULL)
5961 + net->dev_index_head = netdev_create_hash();
5962 + if (net->dev_index_head == NULL)
5968 + kfree(net->dev_name_head);
5974 + * netdev_drivername - network driver for the device
5975 + * @dev: network device
5976 + * @buffer: buffer for resulting name
5977 + * @len: size of buffer
5979 + * Determine network driver for device.
5981 +char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
5983 + const struct device_driver *driver;
5984 + const struct device *parent;
5986 + if (len <= 0 || !buffer)
5990 + parent = dev->dev.parent;
5995 + driver = parent->driver;
5996 + if (driver && driver->name)
5997 + strlcpy(buffer, driver->name, len);
6001 +static void __net_exit netdev_exit(struct net *net)
6003 + kfree(net->dev_name_head);
6004 + kfree(net->dev_index_head);
6007 +static struct pernet_operations __net_initdata netdev_net_ops = {
6008 + .init = netdev_init,
6009 + .exit = netdev_exit,
6012 +static void __net_exit default_device_exit(struct net *net)
6014 + struct net_device *dev;
6016 +	 * Push all migratable network devices back to the
6017 + * initial network namespace
6021 + for_each_netdev(net, dev) {
6023 + char fb_name[IFNAMSIZ];
6025 + /* Ignore unmoveable devices (i.e. loopback) */
6026 + if (dev->features & NETIF_F_NETNS_LOCAL)
6029 + /* Delete virtual devices */
6030 + if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
6031 + dev->rtnl_link_ops->dellink(dev);
6035 +		/* Push remaining network devices to init_net */
6036 + snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
6037 + err = dev_change_net_namespace(dev, &init_net, fb_name);
6039 + printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
6040 + __func__, dev->name, err);
6048 +static struct pernet_operations __net_initdata default_device_ops = {
6049 + .exit = default_device_exit,
6053 + * Initialize the DEV module. At boot time this walks the device list and
6054 + * unhooks any devices that fail to initialise (normally hardware not
6055 + * present) and leaves us with a valid list of present and active devices.
6060 + * This is called single threaded during boot, so no need
6061 + * to take the rtnl semaphore.
6063 +static int __init net_dev_init(void)
6065 + int i, rc = -ENOMEM;
6067 + BUG_ON(!dev_boot_phase);
6069 + if (dev_proc_init())
6072 + if (netdev_kobject_init())
6075 + INIT_LIST_HEAD(&ptype_all);
6076 + for (i = 0; i < PTYPE_HASH_SIZE; i++)
6077 + INIT_LIST_HEAD(&ptype_base[i]);
6079 + if (register_pernet_subsys(&netdev_net_ops))
6083 + * Initialise the packet receive queues.
6086 + for_each_possible_cpu(i) {
6087 + struct softnet_data *queue;
6089 + queue = &per_cpu(softnet_data, i);
6090 + skb_queue_head_init(&queue->input_pkt_queue);
6091 + queue->completion_queue = NULL;
6092 + INIT_LIST_HEAD(&queue->poll_list);
6094 + queue->backlog.poll = process_backlog;
6095 + queue->backlog.weight = weight_p;
6096 + queue->backlog.gro_list = NULL;
6097 + queue->backlog.gro_count = 0;
6100 + dev_boot_phase = 0;
6102 +	/* The loopback device is special: if any other network device
6103 +	 * is present in a network namespace, the loopback device must
6104 +	 * be present too. Since we now dynamically allocate and free the
6105 +	 * loopback device, ensure this invariant is maintained by
6106 +	 * keeping the loopback device as the first device on the
6107 +	 * list of network devices, so that it is the first device
6108 +	 * that appears and the last network device
6109 +	 * that disappears.
6111 + if (register_pernet_device(&loopback_net_ops))
6114 + if (register_pernet_device(&default_device_ops))
6117 + open_softirq(NET_TX_SOFTIRQ, net_tx_action);
6118 + open_softirq(NET_RX_SOFTIRQ, net_rx_action);
6120 + hotcpu_notifier(dev_cpu_callback, 0);
6128 +subsys_initcall(net_dev_init);
6130 +static int __init initialize_hashrnd(void)
6132 + get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
6136 +late_initcall_sync(initialize_hashrnd);
6138 +EXPORT_SYMBOL(__dev_get_by_index);
6139 +EXPORT_SYMBOL(__dev_get_by_name);
6140 +EXPORT_SYMBOL(__dev_remove_pack);
6141 +EXPORT_SYMBOL(dev_valid_name);
6142 +EXPORT_SYMBOL(dev_add_pack);
6143 +EXPORT_SYMBOL(dev_alloc_name);
6144 +EXPORT_SYMBOL(dev_close);
6145 +EXPORT_SYMBOL(dev_get_by_flags);
6146 +EXPORT_SYMBOL(dev_get_by_index);
6147 +EXPORT_SYMBOL(dev_get_by_name);
6148 +EXPORT_SYMBOL(dev_open);
6149 +EXPORT_SYMBOL(dev_queue_xmit);
6150 +EXPORT_SYMBOL(dev_remove_pack);
6151 +EXPORT_SYMBOL(dev_set_allmulti);
6152 +EXPORT_SYMBOL(dev_set_promiscuity);
6153 +EXPORT_SYMBOL(dev_change_flags);
6154 +EXPORT_SYMBOL(dev_set_mtu);
6155 +EXPORT_SYMBOL(dev_set_mac_address);
6156 +EXPORT_SYMBOL(free_netdev);
6157 +EXPORT_SYMBOL(netdev_boot_setup_check);
6158 +EXPORT_SYMBOL(netdev_set_master);
6159 +EXPORT_SYMBOL(netdev_state_change);
6160 +EXPORT_SYMBOL(netif_receive_skb);
6161 +EXPORT_SYMBOL(netif_rx);
6162 +EXPORT_SYMBOL(register_gifconf);
6163 +EXPORT_SYMBOL(register_netdevice);
6164 +EXPORT_SYMBOL(register_netdevice_notifier);
6165 +EXPORT_SYMBOL(skb_checksum_help);
6166 +EXPORT_SYMBOL(synchronize_net);
6167 +EXPORT_SYMBOL(unregister_netdevice);
6168 +EXPORT_SYMBOL(unregister_netdevice_notifier);
6169 +EXPORT_SYMBOL(net_enable_timestamp);
6170 +EXPORT_SYMBOL(net_disable_timestamp);
6171 +EXPORT_SYMBOL(dev_get_flags);
6173 +#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
6174 +EXPORT_SYMBOL(br_handle_frame_hook);
6175 +EXPORT_SYMBOL(br_fdb_get_hook);
6176 +EXPORT_SYMBOL(br_fdb_put_hook);
6179 +EXPORT_SYMBOL(dev_load);
6181 +EXPORT_PER_CPU_SYMBOL(softnet_data);
6182 diff --unified --recursive --new-file linux-2.6.30/net/Kconfig linux-2.6.30-1-686-smp-PF_RING/net/Kconfig
6183 --- linux-2.6.30/net/Kconfig 2009-06-10 05:05:27.000000000 +0200
6184 +++ linux-2.6.30-1-686-smp-PF_RING/net/Kconfig 2009-07-21 04:40:31.395512101 +0200
6186 source "net/xfrm/Kconfig"
6187 source "net/iucv/Kconfig"
6189 +source "net/ring/Kconfig"
6191 bool "TCP/IP networking"
6193 diff --unified --recursive --new-file linux-2.6.30/net/Makefile linux-2.6.30-1-686-smp-PF_RING/net/Makefile
6194 --- linux-2.6.30/net/Makefile 2009-06-10 05:05:27.000000000 +0200
6195 +++ linux-2.6.30-1-686-smp-PF_RING/net/Makefile 2009-07-21 04:40:31.378997724 +0200
6197 obj-$(CONFIG_PHONET) += phonet/
6198 ifneq ($(CONFIG_VLAN_8021Q),)
6200 +obj-$(CONFIG_RING) += ring/
6202 obj-$(CONFIG_IP_DCCP) += dccp/
6203 obj-$(CONFIG_IP_SCTP) += sctp/
6204 diff --unified --recursive --new-file linux-2.6.30/net/Makefile.ORG linux-2.6.30-1-686-smp-PF_RING/net/Makefile.ORG
6205 --- linux-2.6.30/net/Makefile.ORG 1970-01-01 01:00:00.000000000 +0100
6206 +++ linux-2.6.30-1-686-smp-PF_RING/net/Makefile.ORG 2009-07-21 04:40:31.369103612 +0200
6209 +# Makefile for the linux networking.
6211 +# 2 Sep 2000, Christoph Hellwig <hch@infradead.org>
6212 +# Rewritten to use lists instead of if-statements.
6217 +obj-$(CONFIG_NET) := socket.o core/
6219 +tmp-$(CONFIG_COMPAT) := compat.o
6220 +obj-$(CONFIG_NET) += $(tmp-y)
6222 +# LLC has to be linked before the files in net/802/
6223 +obj-$(CONFIG_LLC) += llc/
6224 +obj-$(CONFIG_NET) += ethernet/ 802/ sched/ netlink/
6225 +obj-$(CONFIG_NETFILTER) += netfilter/
6226 +obj-$(CONFIG_INET) += ipv4/
6227 +obj-$(CONFIG_XFRM) += xfrm/
6228 +obj-$(CONFIG_UNIX) += unix/
6229 +ifneq ($(CONFIG_IPV6),)
6232 +obj-$(CONFIG_PACKET) += packet/
6233 +obj-$(CONFIG_NET_KEY) += key/
6234 +obj-$(CONFIG_NET_SCHED) += sched/
6235 +obj-$(CONFIG_BRIDGE) += bridge/
6236 +obj-$(CONFIG_NET_DSA) += dsa/
6237 +obj-$(CONFIG_IPX) += ipx/
6238 +obj-$(CONFIG_ATALK) += appletalk/
6239 +obj-$(CONFIG_WAN_ROUTER) += wanrouter/
6240 +obj-$(CONFIG_X25) += x25/
6241 +obj-$(CONFIG_LAPB) += lapb/
6242 +obj-$(CONFIG_NETROM) += netrom/
6243 +obj-$(CONFIG_ROSE) += rose/
6244 +obj-$(CONFIG_AX25) += ax25/
6245 +obj-$(CONFIG_CAN) += can/
6246 +obj-$(CONFIG_IRDA) += irda/
6247 +obj-$(CONFIG_BT) += bluetooth/
6248 +obj-$(CONFIG_SUNRPC) += sunrpc/
6249 +obj-$(CONFIG_AF_RXRPC) += rxrpc/
6250 +obj-$(CONFIG_ATM) += atm/
6251 +obj-$(CONFIG_DECNET) += decnet/
6252 +obj-$(CONFIG_ECONET) += econet/
6253 +obj-$(CONFIG_PHONET) += phonet/
6254 +ifneq ($(CONFIG_VLAN_8021Q),)
6257 +obj-$(CONFIG_IP_DCCP) += dccp/
6258 +obj-$(CONFIG_IP_SCTP) += sctp/
6259 +obj-$(CONFIG_RDS) += rds/
6261 +obj-$(CONFIG_MAC80211) += mac80211/
6262 +obj-$(CONFIG_TIPC) += tipc/
6263 +obj-$(CONFIG_NETLABEL) += netlabel/
6264 +obj-$(CONFIG_IUCV) += iucv/
6265 +obj-$(CONFIG_RFKILL) += rfkill/
6266 +obj-$(CONFIG_NET_9P) += 9p/
6267 +ifneq ($(CONFIG_DCB),)
6271 +ifeq ($(CONFIG_NET),y)
6272 +obj-$(CONFIG_SYSCTL) += sysctl_net.o
6274 +obj-$(CONFIG_WIMAX) += wimax/
6275 diff --unified --recursive --new-file linux-2.6.30/net/ring/Kconfig linux-2.6.30-1-686-smp-PF_RING/net/ring/Kconfig
6276 --- linux-2.6.30/net/ring/Kconfig 1970-01-01 01:00:00.000000000 +0100
6277 +++ linux-2.6.30-1-686-smp-PF_RING/net/ring/Kconfig 2009-07-21 04:40:31.399104158 +0200
6280 + tristate "PF_RING sockets (EXPERIMENTAL)"
6281 + depends on EXPERIMENTAL
6283 + PF_RING socket family, optimized for packet capture.
6284 + If a PF_RING socket is bound to an adapter (via the bind() system
6285 +	  call), that adapter will be used in read-only mode until the socket
6286 +	  is destroyed. Whenever an incoming packet is received from the adapter,
6287 +	  it is not passed to the upper layers; instead it is copied to a ring
6288 +	  buffer, which in turn is exported to user space applications via mmap.
6289 + Please refer to http://luca.ntop.org/Ring.pdf for more.
6291 + Say N unless you know what you are doing.
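Purely to illustrate the help text above (not part of the patch): a raw PF_RING reader follows the socket/bind/mmap sequence described there. The numeric PF_RING constant and the ring layout come from the patched headers, so everything below is a hedged sketch; a real reader would learn the full ring size from the header in the first mapped page and remap accordingly:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <arpa/inet.h>
#include <linux/if_ether.h>

#ifndef PF_RING
#define PF_RING 27	/* assumption: use the value from the patched headers */
#endif

int main(void)
{
	struct sockaddr sa;
	void *ring;
	int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));

	if (fd < 0)
		return 1;

	/* Bind the ring to one adapter; the device name goes in sa_data. */
	memset(&sa, 0, sizeof(sa));
	sa.sa_family = PF_RING;
	strncpy(sa.sa_data, "eth0", sizeof(sa.sa_data) - 1);
	if (bind(fd, &sa, sizeof(sa)) < 0)
		return 1;

	/* Map the first page of the ring exported by the kernel module. */
	ring = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED)
		return 1;

	/* ... consume packets from the memory-mapped ring ... */
	munmap(ring, 4096);
	close(fd);
	return 0;
}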
6293 diff --unified --recursive --new-file linux-2.6.30/net/ring/Makefile linux-2.6.30-1-686-smp-PF_RING/net/ring/Makefile
6294 --- linux-2.6.30/net/ring/Makefile 1970-01-01 01:00:00.000000000 +0100
6295 +++ linux-2.6.30-1-686-smp-PF_RING/net/ring/Makefile 2009-07-21 04:40:31.315770393 +0200
6298 +# Makefile for the ring driver.
6303 +ring-objs := ring_packet.o
6304 diff --unified --recursive --new-file linux-2.6.30/net/ring/ring_packet.c linux-2.6.30-1-686-smp-PF_RING/net/ring/ring_packet.c
6305 --- linux-2.6.30/net/ring/ring_packet.c 1970-01-01 01:00:00.000000000 +0100
6306 +++ linux-2.6.30-1-686-smp-PF_RING/net/ring/ring_packet.c 2009-07-21 04:40:31.315770393 +0200
6308 +/* ***************************************************************
6310 + * (C) 2004-09 - Luca Deri <deri@ntop.org>
6312 + * This code includes contributions courtesy of
6313 + * - Amit D. Chaudhary <amit_ml@rajgad.com>
6314 + * - Andrew Gallatin <gallatyn@myri.com>
6315 + * - Brad Doctor <brad@stillsecure.com>
6316 + * - Felipe Huici <felipe.huici@nw.neclab.eu>
6317 + * - Francesco Fusco <fusco@ntop.org> (IP defrag)
6318 + * - Helmut Manck <helmut.manck@secunet.com>
6319 + * - Hitoshi Irino <irino@sfc.wide.ad.jp>
6320 + * - Jakov Haron <jyh@cabel.net>
6321 + * - Jeff Randall <jrandall@nexvu.com>
6322 + * - Kevin Wormington <kworm@sofnet.com>
6323 + * - Mahdi Dashtbozorgi <rdfm2000@gmail.com>
6324 + * - Marketakis Yannis <marketak@ics.forth.gr>
6325 + * - Matthew J. Roth <mroth@imminc.com>
6326 + * - Michael Stiller <ms@2scale.net> (VM memory support)
6327 + * - Noam Dev <noamdev@gmail.com>
6328 + * - Siva Kollipara <siva@cs.arizona.edu>
6329 + * - Vincent Carrier <vicarrier@wanadoo.fr>
6330 + * - Eugene Bogush <b_eugene@ukr.net>
6331 + * - Samir Chang <coobyhb@gmail.com>
6333 + * This program is free software; you can redistribute it and/or modify
6334 + * it under the terms of the GNU General Public License as published by
6335 + * the Free Software Foundation; either version 2 of the License, or
6336 + * (at your option) any later version.
6338 + * This program is distributed in the hope that it will be useful,
6339 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
6340 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6341 + * GNU General Public License for more details.
6343 + * You should have received a copy of the GNU General Public License
6344 + * along with this program; if not, write to the Free Software Foundation,
6345 + * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
6349 +#include <linux/version.h>
6350 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,19))
6351 +#include <linux/autoconf.h>
6353 +#include <linux/config.h>
6355 +#include <linux/module.h>
6356 +#include <linux/kernel.h>
6357 +#include <linux/socket.h>
6358 +#include <linux/skbuff.h>
6359 +#include <linux/rtnetlink.h>
6360 +#include <linux/in.h>
6361 +#include <linux/inet.h>
6362 +#include <linux/in6.h>
6363 +#include <linux/init.h>
6364 +#include <linux/filter.h>
6365 +#include <linux/ring.h>
6366 +#include <linux/ip.h>
6367 +#include <linux/tcp.h>
6368 +#include <linux/udp.h>
6369 +#include <linux/list.h>
6370 +#include <linux/netdevice.h>
6371 +#include <linux/proc_fs.h>
6373 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
6374 +#include <net/xfrm.h>
6376 +#include <linux/poll.h>
6378 +#include <net/sock.h>
6379 +#include <asm/io.h> /* needed for virt_to_phys() */
6381 +#include <net/inet_common.h>
6383 +#include <net/ip.h>
6385 +/* ================================================== */
6388 + * regcomp and regexec -- regsub and regerror are elsewhere
6389 + * @(#)regexp.c 1.3 of 18 April 87
6391 + * Copyright (c) 1986 by University of Toronto.
6392 + * Written by Henry Spencer. Not derived from licensed software.
6394 + * Permission is granted to anyone to use this software for any
6395 + * purpose on any computer system, and to redistribute it freely,
6396 + * subject to the following restrictions:
6398 + * 1. The author is not responsible for the consequences of use of
6399 + * this software, no matter how awful, even if they arise
6400 + * from defects in it.
6402 + * 2. The origin of this software must not be misrepresented, either
6403 + * by explicit claim or by omission.
6405 + * 3. Altered versions must be plainly marked as such, and must not
6406 + * be misrepresented as being the original software.
6408 + * Beware that some of this code is subtly aware of the way operator
6409 + * precedence is structured in regular expressions. Serious changes in
6410 + * regular-expression syntax might require a total rethink.
6412 + * This code was modified by Ethan Sommer to work within the kernel
6413 + * (it now uses kmalloc etc..)
6415 + * Modified slightly by Matthew Strait to use more modern C.
6418 +/* added by ethan and matt. Lets it work in both kernel and user space.
6419 + (So iptables can use it, for instance.) Yea, it goes both ways... */
6421 +#define malloc(foo) kmalloc(foo,GFP_ATOMIC)
6423 +#define printk(format,args...) printf(format,##args)
6426 +void regerror(char * s)
6428 + printk("<3>Regexp: %s\n", s);
6433 + * The "internal use only" fields in regexp.h are present to pass info from
6434 + * compile to execute that permits the execute phase to run lots faster on
6435 + * simple cases. They are:
6437 + * regstart char that must begin a match; '\0' if none obvious
6438 + * reganch is the match anchored (at beginning-of-line only)?
6439 + * regmust string (pointer into program) that match must include, or NULL
6440 + * regmlen length of regmust string
6442 + * Regstart and reganch permit very fast decisions on suitable starting points
6443 + * for a match, cutting down the work a lot. Regmust permits fast rejection
6444 + * of lines that cannot possibly match. The regmust tests are costly enough
6445 + * that regcomp() supplies a regmust only if the r.e. contains something
6446 + * potentially expensive (at present, the only such thing detected is * or +
6447 + * at the start of the r.e., which can involve a lot of backup). Regmlen is
6448 + * supplied because the test in regexec() needs it and regcomp() is computing
6453 + * Structure for regexp "program". This is essentially a linear encoding
6454 + * of a nondeterministic finite-state machine (aka syntax charts or
6455 + * "railroad normal form" in parsing technology). Each node is an opcode
6456 + * plus a "next" pointer, possibly plus an operand. "Next" pointers of
6457 + * all nodes except BRANCH implement concatenation; a "next" pointer with
6458 + * a BRANCH on both ends of it is connecting two alternatives. (Here we
6459 + * have one of the subtle syntax dependencies: an individual BRANCH (as
6460 + * opposed to a collection of them) is never concatenated with anything
6461 + * because of operator precedence.) The operand of some types of node is
6462 + * a literal string; for others, it is a node leading into a sub-FSM. In
6463 + * particular, the operand of a BRANCH node is the first node of the branch.
6464 + * (NB this is *not* a tree structure: the tail of the branch connects
6465 + * to the thing following the set of BRANCHes.) The opcodes are:
6468 +/* definition number opnd? meaning */
6469 +#define END 0 /* no End of program. */
6470 +#define BOL 1 /* no Match "" at beginning of line. */
6471 +#define EOL 2 /* no Match "" at end of line. */
6472 +#define ANY 3 /* no Match any one character. */
6473 +#define ANYOF 4 /* str Match any character in this string. */
6474 +#define ANYBUT 5 /* str Match any character not in this string. */
6475 +#define BRANCH 6 /* node Match this alternative, or the next... */
6476 +#define BACK 7 /* no Match "", "next" ptr points backward. */
6477 +#define EXACTLY 8 /* str Match this string. */
6478 +#define NOTHING 9 /* no Match empty string. */
6479 +#define STAR 10 /* node Match this (simple) thing 0 or more times. */
6480 +#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
6481 +#define OPEN 20 /* no Mark this point in input as start of #n. */
6482 + /* OPEN+1 is number 1, etc. */
6483 +#define CLOSE 30 /* no Analogous to OPEN. */
6488 + * BRANCH The set of branches constituting a single choice are hooked
6489 + * together with their "next" pointers, since precedence prevents
6490 + * anything being concatenated to any individual branch. The
6491 + * "next" pointer of the last BRANCH in a choice points to the
6492 + * thing following the whole choice. This is also where the
6493 + * final "next" pointer of each individual branch points; each
6494 + * branch starts with the operand node of a BRANCH node.
6496 + * BACK Normal "next" pointers all implicitly point forward; BACK
6497 + * exists to make loop structures possible.
6499 + * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
6500 + * BRANCH structures using BACK. Simple cases (one character
6501 + * per match) are implemented with STAR and PLUS for speed
6502 + * and to minimize recursive plunges.
6504 + * OPEN,CLOSE ...are numbered at compile time.
6508 + * A node is one char of opcode followed by two chars of "next" pointer.
6509 + * "Next" pointers are stored as two 8-bit pieces, high order first. The
6510 + * value is a positive offset from the opcode of the node containing it.
6511 + * An operand, if any, simply follows the node. (Note that much of the
6512 + * code generation knows about this implicit relationship.)
6514 + * Using two bytes for the "next" pointer is vast overkill for most things,
6515 + * but allows patterns to get big without disasters.
6517 +#define OP(p) (*(p))
6518 +#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
6519 +#define OPERAND(p) ((p) + 3)
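Concretely: a node is one opcode byte followed by a 16-bit big-endian forward offset, so walking the program is plain pointer arithmetic. A sketch of what regnext() (defined later in this file) does, minus its handling of the size-counting first pass:

static char *example_next_node(char *p)
{
	int offset = NEXT(p);

	if (offset == 0)	/* 0 means the "next" pointer is not set yet */
		return NULL;

	/* BACK nodes point backward to build loops; everything else forward. */
	return (OP(p) == BACK) ? p - offset : p + offset;
}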
6522 + * See regmagic.h for one further detail of program structure.
6527 + * Utility definitions.
6530 +#define UCHARAT(p) ((int)*(unsigned char *)(p))
6532 +#define UCHARAT(p) ((int)*(p)&CHARBITS)
6535 +#define FAIL(m) { regerror(m); return(NULL); }
6536 +#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
6537 +#define META "^$.[()|?+*\\"
6540 + * Flags to be passed up and down.
6542 +#define HASWIDTH 01 /* Known never to match null string. */
6543 +#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
6544 +#define SPSTART 04 /* Starts with * or +. */
6545 +#define WORST 0 /* Worst case. */
6548 + * Global work variables for regcomp().
6550 +struct match_globals {
6551 + char *reginput; /* String-input pointer. */
6552 + char *regbol; /* Beginning of input, for ^ check. */
6553 + char **regstartp; /* Pointer to startp array. */
6554 + char **regendp; /* Ditto for endp. */
6555 + char *regparse; /* Input-scan pointer. */
6556 + int regnpar; /* () count. */
6558 +	char *regcode;		/* Code-emit pointer; &regdummy = don't. */
6559 + long regsize; /* Code size. */
6563 + * Forward declarations for regcomp()'s friends.
6566 +#define STATIC static
6568 +STATIC char *reg(struct match_globals *g, int paren,int *flagp);
6569 +STATIC char *regbranch(struct match_globals *g, int *flagp);
6570 +STATIC char *regpiece(struct match_globals *g, int *flagp);
6571 +STATIC char *regatom(struct match_globals *g, int *flagp);
6572 +STATIC char *regnode(struct match_globals *g, char op);
6573 +STATIC char *regnext(struct match_globals *g, char *p);
6574 +STATIC void regc(struct match_globals *g, char b);
6575 +STATIC void reginsert(struct match_globals *g, char op, char *opnd);
6576 +STATIC void regtail(struct match_globals *g, char *p, char *val);
6577 +STATIC void regoptail(struct match_globals *g, char *p, char *val);
6579 +static u_int8_t case_insensitive = 1;
6581 +__kernel_size_t my_strcspn(const char *s1,const char *s2)
6588 + for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) {
6589 + for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */
6590 + if (*scan1 == *scan2++)
6597 +/* ********************************************** */
6599 +inline char tolower(char c) {
6600 + return ((c >= 'A' && c <= 'Z') ? c + 32: c);
6603 +inline void string2lower(char* str, int str_len) {
6606 + for(i=0; i<str_len; i++) str[i] = tolower(str[i]);
6609 +/* ********************************************** */
6612 + - regcomp - compile a regular expression into internal code
6614 + * We can't allocate space until we know how big the compiled form will be,
6615 + * but we can't compile it (and thus know how big it is) until we've got a
6616 + * place to put the code. So we cheat: we compile it twice, once with code
6617 + * generation turned off and size counting turned on, and once "for real".
6618 + * This also means that we don't allocate space until we are sure that the
6619 + * thing really will compile successfully, and we never have to move the
6620 + * code and thus invalidate pointers into it. (Note that it has to be in
6621 + * one piece because free() must be able to free it all.)
6623 + * Beware that the optimization-preparation code in here knows about some
6624 + * of the structure of the compiled regexp.
6627 +regcomp(char *exp,int *patternsize)
6629 + register regexp *r;
6630 + register char *scan;
6631 + register char *longest;
6634 + struct match_globals g;
6636 + /* commented out by ethan
6637 + extern char *malloc();
6641 + FAIL("NULL argument");
6643 + if(case_insensitive) string2lower(exp, strlen(exp));
6645 + /* First pass: determine size, legality. */
6649 + g.regcode = &g.regdummy;
6651 + if (reg(&g, 0, &flags) == NULL)
6654 + /* Small enough for pointer-storage convention? */
6655 + if (g.regsize >= 32767L) /* Probably could be 65535L. */
6656 + FAIL("regexp too big");
6658 + /* Allocate space. */
6659 + *patternsize=sizeof(regexp) + (unsigned)g.regsize;
6660 + r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize);
6662 + FAIL("out of space");
6664 + /* Second pass: emit code. */
6667 + g.regcode = r->program;
6669 + if (reg(&g, 0, &flags) == NULL)
6672 + /* Dig out information for optimizations. */
6673 + r->regstart = '\0'; /* Worst-case defaults. */
6675 + r->regmust = NULL;
6677 + scan = r->program+1; /* First BRANCH. */
6678 + if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */
6679 + scan = OPERAND(scan);
6681 + /* Starting-point info. */
6682 + if (OP(scan) == EXACTLY)
6683 + r->regstart = *OPERAND(scan);
6684 + else if (OP(scan) == BOL)
6688 + * If there's something expensive in the r.e., find the
6689 + * longest literal string that must appear and make it the
6690 + * regmust. Resolve ties in favor of later strings, since
6691 + * the regstart check works with the beginning of the r.e.
6692 + * and avoiding duplication strengthens checking. Not a
6693 + * strong reason, but sufficient in the absence of others.
6695 + if (flags&SPSTART) {
6698 + for (; scan != NULL; scan = regnext(&g, scan))
6699 + if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
6700 + longest = OPERAND(scan);
6701 + len = strlen(OPERAND(scan));
6703 + r->regmust = longest;
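+/* Editorial sketch (not part of the original patch): the "compile twice"
+ * idiom described in the comment above regcomp(), in miniature.  The first
+ * pass counts bytes with emission disabled, then one exact-size buffer is
+ * allocated and the second pass really emits.  The emit()/compile() helpers
+ * here are hypothetical stand-ins for regc()/reg(). */
+#if 0 /* illustration only */
+#include <stdio.h>
+#include <stdlib.h>
+static size_t emitted;
+static char *out;            /* NULL on the sizing pass */
+static void emit(char b) { if (out) out[emitted] = b; emitted++; }
+static void compile(const char *src) { while (*src) emit(*src++); }
+int main(void) {
+	emitted = 0; out = NULL; compile("abc");   /* pass 1: size only  */
+	out = malloc(emitted);                     /* exact allocation   */
+	emitted = 0; compile("abc");               /* pass 2: real emit  */
+	printf("compiled %zu bytes\n", emitted);   /* -> compiled 3 bytes */
+	free(out);
+	return 0;
+}
+#endif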
6712 + - reg - regular expression, i.e. main body or parenthesized thing
6714 + * Caller must absorb opening parenthesis.
6716 + * Combining parenthesis handling with the base level of regular expression
6717 + * is a trifle forced, but the need to tie the tails of the branches to what
6718 + * follows makes it hard to avoid.
6721 +reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ )
6723 + register char *ret;
6724 + register char *br;
6725 + register char *ender;
6726 + register int parno = 0; /* 0 makes gcc happy */
6729 + *flagp = HASWIDTH; /* Tentatively. */
6731 + /* Make an OPEN node, if parenthesized. */
6733 + if (g->regnpar >= NSUBEXP)
6734 + FAIL("too many ()");
6735 + parno = g->regnpar;
6737 + ret = regnode(g, OPEN+parno);
6741 + /* Pick up the branches, linking them together. */
6742 + br = regbranch(g, &flags);
6746 + regtail(g, ret, br); /* OPEN -> first. */
6749 + if (!(flags&HASWIDTH))
6750 + *flagp &= ~HASWIDTH;
6751 + *flagp |= flags&SPSTART;
6752 + while (*g->regparse == '|') {
6754 + br = regbranch(g, &flags);
6757 + regtail(g, ret, br); /* BRANCH -> BRANCH. */
6758 + if (!(flags&HASWIDTH))
6759 + *flagp &= ~HASWIDTH;
6760 + *flagp |= flags&SPSTART;
6763 + /* Make a closing node, and hook it on the end. */
6764 + ender = regnode(g, (paren) ? CLOSE+parno : END);
6765 + regtail(g, ret, ender);
6767 + /* Hook the tails of the branches to the closing node. */
6768 + for (br = ret; br != NULL; br = regnext(g, br))
6769 + regoptail(g, br, ender);
6771 + /* Check for proper termination. */
6772 + if (paren && *g->regparse++ != ')') {
6773 + FAIL("unmatched ()");
6774 + } else if (!paren && *g->regparse != '\0') {
6775 + if (*g->regparse == ')') {
6776 + FAIL("unmatched ()");
6778 + FAIL("junk on end"); /* "Can't happen". */
6786 + - regbranch - one alternative of an | operator
6788 + * Implements the concatenation operator.
6791 +regbranch(struct match_globals *g, int *flagp)
6793 + register char *ret;
6794 + register char *chain;
6795 + register char *latest;
6798 + *flagp = WORST; /* Tentatively. */
6800 + ret = regnode(g, BRANCH);
6802 + while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') {
6803 + latest = regpiece(g, &flags);
6804 + if (latest == NULL)
6806 + *flagp |= flags&HASWIDTH;
6807 + if (chain == NULL) /* First piece. */
6808 + *flagp |= flags&SPSTART;
6810 + regtail(g, chain, latest);
6813 + if (chain == NULL) /* Loop ran zero times. */
6814 + (void) regnode(g, NOTHING);
6820 + - regpiece - something followed by possible [*+?]
6822 + * Note that the branching code sequences used for ? and the general cases
6823 + * of * and + are somewhat optimized: they use the same NOTHING node as
6824 + * both the endmarker for their branch list and the body of the last branch.
6825 + * It might seem that this node could be dispensed with entirely, but the
6826 + * endmarker role is not redundant.
6829 +regpiece(struct match_globals *g, int *flagp)
6831 + register char *ret;
6833 + register char *next;
6836 + ret = regatom(g, &flags);
6840 + op = *g->regparse;
6841 + if (!ISMULT(op)) {
6846 + if (!(flags&HASWIDTH) && op != '?')
6847 + FAIL("*+ operand could be empty");
6848 + *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
6850 + if (op == '*' && (flags&SIMPLE))
6851 + reginsert(g, STAR, ret);
6852 + else if (op == '*') {
6853 + /* Emit x* as (x&|), where & means "self". */
6854 + reginsert(g, BRANCH, ret); /* Either x */
6855 + regoptail(g, ret, regnode(g, BACK)); /* and loop */
6856 + regoptail(g, ret, ret); /* back */
6857 + regtail(g, ret, regnode(g, BRANCH)); /* or */
6858 + regtail(g, ret, regnode(g, NOTHING)); /* null. */
6859 + } else if (op == '+' && (flags&SIMPLE))
6860 + reginsert(g, PLUS, ret);
6861 + else if (op == '+') {
6862 + /* Emit x+ as x(&|), where & means "self". */
6863 + next = regnode(g, BRANCH); /* Either */
6864 + regtail(g, ret, next);
6865 + regtail(g, regnode(g, BACK), ret); /* loop back */
6866 + regtail(g, next, regnode(g, BRANCH)); /* or */
6867 + regtail(g, ret, regnode(g, NOTHING)); /* null. */
6868 + } else if (op == '?') {
6869 + /* Emit x? as (x|) */
6870 + reginsert(g, BRANCH, ret); /* Either x */
6871 + regtail(g, ret, regnode(g, BRANCH)); /* or */
6872 + next = regnode(g, NOTHING); /* null. */
6873 + regtail(g, ret, next);
6874 + regoptail(g, ret, next);
6877 + if (ISMULT(*g->regparse))
6878 + FAIL("nested *?+");
6884 + - regatom - the lowest level
6886 + * Optimization: gobbles an entire sequence of ordinary characters so that
6887 + * it can turn them into a single node, which is smaller to store and
6888 + * faster to run. Backslashed characters are exceptions, each becoming a
6889 + * separate node; the code is simpler that way and it's not worth fixing.
6892 +regatom(struct match_globals *g, int *flagp)
6894 + register char *ret;
6897 + *flagp = WORST; /* Tentatively. */
6899 + switch (*g->regparse++) {
6901 + ret = regnode(g, BOL);
6904 + ret = regnode(g, EOL);
6907 + ret = regnode(g, ANY);
6908 + *flagp |= HASWIDTH|SIMPLE;
6911 + register int class;
6912 + register int classend;
6914 + if (*g->regparse == '^') { /* Complement of range. */
6915 + ret = regnode(g, ANYBUT);
6918 + ret = regnode(g, ANYOF);
6919 + if (*g->regparse == ']' || *g->regparse == '-')
6920 + regc(g, *g->regparse++);
6921 + while (*g->regparse != '\0' && *g->regparse != ']') {
6922 + if (*g->regparse == '-') {
6924 + if (*g->regparse == ']' || *g->regparse == '\0')
6927 + class = UCHARAT(g->regparse-2)+1;
6928 + classend = UCHARAT(g->regparse);
6929 + if (class > classend+1)
6930 + FAIL("invalid [] range");
6931 + for (; class <= classend; class++)
6936 + regc(g, *g->regparse++);
6939 + if (*g->regparse != ']')
6940 + FAIL("unmatched []");
6942 + *flagp |= HASWIDTH|SIMPLE;
6946 + ret = reg(g, 1, &flags);
6949 + *flagp |= flags&(HASWIDTH|SPSTART);
6954 + FAIL("internal urp"); /* Supposed to be caught earlier. */
6959 + FAIL("?+* follows nothing");
6962 + if (*g->regparse == '\0')
6963 + FAIL("trailing \\");
6964 + ret = regnode(g, EXACTLY);
6965 + regc(g, *g->regparse++);
6967 + *flagp |= HASWIDTH|SIMPLE;
6971 + register char ender;
6974 + len = my_strcspn((const char *)g->regparse, (const char *)META);
6976 + FAIL("internal disaster");
6977 + ender = *(g->regparse+len);
6978 + if (len > 1 && ISMULT(ender))
6979 + len--; /* Back off clear of ?+* operand. */
6980 + *flagp |= HASWIDTH;
6983 + ret = regnode(g, EXACTLY);
6985 + regc(g, *g->regparse++);
6997 + - regnode - emit a node
6999 +static char * /* Location. */
7000 +regnode(struct match_globals *g, char op)
7002 + register char *ret;
7003 + register char *ptr;
7006 + if (ret == &g->regdummy) {
7013 + *ptr++ = '\0'; /* Null "next" pointer. */
7021 + - regc - emit (if appropriate) a byte of code
7024 +regc(struct match_globals *g, char b)
7026 + if (g->regcode != &g->regdummy)
7027 + *g->regcode++ = b;
7033 + - reginsert - insert an operator in front of already-emitted operand
7035 + * Means relocating the operand.
7038 +reginsert(struct match_globals *g, char op, char* opnd)
7040 + register char *src;
7041 + register char *dst;
7042 + register char *place;
7044 + if (g->regcode == &g->regdummy) {
7052 + while (src > opnd)
7055 + place = opnd; /* Op node, where operand used to be. */
7062 + - regtail - set the next-pointer at the end of a node chain
7065 +regtail(struct match_globals *g, char *p, char *val)
7067 + register char *scan;
7068 + register char *temp;
7069 + register int offset;
7071 + if (p == &g->regdummy)
7074 + /* Find last node. */
7077 + temp = regnext(g, scan);
7083 + if (OP(scan) == BACK)
7084 + offset = scan - val;
7086 + offset = val - scan;
7087 + *(scan+1) = (offset>>8)&0377;
7088 + *(scan+2) = offset&0377;
7092 + - regoptail - regtail on operand of first argument; nop if operandless
7095 +regoptail(struct match_globals *g, char *p, char *val)
7097 + /* "Operandless" and "op != BRANCH" are synonymous in practice. */
7098 + if (p == NULL || p == &g->regdummy || OP(p) != BRANCH)
7100 + regtail(g, OPERAND(p), val);
7104 + * regexec and friends
7111 +STATIC int regtry(struct match_globals *g, regexp *prog, char *string);
7112 +STATIC int regmatch(struct match_globals *g, char *prog);
7113 +STATIC int regrepeat(struct match_globals *g, char *p);
7116 + - regexec - match a regexp against a string
7119 +regexec(regexp *prog, char *string)
7122 + struct match_globals g;
7124 + if(case_insensitive) string2lower(string, strlen(string));
7126 + /* Be paranoid... */
7127 + if (prog == NULL || string == NULL) {
7128 + printk("<3>Regexp: NULL parameter\n");
7132 + /* Check validity of program. */
7133 + if (UCHARAT(prog->program) != MAGIC) {
7134 + printk("<3>Regexp: corrupted program\n");
7138 + /* If there is a "must appear" string, look for it. */
7139 + if (prog->regmust != NULL) {
7141 + while ((s = strchr(s, prog->regmust[0])) != NULL) {
7142 + if (strncmp(s, prog->regmust, prog->regmlen) == 0)
7143 + break; /* Found it. */
7146 + if (s == NULL) /* Not present. */
7150 + /* Mark beginning of line for ^ . */
7151 + g.regbol = string;
7153 + /* Simplest case: anchored match need be tried only once. */
7154 + if (prog->reganch)
7155 + return(regtry(&g, prog, string));
7157 + /* Messy cases: unanchored match. */
7159 + if (prog->regstart != '\0')
7160 + /* We know what char it must start with. */
7161 + while ((s = strchr(s, prog->regstart)) != NULL) {
7162 + if (regtry(&g, prog, s))
7167 + /* We don't -- general case. */
7169 + if (regtry(&g, prog, s))
7171 + } while (*s++ != '\0');
7178 + - regtry - try match at specific point
7180 +static int /* 0 failure, 1 success */
7181 +regtry(struct match_globals *g, regexp *prog, char *string)
7184 + register char **sp;
7185 + register char **ep;
7187 + g->reginput = string;
7188 + g->regstartp = prog->startp;
7189 + g->regendp = prog->endp;
7191 + sp = prog->startp;
7193 + for (i = NSUBEXP; i > 0; i--) {
7197 + if (regmatch(g, prog->program + 1)) {
7198 + prog->startp[0] = string;
7199 + prog->endp[0] = g->reginput;
7206 + - regmatch - main matching routine
7208 + * Conceptually the strategy is simple: check to see whether the current
7209 + * node matches, call self recursively to see whether the rest matches,
7210 + * and then act accordingly. In practice we make some effort to avoid
7211 + * recursion, in particular by going through "ordinary" nodes (that don't
7212 + * need to know whether the rest of the match failed) by a loop instead of
7215 +static int /* 0 failure, 1 success */
7216 +regmatch(struct match_globals *g, char *prog)
7218 + register char *scan = prog; /* Current node. */
7219 + char *next; /* Next node. */
7222 + if (scan != NULL && regnarrate)
7223 + printk("%s(\n", regprop(scan));
7225 + while (scan != NULL) {
7228 + printk("%s...\n", regprop(scan));
7230 + next = regnext(g, scan);
7232 + switch (OP(scan)) {
7234 + if (g->reginput != g->regbol)
7238 + if (*g->reginput != '\0')
7242 + if (*g->reginput == '\0')
7248 + register char *opnd;
7250 + opnd = OPERAND(scan);
7251 + /* Inline the first character, for speed. */
7252 + if (*opnd != *g->reginput)
7254 + len = strlen(opnd);
7255 + if (len > 1 && strncmp(opnd, g->reginput, len) != 0)
7257 + g->reginput += len;
7261 + if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL)
7266 + if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL)
7283 + register char *save;
7285 + no = OP(scan) - OPEN;
7286 + save = g->reginput;
7288 + if (regmatch(g, next)) {
7290 + * Don't set startp if some later
7291 + * invocation of the same parentheses
7294 + if (g->regstartp[no] == NULL)
7295 + g->regstartp[no] = save;
7312 + register char *save;
7314 + no = OP(scan) - CLOSE;
7315 + save = g->reginput;
7317 + if (regmatch(g, next)) {
7319 + * Don't set endp if some later
7320 + * invocation of the same parentheses
7323 + if (g->regendp[no] == NULL)
7324 + g->regendp[no] = save;
7331 + register char *save;
7333 + if (OP(next) != BRANCH) /* No choice. */
7334 + next = OPERAND(scan); /* Avoid recursion. */
7337 + save = g->reginput;
7338 + if (regmatch(g, OPERAND(scan)))
7340 + g->reginput = save;
7341 + scan = regnext(g, scan);
7342 + } while (scan != NULL && OP(scan) == BRANCH);
7350 + register char nextch;
7352 + register char *save;
7356 + * Lookahead to avoid useless match attempts
7357 + * when we know what character comes next.
7360 + if (OP(next) == EXACTLY)
7361 + nextch = *OPERAND(next);
7362 + min = (OP(scan) == STAR) ? 0 : 1;
7363 + save = g->reginput;
7364 + no = regrepeat(g, OPERAND(scan));
7365 + while (no >= min) {
7366 + /* If it could work, try it. */
7367 + if (nextch == '\0' || *g->reginput == nextch)
7368 + if (regmatch(g, next))
7370 + /* Couldn't or didn't -- back up. */
7372 + g->reginput = save + no;
7378 + return(1); /* Success! */
7381 + printk("<3>Regexp: memory corruption\n");
7390 + * We get here only if there's trouble -- normally "case END" is
7391 + * the terminating point.
7393 + printk("<3>Regexp: corrupted pointers\n");
7398 + - regrepeat - repeatedly match something simple, report how many
7401 +regrepeat(struct match_globals *g, char *p)
7403 + register int count = 0;
7404 + register char *scan;
7405 + register char *opnd;
7407 + scan = g->reginput;
7408 + opnd = OPERAND(p);
7411 + count = strlen(scan);
7415 + while (*opnd == *scan) {
7421 + while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
7427 + while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
7432 + default: /* Oh dear. Called inappropriately. */
7433 + printk("<3>Regexp: internal foulup\n");
7434 + count = 0; /* Best compromise. */
7437 + g->reginput = scan;
7443 + - regnext - dig the "next" pointer out of a node
7446 +regnext(struct match_globals *g, char *p)
7448 + register int offset;
7450 + if (p == &g->regdummy)
7457 + if (OP(p) == BACK)
7463 +/* ================================================== */
7465 +/* #define RING_DEBUG */
7467 +/* ************************************************* */
7469 +#define TH_FIN_MULTIPLIER 0x01
7470 +#define TH_SYN_MULTIPLIER 0x02
7471 +#define TH_RST_MULTIPLIER 0x04
7472 +#define TH_PUSH_MULTIPLIER 0x08
7473 +#define TH_ACK_MULTIPLIER 0x10
7474 +#define TH_URG_MULTIPLIER 0x20
7476 +/* ************************************************* */
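+/* Editorial sketch (not part of the original patch): how the *_MULTIPLIER
+ * constants above pack the six TCP flag bits into parsed_pkt.tcp_flags;
+ * a SYN+ACK segment, for example, yields 0x12. */
+#if 0 /* illustration only */
+#include <stdio.h>
+int main(void) {
+	int fin = 0, syn = 1, rst = 0, psh = 0, ack = 1, urg = 0;
+	int tcp_flags = fin * 0x01 + syn * 0x02 + rst * 0x04
+	              + psh * 0x08 + ack * 0x10 + urg * 0x20;
+	printf("tcp_flags = 0x%02x\n", tcp_flags); /* -> 0x12 */
+	return 0;
+}
+#endif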
7478 +#define PROC_INFO "info"
7479 +#define PROC_PLUGINS_INFO "plugins_info"
7481 +/* ************************************************* */
7483 +/* List of all ring sockets. */
7484 +static struct list_head ring_table;
7485 +static u_int ring_table_size;
7488 + For each device, pf_ring keeps a list with the number of
7489 + available ring socket slots, so that a caller knows in advance
7490 + whether any ring bound to that device has a slot
7491 + that can potentially host the packet
7493 +static struct list_head device_ring_list[MAX_NUM_DEVICES];
7495 +/* List of all clusters */
7496 +static struct list_head ring_cluster_list;
7498 +/* List of all DNA (direct NIC access) devices */
7499 +static struct list_head ring_dna_devices_list;
7500 +static u_int dna_devices_list_size = 0;
7502 +/* List of all plugins */
7503 +static u_int plugin_registration_size = 0;
7504 +static struct pfring_plugin_registration *plugin_registration[MAX_PLUGIN_ID] = { NULL };
7505 +static u_short max_registered_plugin_id = 0;
7506 +static rwlock_t ring_mgmt_lock = RW_LOCK_UNLOCKED;
7508 +/* ********************************** */
7510 +/* /proc entry for ring module */
7511 +struct proc_dir_entry *ring_proc_dir = NULL;
7512 +struct proc_dir_entry *ring_proc = NULL;
7513 +struct proc_dir_entry *ring_proc_plugins_info = NULL;
7515 +static int ring_proc_get_info(char *, char **, off_t, int, int *, void *);
7516 +static int ring_proc_get_plugin_info(char *, char **, off_t, int, int *, void *);
7517 +static void ring_proc_add(struct ring_opt *pfr, struct net_device *dev);
7518 +static void ring_proc_remove(struct ring_opt *pfr);
7519 +static void ring_proc_init(void);
7520 +static void ring_proc_term(void);
7524 + [http://lists.metaprl.org/pipermail/cs134-labs/2002-October/000025.html]
7526 + GFP_ATOMIC means roughly "make the allocation operation atomic". This
7527 + means that the kernel will try to find the memory using a pile of free
7528 + memory set aside for urgent allocation. If that pile doesn't have
7529 + enough free pages, the operation will fail. This flag is useful for
7530 + allocation within interrupt handlers.
7532 + GFP_KERNEL will try a little harder to find memory. There's a
7533 + possibility that the call to kmalloc() will sleep while the kernel is
7534 + trying to find memory (thus making it unsuitable for interrupt
7535 + handlers). It's much more rare for an allocation with GFP_KERNEL to
7536 + fail than with GFP_ATOMIC.
7538 + In all cases, kmalloc() should only be used for allocating small amounts of
7539 + memory (a few KB). vmalloc() is better for larger amounts.
7541 + Also note that in lab 1 and lab 2, it would have been arguably better to
7542 + use GFP_KERNEL instead of GFP_ATOMIC. GFP_ATOMIC should be saved for
7543 + those instances in which a sleep would be totally unacceptable.
7545 +/* ********************************** */
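+/* Editorial sketch (not part of the original patch): the GFP_ATOMIC vs
+ * GFP_KERNEL distinction quoted above, in code form.  Kernel-style
+ * pseudocode; the two wrapper functions here are hypothetical. */
+#if 0 /* illustration only */
+static void *alloc_in_irq_context(size_t len)
+{
+	/* May not sleep: draw from the emergency pool and accept failure. */
+	return kmalloc(len, GFP_ATOMIC);
+}
+static void *alloc_in_process_context(size_t len)
+{
+	/* May sleep while the kernel reclaims memory, so it rarely fails. */
+	return kmalloc(len, GFP_KERNEL);
+}
+#endif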
7548 +static struct proto_ops ring_ops;
7550 +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
7551 +static struct proto ring_proto;
7554 +static int skb_ring_handler(struct sk_buff *skb, u_char recv_packet,
7555 + u_char real_skb, short channel_id);
7556 +static int buffer_ring_handler(struct net_device *dev, char *data, int len);
7557 +static int remove_from_cluster(struct sock *sock, struct ring_opt *pfr);
7561 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23))
7566 +ip_defrag(struct sk_buff *skb, u32 user);
7568 +/* ********************************** */
7571 +static unsigned int num_slots = 4096;
7572 +static unsigned int enable_tx_capture = 1;
7573 +static unsigned int enable_ip_defrag = 0;
7574 +static unsigned int transparent_mode = 1;
7575 +static u_int32_t ring_id_serial = 0;
7577 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16))
7578 +module_param(num_slots, uint, 0644);
7579 +module_param(transparent_mode, uint, 0644);
7580 +module_param(enable_tx_capture, uint, 0644);
7581 +module_param(enable_ip_defrag, uint, 0644);
7583 +MODULE_PARM(num_slots, "i");
7584 +MODULE_PARM(transparent_mode, "i");
7585 +MODULE_PARM(enable_tx_capture, "i");
7586 +MODULE_PARM(enable_ip_defrag, "i");
7589 +MODULE_PARM_DESC(num_slots, "Number of ring slots");
7590 +MODULE_PARM_DESC(transparent_mode,
7591 + "Set to 1 to enable transparent mode "
7592 + "(slower but backwards compatible)");
7593 +MODULE_PARM_DESC(enable_tx_capture, "Set to 1 to capture outgoing packets");
7594 +MODULE_PARM_DESC(enable_ip_defrag,
7595 + "Set to 1 to enable IP defragmentation "
7596 + "(only RX traffic is defragmented)");
7598 +/* ********************************** */
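+/* Editorial note (not part of the original patch): once built, the four
+ * parameters above can be set at load time in the usual way, e.g.
+ *
+ *   insmod ./pf_ring.ko num_slots=8192 transparent_mode=1 \
+ *          enable_tx_capture=0 enable_ip_defrag=0
+ *
+ * and, being registered with mode 0644, they can be re-read under
+ * /sys/module/pf_ring/parameters/ on >= 2.6.16 kernels.  The module file
+ * name used here is an assumption. */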
7600 +#define MIN_QUEUED_PKTS 64
7601 +#define MAX_QUEUE_LOOPS 64
7604 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7605 +#define ring_sk_datatype(__sk) ((struct ring_opt *)__sk)
7606 +#define ring_sk(__sk) ((__sk)->sk_protinfo)
7608 +#define ring_sk_datatype(a) (a)
7609 +#define ring_sk(__sk) ((__sk)->protinfo.pf_ring)
7612 +#define _rdtsc() ({ uint64_t x; asm volatile("rdtsc" : "=A" (x)); x; })
7614 +/* ***************** Legacy code ************************ */
7616 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
7617 +static inline struct iphdr *ip_hdr(const struct sk_buff *skb)
7619 + return (struct iphdr *)skb->nh.iph;
7622 +static inline void skb_set_network_header(struct sk_buff *skb,
7625 + skb->nh.iph = (struct iphdr*)(skb->data + offset); /* offset is in bytes */
7628 +static inline void skb_reset_network_header(struct sk_buff *skb)
7633 +static inline void skb_reset_transport_header(struct sk_buff *skb)
7639 +/* ***** Code taken from other kernel modules ******** */
7642 + * rvmalloc copied from usbvideo.c
7644 +static void *rvmalloc(unsigned long size)
7647 + unsigned long adr;
7648 + unsigned long pages = 0;
7650 +#if defined(RING_DEBUG)
7651 + printk("[PF_RING] rvmalloc: %lu bytes\n", size);
7654 + size = PAGE_ALIGN(size);
7655 + mem = vmalloc_32(size);
7659 + memset(mem, 0, size); /* Clear the ram out, no junk to the user */
7660 + adr = (unsigned long) mem;
7661 + while (size > 0) {
7662 + SetPageReserved(vmalloc_to_page((void *)adr));
7665 + size -= PAGE_SIZE;
7668 +#if defined(RING_DEBUG)
7669 + printk("[PF_RING] rvmalloc: %lu pages\n", pages);
7674 +/* ************************************************** */
7677 + * rvfree copied from usbvideo.c
7679 +static void rvfree(void *mem, unsigned long size)
7681 + unsigned long adr;
7682 + unsigned long pages = 0;
7684 +#if defined(RING_DEBUG)
7685 + printk("[PF_RING] rvfree: %lu bytes\n", size);
7691 + adr = (unsigned long) mem;
7692 + while ((long) size > 0) {
7693 + ClearPageReserved(vmalloc_to_page((void *)adr));
7696 + size -= PAGE_SIZE;
7698 +#if defined(RING_DEBUG)
7699 + printk("[PF_RING] rvfree: %lu pages\n", pages);
7700 + printk("[PF_RING] rvfree: calling vfree....\n");
7703 +#if defined(RING_DEBUG)
7704 + printk("[PF_RING] rvfree: after vfree....\n");
7708 +/* ********************************** */
7710 +#define IP_DEFRAG_RING 1234
7712 +/* Returns new sk_buff, or NULL */
7713 +static struct sk_buff *ring_gather_frags(struct sk_buff *skb)
7715 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,23))
7716 + skb = ip_defrag(skb, IP_DEFRAG_RING);
7719 + ip_send_check(ip_hdr(skb));
7721 + if(ip_defrag(skb, IP_DEFRAG_RING))
7724 + ip_send_check(ip_hdr(skb));
7730 +/* ********************************** */
7732 +static void ring_sock_destruct(struct sock *sk)
7734 + struct ring_opt *pfr;
7736 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
7737 + skb_queue_purge(&sk->sk_receive_queue);
7739 + if (!sock_flag(sk, SOCK_DEAD)) {
7740 +#if defined(RING_DEBUG)
7741 + printk("[PF_RING] Attempt to release alive ring socket: %p\n", sk);
7747 +#if defined(RING_DEBUG)
7748 + printk("[PF_RING] Attempt to release alive ring socket: %p\n", sk);
7754 + pfr = ring_sk(sk);
7756 + if(pfr) kfree(pfr);
7758 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
7759 + MOD_DEC_USE_COUNT;
7763 +/* ********************************** */
7765 +static void ring_proc_add(struct ring_opt *pfr, struct net_device *dev)
7767 + if(ring_proc_dir != NULL) {
7770 + pfr->ring_pid = current->pid;
7771 + pfr->ring_id = ring_id_serial++;
7774 + snprintf(name, sizeof(name), "%d-%s.%d", pfr->ring_pid, dev->name, pfr->ring_id);
7776 + snprintf(name, sizeof(name), "%d.%d", pfr->ring_pid, pfr->ring_id);
7778 + create_proc_read_entry(name, 0, ring_proc_dir, ring_proc_get_info, pfr);
7779 + /* printk("[PF_RING] added /proc/net/pf_ring/%s\n", name); */
7780 + /* printk("[PF_RING] %s has index %d\n", dev->name, dev->ifindex); */
7784 +/* ********************************** */
7786 +static void ring_proc_remove(struct ring_opt *pfr)
7788 + if(ring_proc_dir != NULL) {
7791 + if (pfr->ring_netdev && pfr->ring_netdev->name)
7792 + snprintf(name, sizeof(name), "%d-%s.%d",
7793 + pfr->ring_pid, pfr->ring_netdev->name, pfr->ring_id);
7795 + snprintf(name, sizeof(name), "%d.%d", pfr->ring_pid, pfr->ring_id);
7797 + remove_proc_entry(name, ring_proc_dir);
7798 + printk("[PF_RING] removed /proc/net/pf_ring/%s\n", name);
7802 +/* ********************************** */
7804 +static u_int32_t num_queued_pkts(struct ring_opt *pfr)
7806 + if(pfr->ring_slots != NULL) {
7807 + u_int32_t tot_insert = pfr->slots_info->tot_insert, tot_read = pfr->slots_info->tot_read;
7809 + if(tot_insert >= tot_read) {
7810 + return(tot_insert-tot_read);
7812 + return(((u_int32_t)-1)+tot_insert-tot_read);
7815 +#if defined(RING_DEBUG)
7816 + printk("[PF_RING] -> [tot_insert=%d][tot_read=%d]\n",
7817 + tot_insert, tot_read);
7823 +/* ************************************* */
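+/* Editorial sketch (not part of the original patch): the unsigned wrap-around
+ * branch of num_queued_pkts() above.  When tot_insert has wrapped past 2^32
+ * while tot_read has not, ((u_int32_t)-1)+tot_insert-tot_read recovers the
+ * queue depth; note it comes out one less than the exact modular difference. */
+#if 0 /* illustration only */
+#include <stdio.h>
+#include <stdint.h>
+int main(void) {
+	uint32_t tot_insert = 5;            /* wrapped: really 2^32 + 5 */
+	uint32_t tot_read   = 4294967290u;  /* 2^32 - 6                 */
+	uint32_t depth = (uint32_t)-1 + tot_insert - tot_read;
+	printf("queued = %u\n", depth);     /* -> 10 (exact depth is 11) */
+	return 0;
+}
+#endif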
7825 +inline u_int get_num_ring_free_slots(struct ring_opt *pfr) {
7826 + return(pfr->slots_info->tot_slots - num_queued_pkts(pfr));
7829 +/* ********************************** */
7831 +static int ring_proc_get_info(char *buf, char **start, off_t offset,
7832 + int len, int *unused, void *data)
7835 + struct ring_opt *pfr;
7836 + FlowSlotInfo *fsi;
7838 + if(data == NULL) {
7839 + /* /proc/net/pf_ring/info */
7840 + rlen = sprintf(buf, "Version : %s\n", RING_VERSION);
7841 + rlen += sprintf(buf + rlen, "Ring slots : %d\n", num_slots);
7842 + rlen += sprintf(buf + rlen, "Slot version : %d\n", RING_FLOWSLOT_VERSION);
7843 + rlen += sprintf(buf + rlen, "Capture TX : %s\n",
7844 + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
7845 + rlen += sprintf(buf + rlen, "IP Defragment : %s\n", enable_ip_defrag ? "Yes" : "No");
7846 + rlen += sprintf(buf + rlen, "Transparent mode : %s\n",
7847 + transparent_mode ? "Yes" : "No");
7848 + rlen += sprintf(buf + rlen, "Total rings : %d\n", ring_table_size);
7849 + rlen += sprintf(buf + rlen, "Total plugins : %d\n", plugin_registration_size);
7851 + /* detailed statistics about a PF_RING */
7852 + pfr = (struct ring_opt*)data;
7855 + fsi = pfr->slots_info;
7858 + rlen = sprintf(buf, "Bound Device : %s\n",
7859 + pfr->ring_netdev->name == NULL ? "<NULL>" : pfr->ring_netdev->name);
7860 + rlen += sprintf(buf + rlen, "Version : %d\n", fsi->version);
7861 + rlen += sprintf(buf + rlen, "Sampling Rate : %d\n", pfr->sample_rate);
7862 + rlen += sprintf(buf + rlen, "Appl. Name : %s\n", pfr->appl_name ? pfr->appl_name : "<unknown>");
7863 + rlen += sprintf(buf + rlen, "IP Defragment : %s\n", enable_ip_defrag ? "Yes" : "No");
7864 + rlen += sprintf(buf + rlen, "BPF Filtering : %s\n", pfr->bpfFilter ? "Enabled" : "Disabled");
7865 + rlen += sprintf(buf + rlen, "# Filt. Rules : %d\n", pfr->num_filtering_rules);
7866 + rlen += sprintf(buf + rlen, "Cluster Id : %d\n", pfr->cluster_id);
7867 + rlen += sprintf(buf + rlen, "Channel Id : %d\n", pfr->channel_id);
7868 + rlen += sprintf(buf + rlen, "Tot Slots : %d\n", fsi->tot_slots);
7869 + rlen += sprintf(buf + rlen, "Bucket Len : %d\n", fsi->data_len);
7870 + rlen += sprintf(buf + rlen, "Slot Len : %d [bucket+header]\n", fsi->slot_len);
7871 + rlen += sprintf(buf + rlen, "Tot Memory : %d\n", fsi->tot_mem);
7872 + rlen += sprintf(buf + rlen, "Tot Packets : %lu\n", (unsigned long)fsi->tot_pkts);
7873 + rlen += sprintf(buf + rlen, "Tot Pkt Lost : %lu\n", (unsigned long)fsi->tot_lost);
7874 + rlen += sprintf(buf + rlen, "Tot Insert : %lu\n", (unsigned long)fsi->tot_insert);
7875 + rlen += sprintf(buf + rlen, "Tot Read : %lu\n", (unsigned long)fsi->tot_read);
7876 + rlen += sprintf(buf + rlen, "Num Free Slots: %u\n", get_num_ring_free_slots(pfr));
7878 + rlen = sprintf(buf, "WARNING fsi == NULL\n");
7880 + rlen = sprintf(buf, "WARNING data == NULL\n");
7886 +/* ********************************** */
7888 +static int ring_proc_get_plugin_info(char *buf, char **start, off_t offset,
7889 + int len, int *unused, void *data)
7891 + int rlen = 0, i = 0;
7892 + struct pfring_plugin_registration* tmp = NULL;
7894 + /* FIXME: I should know the number of plugins registered */
7895 + if (!plugin_registration_size) return rlen;
7897 + /* plugins_info */
7899 + rlen += sprintf(buf + rlen , "ID\tPlugin\n");
7901 + for(i = 0; i < MAX_PLUGIN_ID; i++) {
7902 + tmp = plugin_registration[i];
7904 + rlen += sprintf(buf + rlen , "%d\t%s [%s]\n",
7905 + tmp->plugin_id, tmp->name, tmp->description);
7912 +/* ********************************** */
7914 +static void ring_proc_init(void)
7916 + ring_proc_dir = proc_mkdir("pf_ring",
7917 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
7922 + if(ring_proc_dir) {
7923 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
7924 + ring_proc_dir->owner = THIS_MODULE;
7926 + ring_proc = create_proc_read_entry(PROC_INFO, 0,
7928 + ring_proc_get_info,
7930 + ring_proc_plugins_info = create_proc_read_entry(PROC_PLUGINS_INFO, 0,
7932 + ring_proc_get_plugin_info,
7934 + if(!ring_proc || !ring_proc_plugins_info)
7935 + printk("[PF_RING] unable to register proc file\n");
7937 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30))
7938 + ring_proc->owner = THIS_MODULE;
7939 + ring_proc_plugins_info->owner = THIS_MODULE;
7941 + printk("[PF_RING] registered /proc/net/pf_ring/\n");
7944 + printk("[PF_RING] unable to create /proc/net/pf_ring\n");
7947 +/* ********************************** */
7949 +static void ring_proc_term(void)
7951 + if(ring_proc != NULL) {
7952 + remove_proc_entry(PROC_INFO, ring_proc_dir);
7953 + printk("[PF_RING] removed /proc/net/pf_ring/%s\n", PROC_INFO);
7955 + remove_proc_entry(PROC_PLUGINS_INFO, ring_proc_dir);
7956 + printk("[PF_RING] removed /proc/net/pf_ring/%s\n", PROC_PLUGINS_INFO);
7958 + if(ring_proc_dir != NULL) {
7959 + remove_proc_entry("pf_ring",
7960 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
7964 + printk("[PF_RING] deregistered /proc/net/pf_ring\n");
7969 +/* ********************************** */
7974 + * store the sk in a new element and add it
7975 + * to the head of the list.
7977 +static inline void ring_insert(struct sock *sk)
7979 + struct ring_element *next;
7980 + struct ring_opt *pfr;
7982 +#if defined(RING_DEBUG)
7983 + printk("[PF_RING] ring_insert()\n");
7986 + next = kmalloc(sizeof(struct ring_element), GFP_ATOMIC);
7987 + if(next != NULL) {
7989 + write_lock_bh(&ring_mgmt_lock);
7990 + list_add(&next->list, &ring_table);
7991 + write_unlock_bh(&ring_mgmt_lock);
7993 + if(net_ratelimit())
7994 + printk("[PF_RING] ring_insert: out of memory\n");
7997 + ring_table_size++;
7998 + //ring_proc_add(ring_sk(sk));
7999 + pfr = (struct ring_opt *)ring_sk(sk);
8000 + pfr->ring_pid = current->pid;
8003 +/* ********************************** */
8008 + * For each of the elements in the list:
8009 + * - check if this is the element we want to delete
8010 + * - if it is, remove it from the list, and free it.
8012 + * stop when we find the one we're looking for (break),
8013 + * or when we reach the end of the list.
8015 +static inline void ring_remove(struct sock *sk)
8017 + struct list_head *ptr, *tmp_ptr;
8018 + struct ring_element *entry;
8020 +#if defined(RING_DEBUG)
8021 + printk("[PF_RING] ring_remove()\n");
8024 + list_for_each_safe(ptr, tmp_ptr, &ring_table) {
8025 + entry = list_entry(ptr, struct ring_element, list);
8027 + if(entry->sk == sk) {
8030 + ring_table_size--;
8035 +#if defined(RING_DEBUG)
8036 + printk("[PF_RING] leaving ring_remove()\n");
8040 +/* ********************************** */
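+/* Editorial sketch (not part of the original patch): ring_remove() above
+ * relies on the kernel's list_for_each_safe(), which keeps a lookahead
+ * pointer so the current entry can be unlinked and freed mid-walk.  The same
+ * idea on a plain singly linked list, in user space. */
+#if 0 /* illustration only */
+#include <stdlib.h>
+struct elem { int key; struct elem *next; };
+static void remove_key(struct elem **head, int key)
+{
+	struct elem **link = head;
+	struct elem *cur, *next;
+	for (cur = *head; cur != NULL; cur = next) {
+		next = cur->next;          /* saved before cur may be freed */
+		if (cur->key == key) {
+			*link = next;      /* unlink, then free safely */
+			free(cur);
+			break;
+		}
+		link = &cur->next;
+	}
+}
+#endif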
8042 +static inline FlowSlot* get_insert_slot(struct ring_opt *pfr)
8044 + if(pfr->ring_slots != NULL) {
8045 + FlowSlot *slot = (FlowSlot*)&(pfr->ring_slots[pfr->slots_info->insert_idx
8046 + *pfr->slots_info->slot_len]);
8047 +#if defined(RING_DEBUG)
8048 + printk("[PF_RING] get_insert_slot(%d): returned slot [slot_state=%d]\n",
8049 + pfr->slots_info->insert_idx, slot->slot_state);
8053 +#if defined(RING_DEBUG)
8054 + printk("[PF_RING] get_insert_slot(%d): NULL slot\n", pfr->slots_info->insert_idx);
8060 +/* ********************************** */
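+/* Editorial sketch (not part of the original patch): the slot accessors
+ * above treat the ring as one flat byte array of tot_slots fixed-size
+ * records, so slot i simply starts i * slot_len bytes from the base. */
+#if 0 /* illustration only */
+#include <stdio.h>
+int main(void) {
+	char ring[4 * 16];                   /* 4 slots of 16 bytes       */
+	unsigned slot_len = 16, idx = 2;
+	char *slot = &ring[idx * slot_len];  /* as get_insert_slot() does */
+	printf("slot %u at offset %ld\n", idx, (long)(slot - ring)); /* 32 */
+	return 0;
+}
+#endif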
8062 +static inline FlowSlot* get_remove_slot(struct ring_opt *pfr)
8064 +#if defined(RING_DEBUG)
8065 + printk("[PF_RING] get_remove_slot(%d)\n", pfr->slots_info->remove_idx);
8068 + if(pfr->ring_slots != NULL)
8069 + return((FlowSlot*)&(pfr->ring_slots[pfr->slots_info->remove_idx*
8070 + pfr->slots_info->slot_len]));
8075 +/* ******************************************************* */
8077 +static int parse_pkt(struct sk_buff *skb,
8078 + u_int16_t skb_displ,
8079 + struct pfring_pkthdr *hdr)
8082 + struct ethhdr *eh = (struct ethhdr*)(skb->data-skb_displ);
8085 + memset(&hdr->parsed_pkt, 0, sizeof(struct pkt_parsing_info));
8086 + hdr->parsed_header_len = 9;
8088 + hdr->parsed_pkt.eth_type = ntohs(eh->h_proto);
8089 + hdr->parsed_pkt.pkt_detail.offset.eth_offset = -skb_displ;
8091 + if(hdr->parsed_pkt.eth_type == 0x8100 /* 802.1q (VLAN) */)
8093 + hdr->parsed_pkt.pkt_detail.offset.vlan_offset = hdr->parsed_pkt.pkt_detail.offset.eth_offset + sizeof(struct ethhdr);
8094 + hdr->parsed_pkt.vlan_id = (skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset] & 15) * 256
8095 + + skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset + 1];
8096 + hdr->parsed_pkt.eth_type = (skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset + 2]) * 256
8097 + + skb->data[hdr->parsed_pkt.pkt_detail.offset.vlan_offset + 3];
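+	/* Editorial note (not part of the original patch): the 802.1Q TCI is
+	 * the two bytes after the TPID; the low 4 bits of its first byte are
+	 * the VID's high nibble, hence the "& 15" and "* 256" above, while
+	 * the PCP/DEI bits in the top nibble are deliberately masked off. */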
8103 + hdr->parsed_pkt.vlan_id = 0; /* Any VLAN */
8106 + if(hdr->parsed_pkt.eth_type == 0x0800 /* IP */) {
8107 + hdr->parsed_pkt.pkt_detail.offset.l3_offset = hdr->parsed_pkt.pkt_detail.offset.eth_offset+displ+sizeof(struct ethhdr);
8108 + ip = (struct iphdr*)(skb->data+hdr->parsed_pkt.pkt_detail.offset.l3_offset);
8110 + hdr->parsed_pkt.ipv4_src = ntohl(ip->saddr), hdr->parsed_pkt.ipv4_dst = ntohl(ip->daddr), hdr->parsed_pkt.l3_proto = ip->protocol;
8111 + hdr->parsed_pkt.ipv4_tos = ip->tos;
8112 + hdr->parsed_pkt.pkt_detail.offset.l4_offset = hdr->parsed_pkt.pkt_detail.offset.l3_offset+ip->ihl*4;
8114 + if((ip->protocol == IPPROTO_TCP) || (ip->protocol == IPPROTO_UDP))
8116 + if(ip->protocol == IPPROTO_TCP)
8118 + struct tcphdr *tcp = (struct tcphdr*)(skb->data+hdr->parsed_pkt.pkt_detail.offset.l4_offset);
8119 + hdr->parsed_pkt.l4_src_port = ntohs(tcp->source), hdr->parsed_pkt.l4_dst_port = ntohs(tcp->dest);
8120 + hdr->parsed_pkt.pkt_detail.offset.payload_offset = hdr->parsed_pkt.pkt_detail.offset.l4_offset+(tcp->doff * 4);
8121 + hdr->parsed_pkt.tcp_flags = (tcp->fin * TH_FIN_MULTIPLIER) + (tcp->syn * TH_SYN_MULTIPLIER) + (tcp->rst * TH_RST_MULTIPLIER) +
8122 + (tcp->psh * TH_PUSH_MULTIPLIER) + (tcp->ack * TH_ACK_MULTIPLIER) + (tcp->urg * TH_URG_MULTIPLIER);
8123 + } else if(ip->protocol == IPPROTO_UDP)
8125 + struct udphdr *udp = (struct udphdr*)(skb->data+hdr->parsed_pkt.pkt_detail.offset.l4_offset);
8126 + hdr->parsed_pkt.l4_src_port = ntohs(udp->source), hdr->parsed_pkt.l4_dst_port = ntohs(udp->dest);
8127 + hdr->parsed_pkt.pkt_detail.offset.payload_offset = hdr->parsed_pkt.pkt_detail.offset.l4_offset+sizeof(struct udphdr);
8129 + hdr->parsed_pkt.pkt_detail.offset.payload_offset = hdr->parsed_pkt.pkt_detail.offset.l4_offset;
8131 + hdr->parsed_pkt.l4_src_port = hdr->parsed_pkt.l4_dst_port = 0;
8133 + hdr->parsed_pkt.pkt_detail.offset.eth_offset = skb_displ;
8135 + return(1); /* IP */
8136 + } /* TODO: handle IPv6 */
8138 + return(0); /* No IP */
8141 +/* ********************************** */
8143 +inline u_int32_t hash_pkt(u_int16_t vlan_id, u_int8_t proto,
8144 + u_int32_t host_peer_a, u_int32_t host_peer_b,
8145 + u_int16_t port_peer_a, u_int16_t port_peer_b)
8147 + return(vlan_id+proto+host_peer_a+host_peer_b+port_peer_a+port_peer_b);
8150 +/* ********************************** */
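+/* Editorial sketch (not part of the original patch): hash_pkt() above is a
+ * plain sum, so both directions of a flow hash identically -- which is what
+ * the bidirectional bucket matching below relies on; unrelated tuples can of
+ * course collide. */
+#if 0 /* illustration only */
+#include <stdio.h>
+#include <stdint.h>
+static uint32_t hash_pkt(uint16_t vlan, uint8_t proto,
+                         uint32_t a, uint32_t b, uint16_t pa, uint16_t pb) {
+	return vlan + proto + a + b + pa + pb;
+}
+int main(void) {
+	/* 10.0.0.1:1234 <-> 10.0.0.2:80, TCP (proto 6), no VLAN */
+	uint32_t h1 = hash_pkt(0, 6, 0x0a000001, 0x0a000002, 1234, 80);
+	uint32_t h2 = hash_pkt(0, 6, 0x0a000002, 0x0a000001, 80, 1234);
+	printf("%u %u same=%d\n", h1, h2, h1 == h2); /* same=1 */
+	return 0;
+}
+#endif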
8152 +inline u_int32_t hash_pkt_header(struct pfring_pkthdr *hdr, u_char mask_src, u_char mask_dst)
8154 + return(hash_pkt(hdr->parsed_pkt.vlan_id,
8155 + hdr->parsed_pkt.l3_proto,
8156 + mask_src ? 0 : hdr->parsed_pkt.ipv4_src,
8157 + mask_dst ? 0 : hdr->parsed_pkt.ipv4_dst,
8158 + mask_src ? 0 : hdr->parsed_pkt.l4_src_port,
8159 + mask_dst ? 0 : hdr->parsed_pkt.l4_dst_port));
8162 +/* ********************************** */
8164 +static int hash_bucket_match(filtering_hash_bucket *hash_bucket,
8165 + struct pfring_pkthdr *hdr,
8166 + u_char mask_src, u_char mask_dst)
8168 + if((hash_bucket->rule.proto == hdr->parsed_pkt.l3_proto)
8169 + && (hash_bucket->rule.vlan_id == hdr->parsed_pkt.vlan_id)
8170 + && (((hash_bucket->rule.host_peer_a == (mask_src ? 0 : hdr->parsed_pkt.ipv4_src))
8171 + && (hash_bucket->rule.host_peer_b == (mask_dst ? 0 : hdr->parsed_pkt.ipv4_dst))
8172 + && (hash_bucket->rule.port_peer_a == (mask_src ? 0 : hdr->parsed_pkt.l4_src_port))
8173 + && (hash_bucket->rule.port_peer_b == (mask_dst ? 0 : hdr->parsed_pkt.l4_dst_port)))
8175 + ((hash_bucket->rule.host_peer_a == (mask_dst ? 0 : hdr->parsed_pkt.ipv4_dst))
8176 + && (hash_bucket->rule.host_peer_b == (mask_src ? 0 : hdr->parsed_pkt.ipv4_src))
8177 + && (hash_bucket->rule.port_peer_a == (mask_dst ? 0 : hdr->parsed_pkt.l4_dst_port))
8178 + && (hash_bucket->rule.port_peer_b == (mask_src ? 0 : hdr->parsed_pkt.l4_src_port))))) {
8179 + hash_bucket->rule.jiffies_last_match = jiffies;
8185 +/* ********************************** */
8187 +inline int hash_bucket_match_rule(filtering_hash_bucket *hash_bucket,
8188 + hash_filtering_rule *rule)
8193 + printk("[PF_RING] (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u) (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u)\n",
8194 + hash_bucket->rule.vlan_id, hash_bucket->rule.proto,
8195 + ((hash_bucket->rule.host_peer_a >> 24) & 0xff),
8196 + ((hash_bucket->rule.host_peer_a >> 16) & 0xff),
8197 + ((hash_bucket->rule.host_peer_a >> 8) & 0xff),
8198 + ((hash_bucket->rule.host_peer_a >> 0) & 0xff),
8199 + hash_bucket->rule.port_peer_a,
8200 + ((hash_bucket->rule.host_peer_b >> 24) & 0xff),
8201 + ((hash_bucket->rule.host_peer_b >> 16) & 0xff),
8202 + ((hash_bucket->rule.host_peer_b >> 8) & 0xff),
8203 + ((hash_bucket->rule.host_peer_b >> 0) & 0xff),
8204 + hash_bucket->rule.port_peer_b,
8205 + rule->vlan_id, rule->proto,
8206 + ((rule->host_peer_a >> 24) & 0xff),
8207 + ((rule->host_peer_a >> 16) & 0xff),
8208 + ((rule->host_peer_a >> 8) & 0xff),
8209 + ((rule->host_peer_a >> 0) & 0xff),
8210 + rule->port_peer_a,
8211 + ((rule->host_peer_b >> 24) & 0xff),
8212 + ((rule->host_peer_b >> 16) & 0xff),
8213 + ((rule->host_peer_b >> 8) & 0xff),
8214 + ((rule->host_peer_b >> 0) & 0xff),
8215 + rule->port_peer_b);
8217 + if((hash_bucket->rule.proto == rule->proto)
8218 + && (hash_bucket->rule.vlan_id == rule->vlan_id)
8219 + && (((hash_bucket->rule.host_peer_a == rule->host_peer_a)
8220 + && (hash_bucket->rule.host_peer_b == rule->host_peer_b)
8221 + && (hash_bucket->rule.port_peer_a == rule->port_peer_a)
8222 + && (hash_bucket->rule.port_peer_b == rule->port_peer_b))
8224 + ((hash_bucket->rule.host_peer_a == rule->host_peer_b)
8225 + && (hash_bucket->rule.host_peer_b == rule->host_peer_a)
8226 + && (hash_bucket->rule.port_peer_a == rule->port_peer_b)
8227 + && (hash_bucket->rule.port_peer_b == rule->port_peer_a)))) {
8228 + hash_bucket->rule.jiffies_last_match = jiffies;
8234 +/* ********************************** */
8236 +inline int hash_filtering_rule_match(hash_filtering_rule *a,
8237 + hash_filtering_rule *b)
8242 + printk("[PF_RING] (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u) (%u,%d,%d.%d.%d.%d:%u,%d.%d.%d.%d:%u)\n",
8243 + a->vlan_id, a->proto,
8244 + ((a->host_peer_a >> 24) & 0xff),
8245 + ((a->host_peer_a >> 16) & 0xff),
8246 + ((a->host_peer_a >> 8) & 0xff),
8247 + ((a->host_peer_a >> 0) & 0xff),
8249 + ((a->host_peer_b >> 24) & 0xff),
8250 + ((a->host_peer_b >> 16) & 0xff),
8251 + ((a->host_peer_b >> 8) & 0xff),
8252 + ((a->host_peer_b >> 0) & 0xff),
8255 + b->vlan_id, b->proto,
8256 + ((b->host_peer_a >> 24) & 0xff),
8257 + ((b->host_peer_a >> 16) & 0xff),
8258 + ((b->host_peer_a >> 8) & 0xff),
8259 + ((b->host_peer_a >> 0) & 0xff),
8261 + ((b->host_peer_b >> 24) & 0xff),
8262 + ((b->host_peer_b >> 16) & 0xff),
8263 + ((b->host_peer_b >> 8) & 0xff),
8264 + ((b->host_peer_b >> 0) & 0xff),
8268 + if((a->proto == b->proto)
8269 + && (a->vlan_id == b->vlan_id)
8270 + && (((a->host_peer_a == b->host_peer_a)
8271 + && (a->host_peer_b == b->host_peer_b)
8272 + && (a->port_peer_a == b->port_peer_a)
8273 + && (a->port_peer_b == b->port_peer_b))
8275 + ((a->host_peer_a == b->host_peer_b)
8276 + && (a->host_peer_b == b->host_peer_a)
8277 + && (a->port_peer_a == b->port_peer_b)
8278 + && (a->port_peer_b == b->port_peer_a)))) {
8284 +/* ********************************** */
8286 +/* 0 = no match, 1 = match */
8287 +static int match_filtering_rule(struct ring_opt *the_ring,
8288 + filtering_rule_element *rule,
8289 + struct pfring_pkthdr *hdr,
8290 + struct sk_buff *skb,
8292 + struct parse_buffer *parse_memory_buffer[],
8293 + u_int8_t *free_parse_mem,
8294 + u_int *last_matched_plugin,
8295 + packet_action_behaviour *behaviour)
8299 + /* if(debug) printk("[PF_RING] match_filtering_rule()\n"); */
8301 + *behaviour = use_rule_forward_policy; /* Default */
8303 + if((rule->rule.core_fields.vlan_id > 0) && (hdr->parsed_pkt.vlan_id != rule->rule.core_fields.vlan_id)) return(0);
8304 + if((rule->rule.core_fields.proto > 0) && (hdr->parsed_pkt.l3_proto != rule->rule.core_fields.proto)) return(0);
8306 + if(rule->rule.core_fields.host_low > 0) {
8307 + if(((hdr->parsed_pkt.ipv4_src < rule->rule.core_fields.host_low)
8308 + || (hdr->parsed_pkt.ipv4_src > rule->rule.core_fields.host_high))
8309 + && ((hdr->parsed_pkt.ipv4_dst < rule->rule.core_fields.host_low)
8310 + || (hdr->parsed_pkt.ipv4_dst > rule->rule.core_fields.host_high)))
8314 + if((rule->rule.core_fields.port_high > 0)
8315 + && (!((hdr->parsed_pkt.l4_src_port >= rule->rule.core_fields.port_low)
8316 + && (hdr->parsed_pkt.l4_src_port <= rule->rule.core_fields.port_high)))
8317 + && (!((hdr->parsed_pkt.l4_dst_port >= rule->rule.core_fields.port_low)
8318 + && (hdr->parsed_pkt.l4_dst_port <= rule->rule.core_fields.port_high))))
8321 + if(rule->rule.balance_pool > 0) {
8322 + u_int32_t balance_hash = hash_pkt_header(hdr, 0, 0) % rule->rule.balance_pool;
8323 + if(balance_hash != rule->rule.balance_id) return(0);
8326 + if(rule->pattern != NULL) {
8327 + if((hdr->parsed_pkt.pkt_detail.offset.payload_offset > 0)
8328 + && (hdr->caplen > hdr->parsed_pkt.pkt_detail.offset.payload_offset)) {
8329 + char *payload = (char*)&(skb->data[hdr->parsed_pkt.pkt_detail.offset.payload_offset /* -displ */]);
8330 + int i, rc, payload_len = hdr->caplen - hdr->parsed_pkt.pkt_detail.offset.payload_offset - displ;
8332 + if(payload_len > 0) {
8334 + printk("[PF_RING] Trying to match pattern [caplen=%d][len=%d][displ=%d][payload_offset=%d][",
8335 + hdr->caplen, payload_len, displ, hdr->parsed_pkt.pkt_detail.offset.payload_offset);
8337 + for(i=0; i<payload_len; i++) printk("[%d/%c]", i, payload[i] & 0xFF);
8341 + payload[payload_len] = '\0';
8343 + if(debug) printk("[PF_RING] Attempt to match [%s]\n", payload);
8344 + rc = regexec(rule->pattern, payload);
8347 + printk("[PF_RING] Match returned: %d [payload_len=%d][%s]\n", rc, payload_len, payload);
8350 + return(0); /* No match */
8352 + return(0); /* No payload data */
8354 + return(0); /* No payload data */
8357 + if((rule->rule.extended_fields.filter_plugin_id > 0)
8358 + && (rule->rule.extended_fields.filter_plugin_id < MAX_PLUGIN_ID)
8359 + && (plugin_registration[rule->rule.extended_fields.filter_plugin_id] != NULL)
8360 + && (plugin_registration[rule->rule.extended_fields.filter_plugin_id]->pfring_plugin_filter_skb != NULL)
8365 + printk("[PF_RING] rule->plugin_id [rule_id=%d][filter_plugin_id=%d][plugin_action=%d][ptr=%p]\n",
8366 + rule->rule.rule_id,
8367 + rule->rule.extended_fields.filter_plugin_id,
8368 + rule->rule.plugin_action.plugin_id,
8369 + plugin_registration[rule->rule.plugin_action.plugin_id]);
8371 + rc = plugin_registration[rule->rule.extended_fields.filter_plugin_id]
8372 + ->pfring_plugin_filter_skb(the_ring, rule, hdr, skb,
8373 + &parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]);
8375 + if(parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]) *free_parse_mem = 1;
8378 + return(0); /* No match */
8380 + *last_matched_plugin = rule->rule.extended_fields.filter_plugin_id;
8381 + hdr->parsed_pkt.last_matched_plugin_id = rule->rule.extended_fields.filter_plugin_id;
8384 + printk("[PF_RING] [last_matched_plugin = %d][buffer=%p][len=%d]\n",
8385 + *last_matched_plugin, parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id],
8386 + parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id] ?
8387 + parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id]->mem_len : 0);
8391 + /* Action to be performed in case of match */
8392 + if((rule->rule.plugin_action.plugin_id != 0)
8393 + && (rule->rule.plugin_action.plugin_id < MAX_PLUGIN_ID)
8394 + && (plugin_registration[rule->rule.plugin_action.plugin_id] != NULL)
8395 + && (plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_handle_skb != NULL)
8397 + if(debug) printk("[PF_RING] Calling pfring_plugin_handle_skb()\n");
8399 + plugin_registration[rule->rule.plugin_action.plugin_id]
8400 + ->pfring_plugin_handle_skb(the_ring, rule, NULL, hdr, skb,
8401 + rule->rule.extended_fields.filter_plugin_id,
8402 + &parse_memory_buffer[rule->rule.extended_fields.filter_plugin_id],
8405 + if(*last_matched_plugin == 0)
8406 + *last_matched_plugin = rule->rule.plugin_action.plugin_id;
8408 + if(parse_memory_buffer[rule->rule.plugin_action.plugin_id]) *free_parse_mem = 1;
8410 + if(debug) printk("[PF_RING] Skipping pfring_plugin_handle_skb(plugin_action=%d)\n",
8411 + rule->rule.plugin_action.plugin_id);
8415 + printk("[PF_RING] MATCH: match_filtering_rule(vlan=%u, proto=%u, sip=%u, sport=%u, dip=%u, dport=%u)\n",
8416 + hdr->parsed_pkt.vlan_id, hdr->parsed_pkt.l3_proto, hdr->parsed_pkt.ipv4_src, hdr->parsed_pkt.l4_src_port,
8417 + hdr->parsed_pkt.ipv4_dst, hdr->parsed_pkt.l4_dst_port);
8418 + printk("[PF_RING] [rule(vlan=%u, proto=%u, ip=%u-%u, port=%u-%u)(behaviour=%d)]\n",
8419 + rule->rule.core_fields.vlan_id, rule->rule.core_fields.proto,
8420 + rule->rule.core_fields.host_low, rule->rule.core_fields.host_high,
8421 + rule->rule.core_fields.port_low,
8422 + rule->rule.core_fields.port_high, *behaviour);
8425 + rule->rule.jiffies_last_match = jiffies;
8426 + return(1); /* match */
8429 +/* ********************************** */
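+/* Editorial sketch (not part of the original patch): the regcomp()/regexec()
+ * pair from the embedded library above, used the way match_filtering_rule()
+ * applies rule->pattern to the payload; shown as a stand-alone function.
+ * Error handling is elided. */
+#if 0 /* illustration only */
+static void pattern_example(void)
+{
+	char pat[] = "get .*http";  /* array: regcomp() lowercases in place */
+	char payload[] = "GET /index.html HTTP/1.0";
+	int patternsize;
+	regexp *prog = regcomp(pat, &patternsize);
+	if (prog != NULL && regexec(prog, payload)) {
+		/* matched: with case_insensitive=1, regexec() lowercases
+		 * the subject string before matching, too */
+	}
+}
+#endif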
8431 +static void add_pkt_to_ring(struct sk_buff *skb,
8432 + struct ring_opt *pfr,
8433 + struct pfring_pkthdr *hdr,
8434 + int displ, short channel_id,
8435 + int offset, void* plugin_mem)
8437 + char *ring_bucket;
8439 + FlowSlot *theSlot;
8440 + int32_t the_bit = 1 << channel_id;
8442 + if(!pfr->ring_active) return;
8444 +#if defined(RING_DEBUG)
8445 + printk("[PF_RING] --> add_pkt_to_ring(len=%d) [pfr->channel_id=%d][channel_id=%d]\n",
8446 + hdr->len, pfr->channel_id, channel_id);
8449 + if((pfr->channel_id != RING_ANY_CHANNEL)
8450 + && (channel_id != RING_ANY_CHANNEL)
8451 + && ((pfr->channel_id & the_bit) != the_bit))
8452 + return; /* Wrong channel */
8454 + write_lock_bh(&pfr->ring_index_lock);
8455 + idx = pfr->slots_info->insert_idx;
8456 + idx++, theSlot = get_insert_slot(pfr);
8457 + pfr->slots_info->tot_pkts++;
8459 + if((theSlot == NULL) || (theSlot->slot_state != 0)) {
8460 + /* No room left */
8461 + pfr->slots_info->tot_lost++;
8462 + write_unlock_bh(&pfr->ring_index_lock);
8466 + ring_bucket = &theSlot->bucket;
8467 + memcpy(ring_bucket, hdr, sizeof(struct pfring_pkthdr)); /* Copy extended packet header */
8469 + if((plugin_mem != NULL) && (offset > 0)) {
8470 + memcpy(&ring_bucket[sizeof(struct pfring_pkthdr)], plugin_mem, offset);
8474 + hdr->caplen = min(pfr->bucket_len-offset, hdr->caplen);
8476 + if(hdr->caplen > 0) {
8477 +#if defined(RING_DEBUG)
8478 + printk("[PF_RING] --> [caplen=%d][len=%d][displ=%d][parsed_header_len=%d][bucket_len=%d]\n",
8479 + hdr->caplen, hdr->len, displ, hdr->parsed_header_len, pfr->bucket_len);
8481 + skb_copy_bits(skb, -displ, &ring_bucket[sizeof(struct pfring_pkthdr)+offset], hdr->caplen);
8483 + if(hdr->parsed_header_len >= pfr->bucket_len) {
8484 + static u_char print_once = 0;
8487 + printk("[PF_RING] WARNING: bucket len [%d] is shorter than the plugin parsed header len [%d]\n",
8488 + pfr->bucket_len, hdr->parsed_header_len);
8495 + if(idx == pfr->slots_info->tot_slots)
8496 + pfr->slots_info->insert_idx = 0;
8498 + pfr->slots_info->insert_idx = idx;
8500 +#if defined(RING_DEBUG)
8501 + printk("[PF_RING] ==> insert_idx=%d\n", pfr->slots_info->insert_idx);
8504 + pfr->slots_info->tot_insert++;
8505 + theSlot->slot_state = 1;
8506 + write_unlock_bh(&pfr->ring_index_lock);
8508 + /* wakeup in case of poll() */
8509 + if(waitqueue_active(&pfr->ring_slots_waitqueue))
8510 + wake_up_interruptible(&pfr->ring_slots_waitqueue);
8513 +/* ********************************** */
8515 +static int add_hdr_to_ring(struct ring_opt *pfr,
8516 + struct pfring_pkthdr *hdr) {
8517 + read_lock_bh(&ring_mgmt_lock);
8518 + add_pkt_to_ring(NULL, pfr, hdr, 0, 0, 0, NULL);
8519 + read_unlock_bh(&ring_mgmt_lock);
8523 +/* ********************************** */
8525 +/* Free filtering placeholders */
8526 +static void free_parse_memory(struct parse_buffer *parse_memory_buffer[]) {
8529 + for(i=1; i<=max_registered_plugin_id; i++)
8530 + if(parse_memory_buffer[i]) {
8531 + if(parse_memory_buffer[i]->mem != NULL) {
8532 + kfree(parse_memory_buffer[i]->mem);
8535 + kfree(parse_memory_buffer[i]);
8539 +/* ********************************** */
8541 +static int add_skb_to_ring(struct sk_buff *skb,
8542 + struct ring_opt *pfr,
8543 + struct pfring_pkthdr *hdr,
8549 + struct list_head *ptr, *tmp_ptr;
8550 + u_int8_t free_parse_mem = 0;
8551 + u_int last_matched_plugin = 0, debug = 0;
8552 + u_char hash_found = 0;
8553 + struct parse_buffer *parse_memory_buffer[MAX_PLUGIN_ID] = { NULL };
8554 + /* This is a memory holder
8555 + for storing parsed packet information
8556 + that will then be freed when the packet has been handled */
8560 + if(!pfr->ring_active) return(-1);
8561 + atomic_set(&pfr->num_ring_users, 1);
8563 + /* [1] BPF Filtering (from af_packet.c) */
8564 + if(pfr->bpfFilter != NULL) {
8565 + unsigned res = 1, len;
8567 + len = skb->len-skb->data_len;
8569 + skb->data -= displ;
8570 + res = sk_run_filter(skb, pfr->bpfFilter->insns, pfr->bpfFilter->len);
8571 + skb->data += displ;
8574 + /* Filter failed */
8575 +#if defined(RING_DEBUG)
8576 + printk("[PF_RING] add_skb_to_ring(skb): Filter failed [len=%d][tot=%llu]"
8577 + "[insertIdx=%d][pkt_type=%d][cloned=%d]\n",
8578 + (int)skb->len, pfr->slots_info->tot_pkts,
8579 + pfr->slots_info->insert_idx,
8580 + skb->pkt_type, skb->cloned);
8582 + atomic_set(&pfr->num_ring_users, 0);
8587 +#if defined(RING_DEBUG)
8588 + printk("[PF_RING] add_skb_to_ring: [displ=%d][len=%d][caplen=%d]"
8589 + "[is_ip_pkt=%d][%d -> %d]\n",
8590 + displ, hdr->len, hdr->caplen,
8591 + is_ip_pkt, hdr->parsed_pkt.l4_src_port,
8592 + hdr->parsed_pkt.l4_dst_port);
8595 + /* ************************************* */
8597 +#if defined(RING_DEBUG)
8598 + printk("[PF_RING] add_skb_to_ring(skb) [len=%d][tot=%llu][insertIdx=%d]"
8599 + "[pkt_type=%d][cloned=%d]\n",
8600 + (int)skb->len, pfr->slots_info->tot_pkts,
8601 + pfr->slots_info->insert_idx,
8602 + skb->pkt_type, skb->cloned);
8606 + fwd_pkt = pfr->rules_default_accept_policy;
8607 + /* printk("[PF_RING] rules_default_accept_policy: [fwd_pkt=%d]\n", fwd_pkt); */
8609 + /* ************************** */
8611 + /* [2] Filter packet according to rules */
8614 + printk("[PF_RING] About to evaluate packet [len=%d][tot=%llu][insertIdx=%d]"
8615 + "[pkt_type=%d][cloned=%d]\n",
8616 + (int)skb->len, pfr->slots_info->tot_pkts,
8617 + pfr->slots_info->insert_idx,
8618 + skb->pkt_type, skb->cloned);
8620 + /* [2.1] Search the hash */
8621 + if(pfr->filtering_hash != NULL) {
8623 + filtering_hash_bucket *hash_bucket;
8625 + hash_idx = hash_pkt_header(hdr, 0, 0) % DEFAULT_RING_HASH_SIZE;
8626 + hash_bucket = pfr->filtering_hash[hash_idx];
8628 + while(hash_bucket != NULL) {
8629 + if(hash_bucket_match(hash_bucket, hdr, 0, 0)) {
8633 + hash_bucket = hash_bucket->next;
8637 + packet_action_behaviour behaviour = forward_packet_and_stop_rule_evaluation;
8639 + if((hash_bucket->rule.plugin_action.plugin_id != 0)
8640 + && (hash_bucket->rule.plugin_action.plugin_id < MAX_PLUGIN_ID)
8641 + && (plugin_registration[hash_bucket->rule.plugin_action.plugin_id] != NULL)
8642 + && (plugin_registration[hash_bucket->rule.plugin_action.plugin_id]->pfring_plugin_handle_skb != NULL)
8644 + plugin_registration[hash_bucket->rule.plugin_action.plugin_id]
8645 + ->pfring_plugin_handle_skb(pfr, NULL, hash_bucket, hdr, skb,
8646 + 0 /* no plugin */,
8647 + &parse_memory_buffer[hash_bucket->rule.plugin_action.plugin_id],
8650 + if(parse_memory_buffer[hash_bucket->rule.plugin_action.plugin_id]) free_parse_mem = 1;
8651 + last_matched_plugin = hash_bucket->rule.plugin_action.plugin_id;
8652 + hdr->parsed_pkt.last_matched_plugin_id = hash_bucket->rule.plugin_action.plugin_id;
8655 + if((behaviour == forward_packet_and_stop_rule_evaluation)
8656 + || (behaviour == forward_packet_add_rule_and_stop_rule_evaluation)
8659 + else if(behaviour == dont_forward_packet_and_stop_rule_evaluation)
8662 + if(hash_bucket->rule.rule_action == forward_packet_and_stop_rule_evaluation) {
8664 + } else if(hash_bucket->rule.rule_action == dont_forward_packet_and_stop_rule_evaluation) {
8666 + } else if(hash_bucket->rule.rule_action == execute_action_and_continue_rule_evaluation) {
8667 + hash_found = 0; /* This way we also evaluate the list of rules */
8671 + /* printk("[PF_RING] Packet not found\n"); */
8675 + /* [2.2] Search rules list */
8676 + if((!hash_found) && (pfr->num_filtering_rules > 0)) {
8677 + list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
8679 + filtering_rule_element *entry;
8680 + packet_action_behaviour behaviour = forward_packet_and_stop_rule_evaluation;
8682 + entry = list_entry(ptr, filtering_rule_element, list);
8684 + if(match_filtering_rule(pfr, entry, hdr, skb, displ,
8685 + parse_memory_buffer, &free_parse_mem,
8686 + &last_matched_plugin, &behaviour))
8689 + if(behaviour == use_rule_forward_policy)
8690 + behaviour = entry->rule.rule_action;
8692 + if(debug) printk("[PF_RING] behaviour=%d\n", behaviour);
8694 + if(behaviour == forward_packet_and_stop_rule_evaluation) {
8697 + } else if(behaviour == forward_packet_add_rule_and_stop_rule_evaluation) {
8698 + filtering_hash_bucket *hash_bucket;
8702 + hash_bucket = (filtering_hash_bucket*)kcalloc(1, sizeof(filtering_hash_bucket), GFP_KERNEL);
8707 + hash_bucket->rule.vlan_id = hdr->parsed_pkt.vlan_id;
8708 + hash_bucket->rule.proto = hdr->parsed_pkt.l3_proto;
8709 + hash_bucket->rule.host_peer_a = hdr->parsed_pkt.ipv4_src;
8710 + hash_bucket->rule.host_peer_b = hdr->parsed_pkt.ipv4_dst;
8711 + hash_bucket->rule.port_peer_a = hdr->parsed_pkt.l4_src_port;
8712 + hash_bucket->rule.port_peer_b = hdr->parsed_pkt.l4_dst_port;
8713 + hash_bucket->rule.rule_action = forward_packet_and_stop_rule_evaluation;
8714 + hash_bucket->rule.jiffies_last_match = jiffies; /* Avoid immediate rule purging */
8716 + //write_lock_bh(&pfr->ring_rules_lock);
8717 + rc = pfr->handle_hash_rule(pfr, hash_bucket, 1 /* add_rule_from_plugin */);
8718 + pfr->num_filtering_rules++;
8719 + // write_unlock_bh(&pfr->ring_rules_lock);
8722 + kfree(hash_bucket);
8725 + if(debug) printk("[PF_RING] Added rule: [%d.%d.%d.%d:%d <-> %d.%d.%d.%d:%d][tot_rules=%d]\n",
8726 + ((hash_bucket->rule.host_peer_a >> 24) & 0xff),
8727 + ((hash_bucket->rule.host_peer_a >> 16) & 0xff),
8728 + ((hash_bucket->rule.host_peer_a >> 8) & 0xff),
8729 + ((hash_bucket->rule.host_peer_a >> 0) & 0xff),
8730 + hash_bucket->rule.port_peer_a,
8731 + ((hash_bucket->rule.host_peer_b >> 24) & 0xff),
8732 + ((hash_bucket->rule.host_peer_b >> 16) & 0xff),
8733 + ((hash_bucket->rule.host_peer_b >> 8) & 0xff),
8734 + ((hash_bucket->rule.host_peer_b >> 0) & 0xff),
8735 + hash_bucket->rule.port_peer_b,
8736 + pfr->num_filtering_rules);
8741 + } else if(behaviour == dont_forward_packet_and_stop_rule_evaluation) {
8745 + if(entry->rule.rule_action == forward_packet_and_stop_rule_evaluation) {
8748 + } else if(entry->rule.rule_action == dont_forward_packet_and_stop_rule_evaluation) {
8751 + } else if(entry->rule.rule_action == execute_action_and_continue_rule_evaluation) {
8752 + /* The action has already been performed inside match_filtering_rule()
8753 + hence instead of stopping rule evaluation, the next rule
8754 + will be evaluated */
8762 + /* We accept the packet: it needs to be queued */
8764 + /* [3] Packet sampling */
8765 + if(pfr->sample_rate > 1) {
8766 + write_lock_bh(&pfr->ring_index_lock);
8767 + pfr->slots_info->tot_pkts++;
8769 + if(pfr->pktToSample == 0) {
8770 + pfr->pktToSample = pfr->sample_rate;
8772 + pfr->pktToSample--;
8774 +#if defined(RING_DEBUG)
8775 + printk("[PF_RING] add_skb_to_ring(skb): sampled packet [len=%d]"
8776 + "[tot=%llu][insertIdx=%d][pkt_type=%d][cloned=%d]\n",
8777 + (int)skb->len, pfr->slots_info->tot_pkts,
8778 + pfr->slots_info->insert_idx,
8779 + skb->pkt_type, skb->cloned);
8782 + write_unlock_bh(&pfr->ring_index_lock);
8783 + if(free_parse_mem) free_parse_memory(parse_memory_buffer);
8784 + atomic_set(&pfr->num_ring_users, 0);
8788 + write_unlock_bh(&pfr->ring_index_lock);
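+ /*
+  * With sample_rate = N only one packet in N is queued: pktToSample
+  * counts down and the packet that finds it at zero is inserted, after
+  * which the counter is re-armed. A hypothetical userland sketch
+  * (assumes setsockopt level 0 and a 32-bit sample_rate; cf. the
+  * SO_SET_SAMPLING_RATE handler below):
+  *
+  *   u_int32_t rate = 10;   // keep one packet out of every 10
+  *   setsockopt(fd, 0, SO_SET_SAMPLING_RATE, &rate, sizeof(rate));
+  */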
8791 + /* [4] Check if there is a reflector device defined */
8792 + if((pfr->reflector_dev != NULL)
8793 + && (!netif_queue_stopped(pfr->reflector_dev) /* TX is in good shape */)
8796 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
8797 + struct netdev_queue *txq = netdev_get_tx_queue(pfr->reflector_dev, 0 /* TX queue 0 */);
8801 + atomic_inc(&skb->users); /* Keep others from freeing the skb underneath us */
8803 + HARD_TX_LOCK(pfr->reflector_dev,
8804 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
8807 + smp_processor_id());
8808 + skb->data -= displ, skb->len += displ;
8809 + ret = pfr->reflector_dev->hard_start_xmit(skb, pfr->reflector_dev);
8810 + skb->data += displ, skb->len -= displ;
8811 + HARD_TX_UNLOCK(pfr->reflector_dev
8812 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,30))
8817 +#if defined(RING_DEBUG)
8818 + printk("[PF_RING] reflect(len=%d, displ=%d): %d\n", skb->len, displ, ret);
8821 + atomic_set(&pfr->num_ring_users, 0); /* Done */
8822 + if(free_parse_mem) free_parse_memory(parse_memory_buffer);
8823 + return(ret == NETDEV_TX_OK ? 0 : -ENETDOWN); /* -ENETDOWN */
8826 + /* No reflector device: the packet needs to be queued */
8827 + if(hdr->caplen > 0) {
8828 + /* Copy the packet into the bucket */
8832 + if((last_matched_plugin > 0)
8833 + && (parse_memory_buffer[last_matched_plugin] != NULL)) {
8834 + offset = hdr->parsed_header_len = parse_memory_buffer[last_matched_plugin]->mem_len;
8836 + hdr->parsed_pkt.last_matched_plugin_id = last_matched_plugin;
8838 +#if defined(RING_DEBUG)
8839 + printk("[PF_RING] --> [last_matched_plugin = %d][parsed_header_len=%d]\n",
8840 + last_matched_plugin, hdr->parsed_header_len);
8843 + if(offset > pfr->bucket_len) offset = hdr->parsed_header_len = pfr->bucket_len;
8845 + mem = parse_memory_buffer[last_matched_plugin]->mem;
8847 + offset = 0, hdr->parsed_header_len = 0, mem = NULL;
8849 + add_pkt_to_ring(skb, pfr, hdr, displ, channel_id, offset, mem);
8853 +#if defined(RING_DEBUG)
8854 + printk("[PF_RING] [pfr->slots_info->insert_idx=%d]\n", pfr->slots_info->insert_idx);
8857 + if(free_parse_mem) free_parse_memory(parse_memory_buffer);
8858 + atomic_set(&pfr->num_ring_users, 0);
8863 +/* ********************************** */
8865 +static u_int hash_skb(ring_cluster_element *cluster_ptr,
8866 + struct sk_buff *skb,
8872 + if(cluster_ptr->cluster.hashing_mode == cluster_round_robin)
8874 + idx = cluster_ptr->cluster.hashing_id++;
8878 + /* Per-flow clustering */
8879 + if(skb->len > sizeof(struct iphdr)+sizeof(struct tcphdr))
8884 + Always points to the IP part of the packet
8886 + ip = (struct iphdr*)(skb->data+displ);
8887 + idx = ip->saddr+ip->daddr+ip->protocol;
8889 + if(ip->protocol == IPPROTO_TCP)
8891 + struct tcphdr *tcp = (struct tcphdr*)(skb->data+displ
8892 + +sizeof(struct iphdr));
8893 + idx += tcp->source+tcp->dest;
8895 + else if(ip->protocol == IPPROTO_UDP)
8897 + struct udphdr *udp = (struct udphdr*)(skb->data+displ
8898 + +sizeof(struct iphdr));
8899 + idx += udp->source+udp->dest;
8906 + return(idx % cluster_ptr->cluster.num_cluster_elements);
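+ /*
+  * Note: the per-flow index sums addresses and ports, so it is
+  * direction-independent: e.g. 192.168.0.1:80 <-> 10.0.0.1:34000 over TCP
+  * hashes to (saddr + daddr + IPPROTO_TCP + 80 + 34000) %
+  * num_cluster_elements either way, keeping both directions of a flow on
+  * the same cluster element.
+  */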
8909 +/* ********************************** */
8911 +static int register_plugin(struct pfring_plugin_registration *reg)
8913 + if(reg == NULL) return(-1);
8916 + printk("[PF_RING] --> register_plugin(%d)\n", reg->plugin_id);
8919 + if((reg->plugin_id >= MAX_PLUGIN_ID) || (reg->plugin_id == 0))
8920 + return(-EINVAL);
8922 + if(plugin_registration[reg->plugin_id] != NULL)
8923 + return(-EINVAL); /* plugin already registered */
8925 + plugin_registration[reg->plugin_id] = reg;
8926 + plugin_registration_size++;
8928 + max_registered_plugin_id = max(max_registered_plugin_id, reg->plugin_id);
8930 + printk("[PF_RING] registered plugin [id=%d][max=%d][%p]\n",
8931 + reg->plugin_id, max_registered_plugin_id, plugin_registration[reg->plugin_id]);
8932 + try_module_get(THIS_MODULE); /* Increment usage count */
8937 +/* ********************************** */
8939 +int unregister_plugin(u_int16_t pfring_plugin_id)
8943 + if(pfring_plugin_id >= MAX_PLUGIN_ID)
8944 + return(-EINVAL);
8946 + if(plugin_registration[pfring_plugin_id] == NULL)
8947 + return(-EINVAL); /* plugin not registered */
8949 + struct list_head *ptr, *tmp_ptr, *ring_ptr, *ring_tmp_ptr;
8951 + plugin_registration[pfring_plugin_id] = NULL;
8952 + plugin_registration_size--;
8954 + read_lock_bh(&ring_mgmt_lock);
8955 + list_for_each_safe(ring_ptr, ring_tmp_ptr, &ring_table) {
8956 + struct ring_element *entry = list_entry(ring_ptr, struct ring_element, list);
8957 + struct ring_opt *pfr = ring_sk(entry->sk);
8959 + list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
8961 + filtering_rule_element *rule;
8963 + rule = list_entry(ptr, filtering_rule_element, list);
8965 + if(rule->rule.plugin_action.plugin_id == pfring_plugin_id) {
8966 + if(plugin_registration[pfring_plugin_id]
8967 + && plugin_registration[pfring_plugin_id]->pfring_plugin_free_ring_mem) {
8968 + /* Custom free function */
8969 + plugin_registration[pfring_plugin_id]->pfring_plugin_free_ring_mem(rule);
8971 + if(rule->plugin_data_ptr != NULL) {
8972 + kfree(rule->plugin_data_ptr);
8973 + rule->plugin_data_ptr = NULL;
8977 + rule->rule.plugin_action.plugin_id = 0;
8981 + read_unlock_bh(&ring_mgmt_lock);
8983 + for(i=MAX_PLUGIN_ID-1; i>0; i--) {
8984 + if(plugin_registration[i] != NULL) {
8985 + max_registered_plugin_id = i;
8990 + printk("[PF_RING] unregistered plugin [id=%d][max=%d]\n",
8991 + pfring_plugin_id, max_registered_plugin_id);
8992 + module_put(THIS_MODULE); /* Decrement usage count */
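+ /*
+  * Registration sketch for a hypothetical plugin (plugins reach this
+  * function through the hook installed via set_register_pfring_plugin();
+  * only the callbacks referenced in this file are shown, the remaining
+  * members of struct pfring_plugin_registration are left zeroed):
+  *
+  *   static struct pfring_plugin_registration my_reg = {
+  *     .plugin_id                   = 7,  // any unused id in (0, MAX_PLUGIN_ID)
+  *     .pfring_plugin_handle_skb    = my_handle_skb,
+  *     .pfring_plugin_get_stats     = my_get_stats,
+  *     .pfring_plugin_free_ring_mem = my_free_ring_mem,
+  *   };
+  *
+  *   register_plugin(&my_reg);   // on plugin load
+  *   ...
+  *   unregister_plugin(7);       // on plugin unload
+  */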
8997 +/* ********************************** */
8999 +static int skb_ring_handler(struct sk_buff *skb,
9000 + u_char recv_packet,
9001 + u_char real_skb /* 1=real skb, 0=faked skb */,
9004 + struct sock *skElement;
9005 + int rc = 0, is_ip_pkt;
9006 + struct list_head *ptr;
9007 + struct pfring_pkthdr hdr;
9009 + struct sk_buff *skk = NULL;
9010 + struct sk_buff *orig_skb = skb;
9013 + uint64_t rdt = _rdtsc(), rdt1, rdt2;
9016 + if((!skb) /* Invalid skb */
9017 + || ((!enable_tx_capture) && (!recv_packet)))
9020 + An outgoing packet is about to be sent out,
9021 + but we decided not to handle transmitted packets.
9027 +#if defined(RING_DEBUG)
9029 + struct timeval tv;
9031 + skb_get_timestamp(skb, &tv);
9032 + printk("[PF_RING] skb_ring_handler() [skb=%p][%u.%u][len=%d][dev=%s][csum=%u]\n",
9033 + skb, (unsigned int)tv.tv_sec, (unsigned int)tv.tv_usec, skb->len,
9034 + skb->dev->name == NULL ? "<NULL>" : skb->dev->name, skb->csum);
9038 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,21))
9039 + if(channel_id == RING_ANY_CHANNEL /* Unknown channel */)
9040 + channel_id = skb->iif; /* Might have been set by the driver */
9043 +#if defined (RING_DEBUG)
9044 + /* printk("[PF_RING] channel_id=%d\n", channel_id); */
9052 + /* Hack for identifying a packet received by the e1000 */
9054 + displ = SKB_DISPLACEMENT;
9056 + displ = 0; /* Received by the e1000 wrapper */
9060 + is_ip_pkt = parse_pkt(skb, displ, &hdr);
9062 + /* (de)Fragmentation <fusco@ntop.org> */
9063 + if (enable_ip_defrag
9067 + && (ring_table_size > 0))
9069 + struct sk_buff *cloned = NULL;
9070 + struct iphdr* iphdr = NULL;
9072 + skb_reset_network_header(skb);
9073 + skb_reset_transport_header(skb);
9074 + skb_set_network_header(skb, ETH_HLEN-displ);
9076 + iphdr = ip_hdr(skb);
9079 +#if defined (RING_DEBUG)
9080 + printk("[PF_RING] [version=%d] %X -> %X\n", iphdr->version, iphdr->saddr, iphdr->daddr);
9082 + if (iphdr->frag_off & htons(IP_MF | IP_OFFSET))
9084 + if((cloned = skb_clone(skb, GFP_ATOMIC)) != NULL)
9086 +#if defined (RING_DEBUG)
9087 + int offset = ntohs(iphdr->frag_off);
9088 + offset &= IP_OFFSET;
9091 + printk("[PF_RING] There is a fragment to handle [proto=%d][frag_off=%u]"
9092 + "[ip_id=%u][network_header=%d][displ=%d]\n",
9093 + iphdr->protocol, offset, ntohs(iphdr->id),
9094 + hdr.parsed_pkt.pkt_detail.offset.l3_offset-displ, displ);
9096 + skk = ring_gather_frags(cloned);
9100 +#if defined (RING_DEBUG)
9101 + printk("[PF_RING] IP reasm on new skb [skb_len=%d][head_len=%d][nr_frags=%d][frag_list=%p]\n",
9102 + (int)skk->len, skb_headlen(skk),
9103 + skb_shinfo(skk)->nr_frags, skb_shinfo(skk)->frag_list);
9106 + parse_pkt(skb, displ, &hdr);
9107 + hdr.len = hdr.caplen = skb->len+displ;
9109 + //printk("[PF_RING] Fragment queued \n");
9110 + return(0); /* mask rcvd fragments */
9116 +#if defined (RING_DEBUG)
9117 + printk("[PF_RING] Does not seem to be a fragmented IP packet [iphdr=%p]\n", iphdr);
9123 + /* BD - API changed for time keeping */
9124 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
9125 + if(skb->stamp.tv_sec == 0) do_gettimeofday(&skb->stamp);
9126 + hdr.ts.tv_sec = skb->stamp.tv_sec, hdr.ts.tv_usec = skb->stamp.tv_usec;
9127 +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
9128 + if(skb->tstamp.off_sec == 0) __net_timestamp(skb);
9129 + hdr.ts.tv_sec = skb->tstamp.off_sec, hdr.ts.tv_usec = skb->tstamp.off_usec;
9130 +#else /* 2.6.22 and above */
9131 + if(skb->tstamp.tv64 == 0) __net_timestamp(skb);
9132 + hdr.ts = ktime_to_timeval(skb->tstamp);
9135 + hdr.len = hdr.caplen = skb->len+displ;
9137 + /* Keep the ring from being manipulated while we access it */
9138 + read_lock_bh(&ring_mgmt_lock);
9141 + printk("[PF_RING] -----------------------------------\n");
9144 + /* [1] Check unclustered sockets */
9145 + list_for_each(ptr, &ring_table) {
9146 + struct ring_opt *pfr;
9147 + struct ring_element *entry;
9149 + entry = list_entry(ptr, struct ring_element, list);
9151 + skElement = entry->sk;
9152 + pfr = ring_sk(skElement);
9155 + if(pfr && (pfr->ring_slots != NULL)) {
9156 + /* if(pfr->ring_netdev && pfr->ring_netdev->name && strcmp(pfr->ring_netdev->name, "eth0")) */
9157 + printk("[PF_RING] Received packet [device=%s][socket=%s][%p]\n",
9158 + skb->dev->name ? skb->dev->name : "<unknown>",
9159 + pfr->ring_netdev->name ? pfr->ring_netdev->name : "<unknown>", pfr);
9164 + && (pfr->cluster_id == 0 /* No cluster */)
9165 + && (pfr->ring_slots != NULL)
9166 + && ((pfr->ring_netdev == skb->dev)
9167 + || ((skb->dev->flags & IFF_SLAVE)
9168 + && (pfr->ring_netdev == skb->dev->master)))) {
9169 + /* We've found the ring where the packet can be stored */
9170 + int old_caplen = hdr.caplen; /* Keep the old length */
9171 + hdr.caplen = min(hdr.caplen, pfr->bucket_len);
9173 + printk("[PF_RING] MATCH received packet [device=%s][socket=%s][%p]\n",
9174 + skb->dev->name ? skb->dev->name : "<unknown>",
9175 + pfr->ring_netdev->name ? pfr->ring_netdev->name : "<unknown>", pfr);
9178 + add_skb_to_ring(skb, pfr, &hdr, is_ip_pkt, displ, channel_id);
9179 + hdr.caplen = old_caplen;
9180 + rc = 1; /* Ring found: we've done our job */
9184 + /* [2] Check socket clusters */
9185 + list_for_each(ptr, &ring_cluster_list) {
9186 + ring_cluster_element *cluster_ptr;
9187 + struct ring_opt *pfr;
9189 + cluster_ptr = list_entry(ptr, ring_cluster_element, list);
9191 + if(cluster_ptr->cluster.num_cluster_elements > 0) {
9192 + u_int skb_hash = hash_skb(cluster_ptr, skb, displ);
9194 + skElement = cluster_ptr->cluster.sk[skb_hash];
9196 + if(skElement != NULL) {
9197 + pfr = ring_sk(skElement);
9200 + && (pfr->ring_slots != NULL)
9201 + && ((pfr->ring_netdev == skb->dev)
9202 + || ((skb->dev->flags & IFF_SLAVE)
9203 + && (pfr->ring_netdev == skb->dev->master)))) {
9204 + /* We've found the ring where the packet can be stored */
9205 + add_skb_to_ring(skb, pfr, &hdr, is_ip_pkt, displ, channel_id);
9206 + rc = 1; /* Ring found: we've done our job */
9212 + read_unlock_bh(&ring_mgmt_lock);
9215 + rdt1 = _rdtsc()-rdt1;
9222 + /* Fragment handling */
9227 + if(transparent_mode) {
9230 + if(recv_packet && real_skb) {
9231 +#if defined(RING_DEBUG)
9232 + printk("[PF_RING] kfree_skb()\n");
9235 + kfree_skb(orig_skb);
9241 + rdt2 = _rdtsc()-rdt2;
9242 + rdt = _rdtsc()-rdt;
9244 +#if defined(RING_DEBUG)
9245 + printk("[PF_RING] # cycles: %d [lock cost %d %d%%][free cost %d %d%%]\n",
9246 + (int)rdt, (int)(rdt-rdt1),
9247 + (int)((float)((rdt-rdt1)*100)/(float)rdt),
9248 + (int)rdt2,
9249 + (int)((float)(rdt2*100)/(float)rdt));
9253 + //printk("[PF_RING] Returned %d\n", rc);
9254 + return(rc); /* 0 = packet not handled */
9257 +/* ********************************** */
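+ /* Scratch skb filled in by buffer_ring_handler() below, for drivers that
+    hand PF_RING a raw buffer rather than a real sk_buff */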
9259 +struct sk_buff skb;
9261 +static int buffer_ring_handler(struct net_device *dev,
9262 + char *data, int len)
9264 +#if defined(RING_DEBUG)
9265 + printk("[PF_RING] buffer_ring_handler: [dev=%s][len=%d]\n",
9266 + dev->name == NULL ? "<NULL>" : dev->name, len);
9269 + skb.dev = dev, skb.len = len, skb.data = data, skb.data_len = len;
9271 + /* BD - API changed for time keeping */
9272 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14))
9273 + skb.stamp.tv_sec = 0;
9274 +#elif (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22))
9275 + skb.tstamp.off_sec = 0;
9277 + skb.tstamp.tv64 = 0;
9280 + return(skb_ring_handler(&skb, 1, 0 /* fake skb */, -1 /* Unknown channel */));
9283 +/* ************************************* */
9285 +static int handle_filtering_hash_bucket(struct ring_opt *pfr,
9286 + filtering_hash_bucket* rule,
9289 + u_int32_t hash_value = hash_pkt(rule->rule.vlan_id, rule->rule.proto,
9290 + rule->rule.host_peer_a, rule->rule.host_peer_b,
9291 + rule->rule.port_peer_a, rule->rule.port_peer_b) % DEFAULT_RING_HASH_SIZE;
9292 + int rc = -1, debug = 0;
9294 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket(vlan=%u, proto=%u, "
9295 + "sip=%d.%d.%d.%d, sport=%u, dip=%d.%d.%d.%d, dport=%u, "
9296 + "hash_value=%u, add_rule=%d) called\n",
9297 + rule->rule.vlan_id, rule->rule.proto,
9298 + ((rule->rule.host_peer_a >> 24) & 0xff),
9299 + ((rule->rule.host_peer_a >> 16) & 0xff),
9300 + ((rule->rule.host_peer_a >> 8) & 0xff),
9301 + ((rule->rule.host_peer_a >> 0) & 0xff),
9302 + rule->rule.port_peer_a,
9303 + ((rule->rule.host_peer_b >> 24) & 0xff),
9304 + ((rule->rule.host_peer_b >> 16) & 0xff),
9305 + ((rule->rule.host_peer_b >> 8) & 0xff),
9306 + ((rule->rule.host_peer_b >> 0) & 0xff),
9307 + rule->rule.port_peer_b,
9308 + hash_value, add_rule);
9311 + if(pfr->filtering_hash == NULL)
9312 + pfr->filtering_hash = (filtering_hash_bucket**)kcalloc(DEFAULT_RING_HASH_SIZE,
9313 + sizeof(filtering_hash_bucket*),
9315 + if(pfr->filtering_hash == NULL) {
9316 + /* kfree(rule); */
9317 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [0]\n", -EFAULT);
9322 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket() allocated memory\n");
9324 + if(pfr->filtering_hash == NULL) {
9325 + /* We're trying to delete a hash rule from an empty hash */
9329 + if(pfr->filtering_hash[hash_value] == NULL) {
9331 + pfr->filtering_hash[hash_value] = rule, rule->next = NULL, rc = 0;
9333 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [1]\n", -1);
9334 + return(-1); /* Unable to find the specified rule */
9337 + filtering_hash_bucket *prev = NULL, *bucket = pfr->filtering_hash[hash_value];
9339 + while(bucket != NULL) {
9340 + if(hash_filtering_rule_match(&bucket->rule, &rule->rule)) {
9342 + if(debug) printk("[PF_RING] Duplicate found while adding rule: discarded\n");
9343 + /* kfree(rule); */
9346 + /* We've found the bucket to delete */
9348 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket() found a bucket to delete: removing it\n");
9350 + pfr->filtering_hash[hash_value] = bucket->next;
9352 + prev->next = bucket->next;
9354 + /* Free the bucket */
9355 + if(bucket->plugin_data_ptr) kfree(bucket->plugin_data_ptr);
9357 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [2]\n", 0);
9362 + bucket = bucket->next;
9367 + /* If we reached this point, the rule is unique */
9369 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket() no duplicate rule found: adding the rule\n");
9370 + rule->next = pfr->filtering_hash[hash_value];
9371 + pfr->filtering_hash[hash_value] = rule;
9374 + /* The rule we searched for has not been found */
9379 + if(debug) printk("[PF_RING] handle_filtering_hash_bucket() returned %d [3]\n", rc);
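+ /*
+  * Hypothetical userland sketch: install an exact-match hash rule that
+  * drops one TCP flow (setsockopt level 0 and host byte order for the
+  * fields are assumed; cf. the SO_ADD_FILTERING_RULE handler below):
+  *
+  *   hash_filtering_rule r;
+  *   memset(&r, 0, sizeof(r));
+  *   r.proto       = IPPROTO_TCP;
+  *   r.host_peer_a = ntohl(inet_addr("192.168.0.1"));
+  *   r.host_peer_b = ntohl(inet_addr("10.0.0.1"));
+  *   r.port_peer_a = 80;
+  *   r.port_peer_b = 34000;
+  *   r.rule_action = dont_forward_packet_and_stop_rule_evaluation;
+  *   setsockopt(fd, 0, SO_ADD_FILTERING_RULE, &r, sizeof(r));
+  */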
9384 +/* ********************************** */
9386 +static int ring_create(
9387 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
9390 + struct socket *sock, int protocol)
9393 + struct ring_opt *pfr;
9396 +#if defined(RING_DEBUG)
9397 + printk("[PF_RING] ring_create()\n");
9400 + /* Are you root, superuser or so ? */
9401 + if(!capable(CAP_NET_ADMIN))
9404 + if(sock->type != SOCK_RAW)
9405 + return -ESOCKTNOSUPPORT;
9407 + if(protocol != htons(ETH_P_ALL))
9408 + return -EPROTONOSUPPORT;
9410 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
9411 + MOD_INC_USE_COUNT;
9416 + // BD: broke this out to keep it simple and clear as to what the different kernel versions require
9418 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
9419 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1); /* Kernel 2.4 */
9422 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
9423 + sk = sk_alloc(PF_RING, GFP_KERNEL, 1, NULL);
9425 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24))
9426 + // BD: API changed in 2.6.12, ref:
9427 + // http://svn.clkao.org/svnweb/linux/revision/?rev=28201
9428 + sk = sk_alloc(PF_RING, GFP_ATOMIC, &ring_proto, 1);
9430 + sk = sk_alloc(net, PF_INET, GFP_KERNEL, &ring_proto);
9438 + sock->ops = &ring_ops;
9439 + sock_init_data(sock, sk);
9440 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
9441 +#if (LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11))
9442 + sk_set_owner(sk, THIS_MODULE);
9447 + ring_sk(sk) = ring_sk_datatype(kmalloc(sizeof(*pfr), GFP_KERNEL));
9449 + if (!(pfr = ring_sk(sk))) {
9453 + memset(pfr, 0, sizeof(*pfr));
9454 + pfr->ring_active = 0; /* We activate as soon as somebody waits for packets */
9455 + pfr->channel_id = RING_ANY_CHANNEL;
9456 + pfr->bucket_len = DEFAULT_BUCKET_LEN;
9457 + pfr->handle_hash_rule = handle_filtering_hash_bucket;
9458 + init_waitqueue_head(&pfr->ring_slots_waitqueue);
9459 + rwlock_init(&pfr->ring_index_lock);
9460 + rwlock_init(&pfr->ring_rules_lock);
9461 + atomic_set(&pfr->num_ring_users, 0);
9462 + INIT_LIST_HEAD(&pfr->rules);
9464 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
9465 + sk->sk_family = PF_RING;
9466 + sk->sk_destruct = ring_sock_destruct;
9468 + sk->family = PF_RING;
9469 + sk->destruct = ring_sock_destruct;
9470 + sk->num = protocol;
9475 +#if defined(RING_DEBUG)
9476 + printk("[PF_RING] ring_create() - created\n");
9481 +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0))
9482 + MOD_DEC_USE_COUNT;
9487 +/* *********************************************** */
9489 +static int ring_release(struct socket *sock)
9491 + struct sock *sk = sock->sk;
9492 + struct ring_opt *pfr = ring_sk(sk);
9493 + struct list_head *ptr, *tmp_ptr;
9494 + void * ring_memory_ptr;
9499 + pfr->ring_active = 0;
9501 + while(atomic_read(&pfr->num_ring_users) > 0) {
9505 +#if defined(RING_DEBUG)
9506 + printk("[PF_RING] called ring_release\n");
9510 + The calls below must be placed outside the
9511 + write_lock_bh...write_unlock_bh block.
9514 + ring_proc_remove(ring_sk(sk));
9516 + if(pfr->ring_netdev && (pfr->ring_netdev->ifindex < MAX_NUM_DEVICES)) {
9517 + struct list_head *ptr, *tmp_ptr;
9518 + device_ring_list_element *entry;
9520 + list_for_each_safe(ptr, tmp_ptr, &device_ring_list[pfr->ring_netdev->ifindex]) {
9521 + entry = list_entry(ptr, device_ring_list_element, list);
9523 + if(entry->the_ring == pfr) {
9531 + write_lock_bh(&ring_mgmt_lock);
9536 + list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
9538 + filtering_rule_element *rule;
9540 + rule = list_entry(ptr, filtering_rule_element, list);
9542 + if(plugin_registration[rule->rule.plugin_action.plugin_id]
9543 + && plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_free_ring_mem) {
9544 + /* Custom free function */
9545 + plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_free_ring_mem(rule);
9548 + printk("[PF_RING] --> default_free [rule->rule.plugin_action.plugin_id=%d]\n",
9549 + rule->rule.plugin_action.plugin_id);
9551 + if(rule->plugin_data_ptr != NULL) {
9552 + kfree(rule->plugin_data_ptr);
9553 + rule->plugin_data_ptr = NULL;
9557 + if(rule->pattern) kfree(rule->pattern);
9563 + /* Filtering hash rules */
9564 + if(pfr->filtering_hash) {
9567 + for(i=0; i<DEFAULT_RING_HASH_SIZE; i++) {
9568 + if(pfr->filtering_hash[i] != NULL) {
9569 + filtering_hash_bucket *scan = pfr->filtering_hash[i], *next;
9571 + while(scan != NULL) {
9572 + next = scan->next;
9573 + if(scan->plugin_data_ptr != NULL) kfree(scan->plugin_data_ptr);
9580 + kfree(pfr->filtering_hash);
9583 + if(pfr->reflector_dev != NULL)
9584 + dev_put(pfr->reflector_dev); /* Release device */
9586 + /* Free the ring buffer later, vfree needs interrupts enabled */
9587 + ring_memory_ptr = pfr->ring_memory;
9588 + ring_sk(sk) = NULL;
9590 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
9591 + skb_queue_purge(&sk->sk_write_queue);
9595 + write_unlock_bh(&ring_mgmt_lock);
9596 + if(pfr->appl_name != NULL) kfree(pfr->appl_name);
9598 + if(ring_memory_ptr != NULL) {
9599 +#if defined(RING_DEBUG)
9600 + printk("[PF_RING] ring_release: rvfree\n");
9602 + rvfree(ring_memory_ptr, pfr->slots_info->tot_mem);
9607 +#if defined(RING_DEBUG)
9608 + printk("[PF_RING] ring_release: rvfree done\n");
9611 +#if defined(RING_DEBUG)
9612 + printk("[PF_RING] ring_release: done\n");
9618 +/* ********************************** */
9621 + * We create a ring for this socket and bind it to the specified device
9623 +static int packet_ring_bind(struct sock *sk, struct net_device *dev)
9625 + u_int the_slot_len;
9626 + u_int32_t tot_mem;
9627 + struct ring_opt *pfr = ring_sk(sk);
9628 + // struct page *page, *page_end;
9630 + if(!dev) return(-1);
9632 +#if defined(RING_DEBUG)
9633 + printk("[PF_RING] packet_ring_bind(%s) called\n", dev->name);
9636 + /* Ring memory layout: a FlowSlotInfo header followed by
9638 + num_slots fixed-size FlowSlot entries. */
9654 + the_slot_len = sizeof(u_char) /* flowSlot.slot_state */
9658 + + sizeof(struct pfring_pkthdr)
9659 + + pfr->bucket_len /* flowSlot.bucket */;
9661 + tot_mem = sizeof(FlowSlotInfo) + num_slots*the_slot_len;
9662 + if (tot_mem % PAGE_SIZE)
9663 + tot_mem += PAGE_SIZE - (tot_mem % PAGE_SIZE);
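+ /*
+  * Worked example (illustrative): with the default bucket_len of 128
+  * bytes, the_slot_len = 1 + sizeof(struct pfring_pkthdr) + 128 (plus any
+  * per-slot fields elided above); with num_slots = 4096 this gives
+  * tot_mem = sizeof(FlowSlotInfo) + 4096 * the_slot_len, rounded up to a
+  * PAGE_SIZE multiple so that the whole area can be mmap()ed as pages.
+  */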
9665 + pfr->ring_memory = rvmalloc(tot_mem);
9667 + if (pfr->ring_memory != NULL) {
9668 + printk("[PF_RING] successfully allocated %lu bytes at 0x%08lx\n",
9669 + (unsigned long) tot_mem, (unsigned long) pfr->ring_memory);
9671 + printk("[PF_RING] ERROR: not enough memory for ring\n");
9675 + // memset(pfr->ring_memory, 0, tot_mem); // rvmalloc does the memset already
9677 + pfr->slots_info = (FlowSlotInfo*)pfr->ring_memory;
9678 + pfr->ring_slots = (char*)(pfr->ring_memory+sizeof(FlowSlotInfo));
9680 + pfr->slots_info->version = RING_FLOWSLOT_VERSION;
9681 + pfr->slots_info->slot_len = the_slot_len;
9682 + pfr->slots_info->data_len = pfr->bucket_len;
9683 + pfr->slots_info->tot_slots = (tot_mem-sizeof(FlowSlotInfo))/the_slot_len;
9684 + pfr->slots_info->tot_mem = tot_mem;
9685 + pfr->slots_info->sample_rate = 1;
9687 + printk("[PF_RING] allocated %d slots [slot_len=%d][tot_mem=%u]\n",
9688 + pfr->slots_info->tot_slots, pfr->slots_info->slot_len,
9689 + pfr->slots_info->tot_mem);
9695 + for(i=0; i<pfr->slots_info->tot_slots; i++) {
9696 + unsigned long idx = i*pfr->slots_info->slot_len;
9697 + FlowSlot *slot = (FlowSlot*)&pfr->ring_slots[idx];
9698 + slot->magic = RING_MAGIC_VALUE; slot->slot_state = 0;
9703 + pfr->sample_rate = 1; /* No sampling */
9704 + pfr->insert_page_id = 1, pfr->insert_slot_id = 0;
9705 + pfr->rules_default_accept_policy = 1, pfr->num_filtering_rules = 0;
9706 + ring_proc_add(ring_sk(sk), dev);
9708 + if(dev->ifindex < MAX_NUM_DEVICES) {
9709 + device_ring_list_element *elem;
9711 + /* printk("[PF_RING] Adding ring to device index %d\n", dev->ifindex); */
9713 + elem = kmalloc(sizeof(device_ring_list_element), GFP_ATOMIC);
9714 + if(elem != NULL) {
9715 + elem->the_ring = pfr;
9716 + INIT_LIST_HEAD(&elem->list);
9717 + list_add(&elem->list, &device_ring_list[dev->ifindex]);
9718 + /* printk("[PF_RING] Added ring to device index %d\n", dev->ifindex); */
9724 + Leave this as the last statement: once
9725 + ring_netdev != NULL the socket is ready to be used.
9727 + pfr->ring_netdev = dev;
9732 +/* ************************************* */
9734 +/* Bind to a device */
9735 +static int ring_bind(struct socket *sock,
9736 + struct sockaddr *sa, int addr_len)
9738 + struct sock *sk=sock->sk;
9739 + struct net_device *dev = NULL;
9741 +#if defined(RING_DEBUG)
9742 + printk("[PF_RING] ring_bind() called\n");
10748 + if(addr_len != sizeof(struct sockaddr))
10749 + return -EINVAL;
10750 + if(sa->sa_family != PF_RING)
10751 + return -EINVAL;
10752 + if(sa->sa_data == NULL)
10753 + return -EINVAL;
9755 + /* Safety check: add trailing zero if missing */
9756 + sa->sa_data[sizeof(sa->sa_data)-1] = '\0';
9758 +#if defined(RING_DEBUG)
9759 + printk("[PF_RING] searching device %s\n", sa->sa_data);
9762 + if((dev = __dev_get_by_name(
9763 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
9766 + sa->sa_data)) == NULL) {
9767 +#if defined(RING_DEBUG)
9768 + printk("[PF_RING] search failed\n");
9772 + return(packet_ring_bind(sk, dev));
9775 +/* ************************************* */
9778 + * rvmalloc / rvfree / kvirt_to_pa copied from usbvideo.c
9780 +unsigned long kvirt_to_pa(unsigned long adr)
9782 + unsigned long kva, ret;
9784 + kva = (unsigned long) page_address(vmalloc_to_page((void *)adr));
9785 + kva |= adr & (PAGE_SIZE-1); /* restore the offset */
9790 +/* ************************************* */
9792 +static int do_memory_mmap(struct vm_area_struct *vma,
9793 + unsigned long size, char *ptr,
9794 + u_int flags, int mode) {
9795 + unsigned long start;
9796 + unsigned long page;
9798 + /* we do not want to have this area swapped out, lock it */
9799 + vma->vm_flags |= flags;
9800 + start = vma->vm_start;
9807 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,11))
9808 + page = vmalloc_to_pfn(ptr);
9809 + rc = remap_pfn_range(vma, start, page, PAGE_SIZE, PAGE_SHARED);
9811 + page = vmalloc_to_page(ptr);
9812 + page = kvirt_to_pa(ptr);
9813 + rc = remap_page_range(vma, start, page, PAGE_SIZE, PAGE_SHARED);
9815 + } else if(mode == 1) {
9816 + rc = remap_pfn_range(vma, start,
9817 + __pa(ptr) >> PAGE_SHIFT,
9818 + PAGE_SIZE, PAGE_SHARED);
9820 + rc = remap_pfn_range(vma, start,
9821 + ((unsigned long)ptr) >> PAGE_SHIFT,
9822 + PAGE_SIZE, PAGE_SHARED);
9826 +#if defined(RING_DEBUG)
9827 + printk("[PF_RING] remap_pfn_range() failed\n");
9832 + start += PAGE_SIZE;
9834 + if (size > PAGE_SIZE) {
9835 + size -= PAGE_SIZE;
9844 +/* ************************************* */
9846 +static int ring_mmap(struct file *file,
9847 + struct socket *sock,
9848 + struct vm_area_struct *vma)
9850 + struct sock *sk = sock->sk;
9851 + struct ring_opt *pfr = ring_sk(sk);
9853 + unsigned long size = (unsigned long)(vma->vm_end - vma->vm_start);
9855 + if(size % PAGE_SIZE) {
9856 +#if defined(RING_DEBUG)
9857 + printk("[PF_RING] ring_mmap() failed: "
9858 + "len is not multiple of PAGE_SIZE\n");
9863 +#if defined(RING_DEBUG)
9864 + printk("[PF_RING] ring_mmap() called, size: %ld bytes\n", size);
9867 + if((pfr->dna_device == NULL) && (pfr->ring_memory == NULL)) {
9868 +#if defined(RING_DEBUG)
9869 + printk("[PF_RING] ring_mmap() failed: "
9870 + "mapping area to an unbound socket\n");
9875 + if(pfr->dna_device == NULL) {
9876 + /* if userspace tries to mmap beyond end of our buffer, fail */
9877 + if(size > pfr->slots_info->tot_mem) {
9878 +#if defined(RING_DEBUG)
9879 + printk("[PF_RING] ring_mmap() failed: "
9880 + "area too large [%ld > %d]\n",
9881 + size, pfr->slots_info->tot_mem);
9886 +#if defined(RING_DEBUG)
9887 + printk("[PF_RING] mmap [slot_len=%d]"
9888 + "[tot_slots=%d] for ring on device %s\n",
9889 + pfr->slots_info->slot_len, pfr->slots_info->tot_slots,
9890 + pfr->ring_netdev->name);
9893 + if((rc = do_memory_mmap(vma, size, pfr->ring_memory, VM_LOCKED, 0)) < 0)
9897 + if(pfr->dna_device == NULL) return(-EAGAIN);
9899 + switch(pfr->mmap_count) {
9901 + if((rc = do_memory_mmap(vma, size,
9902 + (void*)pfr->dna_device->packet_memory,
9903 + VM_LOCKED, 1)) < 0)
9908 + if((rc = do_memory_mmap(vma, size,
9909 + (void*)pfr->dna_device->descr_packet_memory,
9910 + VM_LOCKED, 1)) < 0)
9915 + if((rc = do_memory_mmap(vma, size,
9916 + (void*)pfr->dna_device->phys_card_memory,
9917 + (VM_RESERVED | VM_IO), 2)) < 0)
9925 + pfr->mmap_count++;
9928 +#if defined(RING_DEBUG)
9929 + printk("[PF_RING] ring_mmap succeeded\n");
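+ /*
+  * Hypothetical userland sketch (the PF_RING address family value is
+  * defined elsewhere in this patch; error checking omitted):
+  *
+  *   int fd = socket(PF_RING, SOCK_RAW, htons(ETH_P_ALL));
+  *   struct sockaddr sa;
+  *   sa.sa_family = PF_RING;
+  *   snprintf(sa.sa_data, sizeof(sa.sa_data), "eth0");
+  *   bind(fd, &sa, sizeof(sa));
+  *   size_t len = ring_size;  // PAGE_SIZE multiple <= slots_info->tot_mem
+  *   char *ring = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+  *   FlowSlotInfo *slots_info = (FlowSlotInfo*)ring;
+  */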
9935 +/* ************************************* */
9937 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
9938 +static int ring_recvmsg(struct kiocb *iocb, struct socket *sock,
9939 + struct msghdr *msg, size_t len, int flags)
9941 + static int ring_recvmsg(struct socket *sock, struct msghdr *msg, int len,
9942 + int flags, struct scm_cookie *scm)
9946 + struct ring_opt *pfr = ring_sk(sock->sk);
9947 + u_int32_t queued_pkts, num_loops = 0;
9949 +#if defined(RING_DEBUG)
9950 + printk("[PF_RING] ring_recvmsg called\n");
9953 + pfr->ring_active = 1;
9954 + slot = get_remove_slot(pfr);
9956 + while((queued_pkts = num_queued_pkts(pfr)) < MIN_QUEUED_PKTS) {
9957 + wait_event_interruptible(pfr->ring_slots_waitqueue, 1);
9959 +#if defined(RING_DEBUG)
9960 + printk("[PF_RING] -> ring_recvmsg returning %d [queued_pkts=%d][num_loops=%d]\n",
9961 + slot->slot_state, queued_pkts, num_loops);
9964 + if(queued_pkts > 0) {
9965 + if(num_loops++ > MAX_QUEUE_LOOPS)
9970 +#if defined(RING_DEBUG)
9972 + printk("[PF_RING] ring_recvmsg is returning [queued_pkts=%d][num_loops=%d]\n",
9973 + queued_pkts, num_loops);
9976 + return(queued_pkts);
9979 +/* ************************************* */
9981 +unsigned int ring_poll(struct file * file,
9982 + struct socket *sock, poll_table *wait)
9985 + struct ring_opt *pfr = ring_sk(sock->sk);
9988 + /* printk("[PF_RING] -- poll called\n"); */
9990 + if(pfr->dna_device == NULL) {
9991 + /* PF_RING mode */
9993 +#if defined(RING_DEBUG)
9994 + printk("[PF_RING] poll called (non DNA device)\n");
9997 + pfr->ring_active = 1;
9998 + slot = get_remove_slot(pfr);
10000 + if((slot != NULL) && (slot->slot_state == 0))
10001 + poll_wait(file, &pfr->ring_slots_waitqueue, wait);
10003 +#if defined(RING_DEBUG)
10004 + printk("[PF_RING] poll returning %d\n", slot->slot_state);
10007 + if((slot != NULL) && (slot->slot_state == 1))
10008 + return(POLLIN | POLLRDNORM);
10014 +#if defined(RING_DEBUG)
10015 + printk("[PF_RING] poll called on DNA device [%d]\n",
10016 + *pfr->dna_device->interrupt_received);
10019 + if(pfr->dna_device->wait_packet_function_ptr == NULL)
10022 + rc = pfr->dna_device->wait_packet_function_ptr(pfr->dna_device->adapter_ptr, 1);
10023 + if(rc == 0) /* No packet arrived yet */ {
10024 + /* poll_wait(file, pfr->dna_device->packet_waitqueue, wait); */
10026 + rc = pfr->dna_device->wait_packet_function_ptr(pfr->dna_device->adapter_ptr, 0);
10028 + //*pfr->dna_device->interrupt_received = rc;
10029 + if(rc == 0) rc = *pfr->dna_device->interrupt_received;
10031 +#if defined(RING_DEBUG)
10032 + printk("[PF_RING] poll %s return [%d]\n",
10033 + pfr->ring_netdev->name,
10034 + *pfr->dna_device->interrupt_received);
10038 + return(POLLIN | POLLRDNORM);
10045 +/* ************************************* */
10047 +int add_to_cluster_list(ring_cluster_element *el,
10048 + struct sock *sock)
10050 + if(el->cluster.num_cluster_elements == CLUSTER_LEN)
10051 + return(-1); /* Cluster full */
10053 + ring_sk_datatype(ring_sk(sock))->cluster_id = el->cluster.cluster_id;
10054 + el->cluster.sk[el->cluster.num_cluster_elements] = sock;
10055 + el->cluster.num_cluster_elements++;
10059 +/* ************************************* */
10061 +int remove_from_cluster_list(struct ring_cluster *el,
10062 + struct sock *sock)
10066 + for(i=0; i<CLUSTER_LEN; i++)
10067 + if(el->sk[i] == sock) {
10068 + el->num_cluster_elements--;
10070 + if(el->num_cluster_elements > 0) {
10071 + /* The cluster contains other elements */
10072 + for(j=i; j<CLUSTER_LEN-1; j++)
10073 + el->sk[j] = el->sk[j+1];
10075 + el->sk[CLUSTER_LEN-1] = NULL;
10077 + /* Empty cluster */
10078 + memset(el->sk, 0, sizeof(el->sk));
10084 + return(-1); /* Not found */
10087 +/* ************************************* */
10089 +static int remove_from_cluster(struct sock *sock,
10090 + struct ring_opt *pfr)
10092 + struct list_head *ptr, *tmp_ptr;
10094 +#if defined(RING_DEBUG)
10095 + printk("[PF_RING] --> remove_from_cluster(%d)\n", pfr->cluster_id);
10098 + if(pfr->cluster_id == 0 /* 0 = No Cluster */)
10099 + return(0); /* Nothing to do */
10101 + list_for_each_safe(ptr, tmp_ptr, &ring_cluster_list) {
10102 + ring_cluster_element *cluster_ptr;
10104 + cluster_ptr = list_entry(ptr, ring_cluster_element, list);
10106 + if(cluster_ptr->cluster.cluster_id == pfr->cluster_id) {
10107 + return(remove_from_cluster_list(&cluster_ptr->cluster, sock));
10111 + return(-EINVAL); /* Not found */
10114 +/* ************************************* */
10116 +static int add_to_cluster(struct sock *sock,
10117 + struct ring_opt *pfr,
10118 + u_short cluster_id)
10120 + struct list_head *ptr, *tmp_ptr;
10121 + ring_cluster_element *cluster_ptr;
10123 +#if defined(RING_DEBUG)
10124 + printk("[PF_RING] --> add_to_cluster(%d)\n", cluster_id);
10127 + if(cluster_id == 0 /* 0 = No Cluster */) return(-EINVAL);
10129 + if(pfr->cluster_id != 0)
10130 + remove_from_cluster(sock, pfr);
10132 + list_for_each_safe(ptr, tmp_ptr, &ring_cluster_list) {
10133 + cluster_ptr = list_entry(ptr, ring_cluster_element, list);
10135 + if(cluster_ptr->cluster.cluster_id == cluster_id) {
10136 + return(add_to_cluster_list(cluster_ptr, sock));
10140 + /* There's no existing cluster. We need to create one */
10141 + if((cluster_ptr = kmalloc(sizeof(ring_cluster_element),
10142 + GFP_KERNEL)) == NULL)
10145 + INIT_LIST_HEAD(&cluster_ptr->list);
10147 + cluster_ptr->cluster.cluster_id = cluster_id;
10148 + cluster_ptr->cluster.num_cluster_elements = 1;
10149 + cluster_ptr->cluster.hashing_mode = cluster_per_flow; /* Default */
10150 + cluster_ptr->cluster.hashing_id = 0;
10152 + memset(cluster_ptr->cluster.sk, 0, sizeof(cluster_ptr->cluster.sk));
10153 + cluster_ptr->cluster.sk[0] = sock;
10154 + pfr->cluster_id = cluster_id;
10156 + list_add(&cluster_ptr->list, &ring_cluster_list); /* Add as first entry */
10158 + return(0); /* 0 = OK */
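+ /*
+  * Hypothetical userland sketch: two sockets bound to the same device and
+  * added to cluster 1 share its traffic, by default per flow (see
+  * hash_skb() above); setsockopt level 0 is assumed:
+  *
+  *   u_int cluster = 1;
+  *   setsockopt(fd1, 0, SO_ADD_TO_CLUSTER, &cluster, sizeof(cluster));
+  *   setsockopt(fd2, 0, SO_ADD_TO_CLUSTER, &cluster, sizeof(cluster));
+  */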
10161 +/* ************************************* */
10163 +static int ring_map_dna_device(struct ring_opt *pfr,
10164 + dna_device_mapping *mapping) {
10167 + if(mapping->operation == remove_device_mapping) {
10168 + pfr->dna_device = NULL;
10170 + printk("[PF_RING] ring_map_dna_device(%s): removed mapping\n",
10171 + mapping->device_name);
10174 + struct list_head *ptr, *tmp_ptr;
10175 + dna_device_list *entry;
10177 + list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
10178 + entry = list_entry(ptr, dna_device_list, list);
10180 + if((!strcmp(entry->dev.netdev->name, mapping->device_name))
10181 + && (entry->dev.channel_id == mapping->channel_id)) {
10182 + pfr->dna_device = &entry->dev, pfr->ring_netdev = entry->dev.netdev;
10185 + printk("[PF_RING] ring_map_dna_device(%s): added mapping\n",
10186 + mapping->device_name);
10193 + printk("[PF_RING] ring_map_dna_device(%s): mapping failed\n",
10194 + mapping->device_name);
10199 +/* ************************************* */
10201 +static void purge_idle_hash_rules(struct ring_opt *pfr, uint16_t rule_inactivity)
10203 + int i, num_purged_rules = 0, debug = 0;
10204 + unsigned long expire_jiffies = jiffies - msecs_to_jiffies(1000*rule_inactivity);
10207 + printk("[PF_RING] purge_idle_hash_rules(rule_inactivity=%d)\n", rule_inactivity);
10209 + /* Free filtering hash rules inactive for more than rule_inactivity seconds */
10210 + if(pfr->filtering_hash != NULL) {
10211 + for(i=0; i<DEFAULT_RING_HASH_SIZE; i++) {
10212 + if(pfr->filtering_hash[i] != NULL) {
10213 + filtering_hash_bucket *scan = pfr->filtering_hash[i], *next, *prev = NULL;
10215 + while(scan != NULL) {
10216 + next = scan->next;
10218 + if(scan->rule.jiffies_last_match < expire_jiffies) {
10219 + /* Expired rule: free it */
10222 + printk("[PF_RING] Purging hash rule "
10223 + /* "[last_match=%u][expire_jiffies=%u]" */
10224 + "[%d.%d.%d.%d:%d <-> %d.%d.%d.%d:%d][purged=%d][tot_rules=%d]\n",
10226 + (unsigned int)scan->rule.jiffies_last_match,
10227 + (unsigned int)expire_jiffies,
10229 + ((scan->rule.host_peer_a >> 24) & 0xff),
10230 + ((scan->rule.host_peer_a >> 16) & 0xff),
10231 + ((scan->rule.host_peer_a >> 8) & 0xff),
10232 + ((scan->rule.host_peer_a >> 0) & 0xff),
10233 + scan->rule.port_peer_a,
10234 + ((scan->rule.host_peer_b >> 24) & 0xff),
10235 + ((scan->rule.host_peer_b >> 16) & 0xff),
10236 + ((scan->rule.host_peer_b >> 8) & 0xff),
10237 + ((scan->rule.host_peer_b >> 0) & 0xff),
10238 + scan->rule.port_peer_b,
10239 + num_purged_rules, pfr->num_filtering_rules);
10241 + if(scan->plugin_data_ptr != NULL) kfree(scan->plugin_data_ptr);
10245 + pfr->filtering_hash[i] = next;
10247 + prev->next = next;
10249 + pfr->num_filtering_rules--, num_purged_rules++;
10260 + printk("[PF_RING] Purged %d hash rules [tot_rules=%d]\n",
10261 + num_purged_rules, pfr->num_filtering_rules);
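+ /*
+  * Hypothetical userland sketch: purge hash rules idle for more than 60
+  * seconds (a 16-bit seconds value, as read by the
+  * SO_PURGE_IDLE_HASH_RULES handler below):
+  *
+  *   u_int16_t inactivity = 60;
+  *   setsockopt(fd, 0, SO_PURGE_IDLE_HASH_RULES, &inactivity, sizeof(inactivity));
+  */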
10264 +/* ************************************* */
10266 +/* Code taken/inspired from core/sock.c */
10267 +static int ring_setsockopt(struct socket *sock,
10268 + int level, int optname,
10269 + char __user *optval, int optlen)
10271 + struct ring_opt *pfr = ring_sk(sock->sk);
10272 + int val, found, ret = 0 /* OK */;
10273 + u_int cluster_id, debug = 0;
10274 + int32_t channel_id;
10275 + char devName[8], applName[32+1];
10276 + struct list_head *prev = NULL;
10277 + filtering_rule_element *entry, *rule;
10278 + u_int16_t rule_id, rule_inactivity;
10283 + if (get_user(val, (int *)optval))
10290 + case SO_ATTACH_FILTER:
10292 + if (optlen == sizeof(struct sock_fprog))
10294 + unsigned int fsize;
10295 + struct sock_fprog fprog;
10296 + struct sk_filter *filter;
10303 + Do not call copy_from_user within a held
10304 + spinlock (e.g. ring_mgmt_lock) as this caused
10305 + problems when certain debugging was enabled under
10306 + 2.6.5 -- including hard lockups of the machine.
10308 + if(copy_from_user(&fprog, optval, sizeof(fprog)))
10311 + /* Fix below courtesy of Noam Dev <noamdev@gmail.com> */
10312 + fsize = sizeof(struct sock_filter) * fprog.len;
10313 + filter = kmalloc(fsize + sizeof(struct sk_filter), GFP_KERNEL);
10315 + if(filter == NULL)
10321 + if(copy_from_user(filter->insns, fprog.filter, fsize))
10324 + filter->len = fprog.len;
10326 + if(sk_chk_filter(filter->insns, filter->len) != 0)
10328 + /* Bad filter specified */
10330 + pfr->bpfFilter = NULL;
10334 + /* get the lock, set the filter, release the lock */
10335 + write_lock(&pfr->ring_rules_lock);
10336 + pfr->bpfFilter = filter;
10337 + write_unlock(&pfr->ring_rules_lock);
10342 + case SO_DETACH_FILTER:
10343 + write_lock(&pfr->ring_rules_lock);
10345 + if(pfr->bpfFilter != NULL)
10347 + kfree(pfr->bpfFilter);
10348 + pfr->bpfFilter = NULL;
10351 + write_unlock(&pfr->ring_rules_lock);
10354 + case SO_ADD_TO_CLUSTER:
10355 + if (optlen!=sizeof(val))
10358 + if (copy_from_user(&cluster_id, optval, sizeof(cluster_id)))
10361 + write_lock(&pfr->ring_rules_lock);
10362 + ret = add_to_cluster(sock->sk, pfr, cluster_id);
10363 + write_unlock(&pfr->ring_rules_lock);
10366 + case SO_REMOVE_FROM_CLUSTER:
10367 + write_lock(&pfr->ring_rules_lock);
10368 + ret = remove_from_cluster(sock->sk, pfr);
10369 + write_unlock(&pfr->ring_rules_lock);
10372 + case SO_SET_CHANNEL_ID:
10373 + if(optlen != sizeof(channel_id))
10376 + if(copy_from_user(&channel_id, optval, sizeof(channel_id)))
10379 + pfr->channel_id = channel_id;
10380 +#if defined(RING_DEBUG)
10381 + printk("[PF_RING] [pfr->channel_id=%d][channel_id=%d]\n",
10382 + pfr->channel_id, channel_id);
10387 + case SO_SET_APPL_NAME:
10388 + if(optlen > sizeof(applName) /* Names should not be too long */)
10391 + if(copy_from_user(&applName, optval, optlen))
10394 + if(pfr->appl_name != NULL) kfree(pfr->appl_name);
10395 + pfr->appl_name = (char*)kmalloc(optlen+1, GFP_ATOMIC);
10396 + if(pfr->appl_name != NULL) {
10397 + memcpy(pfr->appl_name, applName, optlen);
10398 + pfr->appl_name[optlen] = '\0';
10404 + case SO_PURGE_IDLE_HASH_RULES:
10405 + if(optlen != sizeof(rule_inactivity))
10408 + if(copy_from_user(&rule_inactivity, optval, sizeof(rule_inactivity)))
10411 + if(rule_inactivity > 0) {
10412 + write_lock(&pfr->ring_rules_lock);
10413 + purge_idle_hash_rules(pfr, rule_inactivity);
10414 + write_unlock(&pfr->ring_rules_lock);
10420 + case SO_SET_REFLECTOR:
10421 + if(optlen >= (sizeof(devName)-1))
10426 + if(copy_from_user(devName, optval, optlen))
10430 + devName[optlen] = '\0';
10432 +#if defined(RING_DEBUG)
10433 + printk("[PF_RING] +++ SO_SET_REFLECTOR(%s)\n", devName);
10436 + write_lock(&pfr->ring_rules_lock);
10437 + pfr->reflector_dev = dev_get_by_name(
10438 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,24))
10442 + write_unlock(&pfr->ring_rules_lock);
10444 +#if defined(RING_DEBUG)
10445 + if(pfr->reflector_dev != NULL)
10446 + printk("[PF_RING] SO_SET_REFLECTOR(%s): succeeded\n", devName);
10448 + printk("[PF_RING] SO_SET_REFLECTOR(%s): device unknown\n", devName);
10452 + case SO_TOGGLE_FILTER_POLICY:
10453 + if(optlen != sizeof(u_int8_t))
10456 + u_int8_t new_policy;
10458 + if(copy_from_user(&new_policy, optval, optlen))
10461 + write_lock(&pfr->ring_rules_lock);
10462 + pfr->rules_default_accept_policy = new_policy;
10463 + write_unlock(&pfr->ring_rules_lock);
10465 + if(debug) printk("[PF_RING] SO_TOGGLE_FILTER_POLICY: default policy is %s\n",
10466 + pfr->rules_default_accept_policy ? "accept" : "drop");
10471 + case SO_ADD_FILTERING_RULE:
10472 + if(debug) printk("[PF_RING] +++ SO_ADD_FILTERING_RULE(len=%d)\n", optlen);
10474 + if(optlen == sizeof(filtering_rule)) {
10475 + struct list_head *ptr, *tmp_ptr;
10477 + if(debug) printk("[PF_RING] Allocating memory\n");
10479 + rule = (filtering_rule_element*)kcalloc(1, sizeof(filtering_rule_element), GFP_KERNEL);
10484 + if(copy_from_user(&rule->rule, optval, optlen))
10487 + INIT_LIST_HEAD(&rule->list);
10489 + if(rule->rule.extended_fields.filter_plugin_id > 0) {
10492 + if(rule->rule.extended_fields.filter_plugin_id >= MAX_PLUGIN_ID)
10494 + else if(plugin_registration[rule->rule.extended_fields.filter_plugin_id] == NULL)
10503 + if(rule->rule.plugin_action.plugin_id > 0) {
10506 + if(rule->rule.plugin_action.plugin_id >= MAX_PLUGIN_ID)
10508 + else if(plugin_registration[rule->rule.plugin_action.plugin_id] == NULL)
10517 + /* Compile pattern if present */
10518 + if(strlen(rule->rule.extended_fields.payload_pattern) > 0)
10522 + rule->pattern = regcomp(rule->rule.extended_fields.payload_pattern,
10525 + if(rule->pattern == NULL) {
10526 + printk("[PF_RING] Unable to compile pattern '%s'\n",
10527 + rule->rule.extended_fields.payload_pattern);
10528 + rule->pattern = NULL;
10530 + printk("[PF_RING] Compiled pattern '%s'\n", rule->rule.extended_fields.payload_pattern);
10532 + rule->pattern = NULL;
10534 + write_lock(&pfr->ring_rules_lock);
10535 + if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: About to add rule %d\n", rule->rule.rule_id);
10537 + /* Implement an ordered add */
10538 + list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
10540 + entry = list_entry(ptr, filtering_rule_element, list);
10542 + if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: [current rule %d][rule to add %d]\n",
10543 + entry->rule.rule_id, rule->rule.rule_id);
10545 + if(entry->rule.rule_id == rule->rule.rule_id)
10547 + memcpy(&entry->rule, &rule->rule, sizeof(filtering_rule));
10548 + if(entry->pattern != NULL) kfree(entry->pattern);
10549 + entry->pattern = rule->pattern;
10552 + if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: overwritten rule_id %d\n", entry->rule.rule_id);
10554 + } else if(entry->rule.rule_id > rule->rule.rule_id) {
10555 + if(prev == NULL) {
10556 + list_add(&rule->list, &pfr->rules); /* Add as first entry */
10557 + pfr->num_filtering_rules++;
10558 + if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d as head rule\n", rule->rule.rule_id);
10560 + list_add(&rule->list, prev);
10561 + pfr->num_filtering_rules++;
10562 + if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d\n", rule->rule.rule_id);
10575 + list_add(&rule->list, &pfr->rules); /* Add as first entry */
10576 + pfr->num_filtering_rules++;
10577 + if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d as first rule\n", rule->rule.rule_id);
10581 + list_add_tail(&rule->list, &pfr->rules); /* Add as last entry */
10582 + pfr->num_filtering_rules++;
10583 + if(debug) printk("[PF_RING] SO_ADD_FILTERING_RULE: added rule %d as last rule\n", rule->rule.rule_id);
10587 + write_unlock(&pfr->ring_rules_lock);
10588 + } else if(optlen == sizeof(hash_filtering_rule)) {
10589 + /* This is a hash rule */
10590 + filtering_hash_bucket *rule = (filtering_hash_bucket*)kcalloc(1, sizeof(filtering_hash_bucket), GFP_KERNEL);
10596 + if(copy_from_user(&rule->rule, optval, optlen))
10599 + write_lock(&pfr->ring_rules_lock);
10600 + rc = handle_filtering_hash_bucket(pfr, rule, 1 /* add */);
10601 + pfr->num_filtering_rules++;
10602 + write_unlock(&pfr->ring_rules_lock);
10609 + printk("[PF_RING] Bad rule length (%d): discarded\n", optlen);
10614 + case SO_REMOVE_FILTERING_RULE:
10615 + if(optlen == sizeof(u_int16_t /* rule_id */))
10617 + /* This is a list rule */
10618 + u_int8_t rule_found = 0;
10619 + struct list_head *ptr, *tmp_ptr;
10621 + if(copy_from_user(&rule_id, optval, optlen))
10624 + write_lock(&pfr->ring_rules_lock);
10626 + list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
10628 + entry = list_entry(ptr, filtering_rule_element, list);
10630 + if(entry->rule.rule_id == rule_id)
10632 + if(entry->pattern) kfree(entry->pattern);
10634 + pfr->num_filtering_rules--;
10635 + if(entry->plugin_data_ptr != NULL) kfree(entry->plugin_data_ptr);
10637 + if(debug) printk("[PF_RING] SO_REMOVE_FILTERING_RULE: rule %d has been removed\n", rule_id);
10643 + write_unlock(&pfr->ring_rules_lock);
10644 + if(!rule_found) {
10645 + if(debug) printk("[PF_RING] SO_REMOVE_FILTERING_RULE: rule %d does not exist\n", rule_id);
10646 + return -EFAULT; /* Rule not found */
10648 + } else if(optlen == sizeof(hash_filtering_rule)) {
10649 + /* This is a hash rule */
10650 + filtering_hash_bucket rule;
10653 + if(copy_from_user(&rule.rule, optval, optlen))
10656 + write_lock(&pfr->ring_rules_lock);
10657 + rc = handle_filtering_hash_bucket(pfr, &rule, 0 /* delete */);
10658 + pfr->num_filtering_rules--;
10659 + write_unlock(&pfr->ring_rules_lock);
10660 + if(rc != 0) return(rc);
10665 + case SO_SET_SAMPLING_RATE:
10666 + if(optlen != sizeof(pfr->sample_rate))
10669 + if(copy_from_user(&pfr->sample_rate, optval, sizeof(pfr->sample_rate)))
10673 + case SO_ACTIVATE_RING:
10674 + if(debug) printk("[PF_RING] * SO_ACTIVATE_RING *\n");
10675 + found = 1, pfr->ring_active = 1;
10678 + case SO_RING_BUCKET_LEN:
10679 + if(optlen != sizeof(u_int32_t))
10682 + if(copy_from_user(&pfr->bucket_len, optval, optlen))
10687 + case SO_MAP_DNA_DEVICE:
10688 + if(optlen != sizeof(dna_device_mapping))
10691 + dna_device_mapping mapping;
10693 + if(copy_from_user(&mapping, optval, optlen))
10696 + ret = ring_map_dna_device(pfr, &mapping), found = 1;
10709 + return(sock_setsockopt(sock, level, optname, optval, optlen));
10712 +/* ************************************* */
10714 +static int ring_getsockopt(struct socket *sock,
10715 + int level, int optname,
10716 + char __user *optval,
10717 + int __user *optlen)
10719 + int len, debug = 0;
10720 + struct ring_opt *pfr = ring_sk(sock->sk);
10725 + if(get_user(len, optlen))
10733 + case SO_GET_RING_VERSION:
10735 + u_int32_t version = RING_VERSION_NUM;
10737 + if(copy_to_user(optval, &version, sizeof(version)))
10742 + case PACKET_STATISTICS:
10744 + struct tpacket_stats st;
10746 + if (len > sizeof(struct tpacket_stats))
10747 + len = sizeof(struct tpacket_stats);
10749 + st.tp_packets = pfr->slots_info->tot_insert;
10750 + st.tp_drops = pfr->slots_info->tot_lost;
10752 + if (copy_to_user(optval, &st, len))
10757 + case SO_GET_HASH_FILTERING_RULE_STATS:
10759 + int rc = -EFAULT;
10761 + if(len >= sizeof(hash_filtering_rule)) {
10762 + hash_filtering_rule rule;
10765 + if(pfr->filtering_hash == NULL) {
10766 + printk("[PF_RING] so_get_hash_filtering_rule_stats(): no hash table allocated\n");
10770 + if(copy_from_user(&rule, optval, sizeof(rule))) {
10771 + printk("[PF_RING] so_get_hash_filtering_rule_stats: copy_from_user() failure\n");
10776 + printk("[PF_RING] so_get_hash_filtering_rule_stats"
10777 + "(vlan=%u, proto=%u, sip=%u, sport=%u, dip=%u, dport=%u)\n",
10778 + rule.vlan_id, rule.proto,
10779 + rule.host_peer_a, rule.port_peer_a,
10780 + rule.host_peer_b, rule.port_peer_b);
10782 + hash_idx = hash_pkt(rule.vlan_id, rule.proto,
10783 + rule.host_peer_a, rule.host_peer_b,
10784 + rule.port_peer_a, rule.port_peer_b) % DEFAULT_RING_HASH_SIZE;
10786 + if(pfr->filtering_hash[hash_idx] != NULL) {
10787 + filtering_hash_bucket *bucket;
10789 + read_lock(&pfr->ring_rules_lock);
10790 + bucket = pfr->filtering_hash[hash_idx];
10792 + if(debug) printk("[PF_RING] so_get_hash_filtering_rule_stats(): bucket=%p\n", bucket);
10794 + while(bucket != NULL) {
10795 + if(hash_bucket_match_rule(bucket, &rule)) {
10796 + char *buffer = kmalloc(len, GFP_ATOMIC);
10798 + if(buffer == NULL) {
10799 + printk("[PF_RING] so_get_hash_filtering_rule_stats() no memory failure\n");
10802 + if((plugin_registration[rule.plugin_action.plugin_id] == NULL)
10803 + || (plugin_registration[rule.plugin_action.plugin_id]->pfring_plugin_get_stats == NULL)) {
10804 + printk("[PF_RING] Found rule but pluginId %d is not registered\n",
10805 + rule.plugin_action.plugin_id);
10808 + rc = plugin_registration[rule.plugin_action.plugin_id]->
10809 + pfring_plugin_get_stats(pfr, NULL, bucket, buffer, len);
10812 + if(copy_to_user(optval, buffer, rc)) {
10813 + printk("[PF_RING] copy_to_user() failure\n");
10820 + bucket = bucket->next;
10823 + read_unlock(&pfr->ring_rules_lock);
10826 + printk("[PF_RING] so_get_hash_filtering_rule_stats(): entry not found [hash_idx=%d]\n",
10835 + case SO_GET_FILTERING_RULE_STATS:
10837 + char *buffer = NULL;
10838 + int rc = -EFAULT;
10839 + struct list_head *ptr, *tmp_ptr;
10840 + u_int16_t rule_id;
10842 + if(len < sizeof(rule_id))
10845 + if(copy_from_user(&rule_id, optval, sizeof(rule_id)))
10849 + printk("[PF_RING] SO_GET_FILTERING_RULE_STATS: rule_id=%d\n", rule_id);
10851 + read_lock(&pfr->ring_rules_lock);
10852 + list_for_each_safe(ptr, tmp_ptr, &pfr->rules)
10854 + filtering_rule_element *rule;
10856 + rule = list_entry(ptr, filtering_rule_element, list);
10857 + if(rule->rule.rule_id == rule_id)
10859 + buffer = kmalloc(len, GFP_ATOMIC);
10861 + if(buffer == NULL)
10864 + if((plugin_registration[rule->rule.plugin_action.plugin_id] == NULL)
10865 + || (plugin_registration[rule->rule.plugin_action.plugin_id]->pfring_plugin_get_stats == NULL)) {
10866 + printk("[PF_RING] Found rule %d but pluginId %d is not registered\n",
10867 + rule_id, rule->rule.plugin_action.plugin_id);
10870 + rc = plugin_registration[rule->rule.plugin_action.plugin_id]
10871 + ->pfring_plugin_get_stats(pfr, rule, NULL, buffer, len);
10874 + if(copy_to_user(optval, buffer, rc)) {
10883 + read_unlock(&pfr->ring_rules_lock);
10884 + if(buffer != NULL) kfree(buffer);
10886 + /* printk("[PF_RING] SO_GET_FILTERING_RULE_STATS *END*\n"); */
10891 + case SO_GET_MAPPED_DNA_DEVICE:
10893 + if(pfr->dna_device == NULL)
10896 + if (len > sizeof(dna_device))
10897 + len = sizeof(dna_device);
10899 + if (copy_to_user(optval, pfr->dna_device, len))
10906 + return -ENOPROTOOPT;
10909 + if(put_user(len, optlen))
10915 +/* ************************************* */
10917 +u_int get_num_device_free_slots(int ifindex) {
10920 + if((ifindex >= 0) && (ifindex < MAX_NUM_DEVICES)) {
10921 + struct list_head *ptr, *tmp_ptr;
10922 + device_ring_list_element *entry;
10924 + list_for_each_safe(ptr, tmp_ptr, &device_ring_list[ifindex]) {
10925 + int num_free_slots;
10927 + entry = list_entry(ptr, device_ring_list_element, list);
10929 + num_free_slots = get_num_ring_free_slots(entry->the_ring);
10931 + if(num_free_slots == 0)
10935 + num = num_free_slots;
10936 + else if(num > num_free_slots)
10937 + num = num_free_slots;
10945 +/* ************************************* */
10947 +void dna_device_handler(dna_device_operation operation,
10948 + unsigned long packet_memory,
10949 + u_int packet_memory_num_slots,
10950 + u_int packet_memory_slot_len,
10951 + u_int packet_memory_tot_len,
10952 + void *descr_packet_memory,
10953 + u_int descr_packet_memory_num_slots,
10954 + u_int descr_packet_memory_slot_len,
10955 + u_int descr_packet_memory_tot_len,
10956 + u_int channel_id,
10957 + void *phys_card_memory,
10958 + u_int phys_card_memory_len,
10959 + struct net_device *netdev,
10960 + dna_device_model device_model,
10961 + wait_queue_head_t *packet_waitqueue,
10962 + u_int8_t *interrupt_received,
10963 + void *adapter_ptr,
10964 + dna_wait_packet wait_packet_function_ptr) {
10968 + printk("[PF_RING] dna_device_handler(%s)\n", netdev->name);
10970 + if(operation == add_device_mapping) {
10971 + dna_device_list *next;
10973 + next = kmalloc(sizeof(dna_device_list), GFP_ATOMIC);
10974 + if(next != NULL) {
10975 + next->dev.packet_memory = packet_memory;
10976 + next->dev.packet_memory_num_slots = packet_memory_num_slots;
10977 + next->dev.packet_memory_slot_len = packet_memory_slot_len;
10978 + next->dev.packet_memory_tot_len = packet_memory_tot_len;
10979 + next->dev.descr_packet_memory = descr_packet_memory;
10980 + next->dev.descr_packet_memory_num_slots = descr_packet_memory_num_slots;
10981 + next->dev.descr_packet_memory_slot_len = descr_packet_memory_slot_len;
10982 + next->dev.descr_packet_memory_tot_len = descr_packet_memory_tot_len;
10983 + next->dev.phys_card_memory = phys_card_memory;
10984 + next->dev.phys_card_memory_len = phys_card_memory_len;
10985 + next->dev.channel_id = channel_id;
10986 + next->dev.netdev = netdev;
10987 + next->dev.device_model = device_model;
10988 + next->dev.packet_waitqueue = packet_waitqueue;
10989 + next->dev.interrupt_received = interrupt_received;
10990 + next->dev.adapter_ptr = adapter_ptr;
10991 + next->dev.wait_packet_function_ptr = wait_packet_function_ptr;
10992 + list_add(&next->list, &ring_dna_devices_list);
10993 + dna_devices_list_size++;
10995 + printk("[PF_RING] Could not kmalloc slot!!\n");
10998 + struct list_head *ptr, *tmp_ptr;
10999 + dna_device_list *entry;
11001 + list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
11002 + entry = list_entry(ptr, dna_device_list, list);
11004 + if((entry->dev.netdev == netdev)
11005 + && (entry->dev.channel_id == channel_id)) {
11006 + list_del(ptr);
11007 + kfree(entry);
11008 + dna_devices_list_size--;
11009 + break;
11010 + }
11015 + printk("[PF_RING] dna_device_handler(%s): [dna_devices_list_size=%d]\n",
11016 + netdev->name, dna_devices_list_size);
11019 +/* ************************************* */
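+/* ioctl handler for ring sockets: the standard interface ioctls are
+   delegated to the inet_dgram_ops handler when CONFIG_INET is available;
+   anything else is rejected with -ENOIOCTLCMD. */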
11021 +static int ring_ioctl(struct socket *sock,
11022 + unsigned int cmd, unsigned long arg)
11023 +{
11024 + switch(cmd) {
11025 +#ifdef CONFIG_INET
11026 + case SIOCGIFFLAGS:
11027 + case SIOCSIFFLAGS:
11028 + case SIOCGIFCONF:
11029 + case SIOCGIFMETRIC:
11030 + case SIOCSIFMETRIC:
11035 + case SIOCSIFLINK:
11036 + case SIOCGIFHWADDR:
11037 + case SIOCSIFHWADDR:
11040 + case SIOCSIFSLAVE:
11041 + case SIOCGIFSLAVE:
11042 + case SIOCGIFINDEX:
11043 + case SIOCGIFNAME:
11044 + case SIOCGIFCOUNT:
11045 + case SIOCSIFHWBROADCAST:
11046 + return(inet_dgram_ops.ioctl(sock, cmd, arg));
11047 +#endif
11049 + default:
11050 + return -ENOIOCTLCMD;
11056 +/* ************************************* */
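+/* Socket operations for PF_RING sockets: connection-oriented calls are
+   stubbed out with the kernel's sock_no_* helpers, while the ring-specific
+   entry points (bind, mmap, poll, get/setsockopt, recvmsg, ...) do the
+   real work. */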
11058 +static struct proto_ops ring_ops = {
11059 + .family = PF_RING,
11060 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
11061 + .owner = THIS_MODULE,
11062 +#endif
11064 + /* Operations that make no sense on ring sockets. */
11065 + .connect = sock_no_connect,
11066 + .socketpair = sock_no_socketpair,
11067 + .accept = sock_no_accept,
11068 + .getname = sock_no_getname,
11069 + .listen = sock_no_listen,
11070 + .shutdown = sock_no_shutdown,
11071 + .sendpage = sock_no_sendpage,
11072 + .sendmsg = sock_no_sendmsg,
11074 + /* Now the operations that really occur. */
11075 + .release = ring_release,
11076 + .bind = ring_bind,
11077 + .mmap = ring_mmap,
11078 + .poll = ring_poll,
11079 + .setsockopt = ring_setsockopt,
11080 + .getsockopt = ring_getsockopt,
11081 + .ioctl = ring_ioctl,
11082 + .recvmsg = ring_recvmsg,
11083 +};
11085 +/* ************************************ */
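+/* Protocol family descriptor: lets socket(PF_RING, ...) reach ring_create(). */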
11087 +static struct net_proto_family ring_family_ops = {
11088 + .family = PF_RING,
11089 + .create = ring_create,
11090 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
11091 + .owner = THIS_MODULE,
11092 +#endif
11093 +};
11095 +/* BD: API changed in 2.6.12, ref:
11096 +   http://svn.clkao.org/svnweb/linux/revision/?rev=28201 */
11097 +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,6,11))
11098 +static struct proto ring_proto = {
11099 + .name = "PF_RING",
11100 + .owner = THIS_MODULE,
11101 + .obj_size = sizeof(struct sock),
11102 +};
11103 +#endif
11105 +/* ************************************ */
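+/* Module unload path: drop every active ring, cluster and DNA mapping,
+   clear the hooks previously installed into the kernel (so the stack stops
+   calling into this module), then unregister the PF_RING family and the
+   /proc entries. */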
11107 +static void __exit ring_exit(void)
11108 +{
11109 + struct list_head *ptr, *tmp_ptr;
11110 + struct ring_element *entry;
11112 + list_for_each_safe(ptr, tmp_ptr, &ring_table) {
11113 + entry = list_entry(ptr, struct ring_element, list);
11114 + list_del(ptr);
11115 + kfree(entry);
11116 + }
11118 + list_for_each_safe(ptr, tmp_ptr, &ring_cluster_list) {
11119 + ring_cluster_element *cluster_ptr;
11121 + cluster_ptr = list_entry(ptr, ring_cluster_element, list);
11122 + list_del(ptr);
11124 + kfree(cluster_ptr);
11127 + list_for_each_safe(ptr, tmp_ptr, &ring_dna_devices_list) {
11128 + dna_device_list *elem;
11130 + elem = list_entry(ptr, dna_device_list, list);
11131 + list_del(ptr);
11132 + kfree(elem);
11133 + }
11136 + set_register_pfring_plugin(NULL);
11137 + set_unregister_pfring_plugin(NULL);
11138 + set_skb_ring_handler(NULL);
11139 + set_add_hdr_to_ring(NULL);
11140 + set_buffer_ring_handler(NULL);
11141 + set_read_device_pfring_free_slots(NULL);
11142 + set_ring_dna_device_handler(NULL);
11143 + sock_unregister(PF_RING);
11144 + ring_proc_term();
11145 + printk("[PF_RING] unloaded\n");
11148 +/* ************************************ */
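+/* Module load path: initialize the global lists, register the PF_RING
+   socket family and install the packet/plugin/DNA hooks. The buffer handler
+   is read back afterwards to verify that the hooks were actually taken. */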
11150 +static int __init ring_init(void)
11151 +{
11152 + int i;
11154 + printk("[PF_RING] Welcome to PF_RING %s\n"
11155 + "(C) 2004-09 L.Deri <deri@ntop.org>\n",
11156 + RING_VERSION);
11158 + INIT_LIST_HEAD(&ring_table);
11159 + INIT_LIST_HEAD(&ring_cluster_list);
11160 + INIT_LIST_HEAD(&ring_dna_devices_list);
11162 + for(i=0; i<MAX_NUM_DEVICES; i++)
11163 + INIT_LIST_HEAD(&device_ring_list[i]);
11165 + sock_register(&ring_family_ops);
11167 + set_skb_ring_handler(skb_ring_handler);
11168 + set_add_hdr_to_ring(add_hdr_to_ring);
11169 + set_buffer_ring_handler(buffer_ring_handler);
11170 + set_register_pfring_plugin(register_plugin);
11171 + set_unregister_pfring_plugin(unregister_plugin);
11172 + set_read_device_pfring_free_slots(get_num_device_free_slots);
11173 + set_ring_dna_device_handler(dna_device_handler);
11175 + if(get_buffer_ring_handler() != buffer_ring_handler) {
11176 + printk("[PF_RING] set_buffer_ring_handler FAILED\n");
11178 + set_skb_ring_handler(NULL);
11179 + set_buffer_ring_handler(NULL);
11180 + sock_unregister(PF_RING);
11181 + return -1;
11182 + } else {
11183 + printk("[PF_RING] Ring slots %d\n", num_slots);
11184 + printk("[PF_RING] Slot version %d\n", RING_FLOWSLOT_VERSION);
11185 + printk("[PF_RING] Capture TX %s\n",
11186 + enable_tx_capture ? "Yes [RX+TX]" : "No [RX only]");
11187 + printk("[PF_RING] IP Defragment %s\n", enable_ip_defrag ? "Yes" : "No");
11188 + printk("[PF_RING] Initialized correctly\n");
11190 + ring_proc_init();
11191 + return 0;
11192 + }
11193 +}
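+/* Load-time configuration sketch: the variables echoed above (num_slots,
+   enable_tx_capture, enable_ip_defrag) are exposed as module parameters in
+   the stock PF_RING build; the values below are purely illustrative:
+
+     insmod ./ring.ko num_slots=8192 enable_tx_capture=1 enable_ip_defrag=0
+*/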
11195 +module_init(ring_init);
11196 +module_exit(ring_exit);
11198 +MODULE_LICENSE("GPL");
11199 +MODULE_AUTHOR("Luca Deri <deri@ntop.org>");
11200 +MODULE_DESCRIPTION("Packet capture acceleration by means of a ring buffer");
11202 +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
11203 +MODULE_ALIAS_NETPROTO(PF_RING);