[packages/kernel.git] / layer7-kernel2.4patch-v0.1.4.patch

diff -Naur linux-2.4.22-rc2-stock/Documentation/Configure.help linux-2.4.22-rc2/Documentation/Configure.help
--- linux-2.4.22-rc2-stock/Documentation/Configure.help	2003-08-14 16:52:09.000000000 -0500
+++ linux-2.4.22-rc2/Documentation/Configure.help	2003-08-18 12:41:10.000000000 -0500
@@ -10241,6 +10241,19 @@
   whenever you want).  If you want to compile it as a module, say M
   here and read <file:Documentation/modules.txt>.
 
+Layer7 classifier
+CONFIG_NET_CLS_LAYER7
+   Say Y if you want to be able to classify connetions (and their 
+   packets) based on application layer data.   This is necessary if 
+   you wish to classify applications such as peer-to-peer filesharing 
+   systems that do not always use the same port.  Say N if unsure.
+
+   This code is also available as a module called cls_layer7 ( = code
+   which can be inserted in and removed from the running kernel
+   whenever you want).  If you want to compile it as a module, say M
+   here and read <file:Documentation/modules.txt>.
+
+
 Special RSVP classifier
 CONFIG_NET_CLS_RSVP
   The Resource Reservation Protocol (RSVP) permits end systems to
diff -Naur linux-2.4.22-rc2-stock/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.4.22-rc2/include/linux/netfilter_ipv4/ip_conntrack.h
--- linux-2.4.22-rc2-stock/include/linux/netfilter_ipv4/ip_conntrack.h	2003-08-14 16:50:41.000000000 -0500
+++ linux-2.4.22-rc2/include/linux/netfilter_ipv4/ip_conntrack.h	2003-08-18 12:32:42.000000000 -0500
@@ -189,6 +189,10 @@
            packet has to the conntrack */
 	struct nf_ct_info infos[IP_CT_NUMBER];
 
+#if defined(CONFIG_NET_CLS_LAYER7) || defined(CONFIG_NET_CLS_LAYER7_MODULE)
+        struct timespec timestamp;
+#endif
+
 	/* Storage reserved for other modules: */
 
 	union ip_conntrack_proto proto;
diff -Naur linux-2.4.22-rc2-stock/include/linux/pkt_cls.h linux-2.4.22-rc2/include/linux/pkt_cls.h
--- linux-2.4.22-rc2-stock/include/linux/pkt_cls.h	2003-08-14 16:50:41.000000000 -0500
+++ linux-2.4.22-rc2/include/linux/pkt_cls.h	2003-08-18 12:26:32.000000000 -0500
@@ -158,4 +158,20 @@
 
 #define TCA_TCINDEX_MAX        TCA_TCINDEX_POLICE
 
+/* Added by Justin */
+/* Layer 7 filter */
+enum
+{
+	TCA_LAYER7_UNSPEC,
+	TCA_LAYER7_HASH,
+	TCA_LAYER7_MASK,
+	TCA_LAYER7_SHIFT,
+	TCA_LAYER7_PROTOCOL,
+	TCA_LAYER7_CLASSID,
+	TCA_LAYER7_POLICE,
+};
+
+#define TCA_LAYER7_MAX TCA_LAYER7_POLICE
+
 #endif
+
diff -Naur linux-2.4.22-rc2-stock/net/ipv4/netfilter/Config.in linux-2.4.22-rc2/net/ipv4/netfilter/Config.in
--- linux-2.4.22-rc2-stock/net/ipv4/netfilter/Config.in	2003-08-14 16:50:58.000000000 -0500
+++ linux-2.4.22-rc2/net/ipv4/netfilter/Config.in	2003-08-14 19:34:45.000000000 -0500
@@ -4,7 +4,7 @@
 mainmenu_option next_comment
 comment '  IP: Netfilter Configuration'
 
-tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP_NF_CONNTRACK
+tristate 'Connection tracking (required for masq/NAT + layer7)' CONFIG_IP_NF_CONNTRACK
 if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
   dep_tristate '  FTP protocol support' CONFIG_IP_NF_FTP $CONFIG_IP_NF_CONNTRACK
   dep_tristate '  Amanda protocol support' CONFIG_IP_NF_AMANDA $CONFIG_IP_NF_CONNTRACK
diff -Naur linux-2.4.22-rc2-stock/net/sched/Config.in linux-2.4.22-rc2/net/sched/Config.in
--- linux-2.4.22-rc2-stock/net/sched/Config.in	2003-08-14 16:51:00.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/Config.in	2003-08-15 16:32:19.000000000 -0500
@@ -32,6 +32,7 @@
    fi
    tristate '    Firewall based classifier' CONFIG_NET_CLS_FW
    tristate '    U32 classifier' CONFIG_NET_CLS_U32
+   dep_tristate '    Layer7 classifier' CONFIG_NET_CLS_LAYER7 $CONFIG_IP_NF_CONNTRACK $CONFIG_NETFILTER $CONFIG_EXPERIMENTAL
    if [ "$CONFIG_NET_QOS" = "y" ]; then
       tristate '    Special RSVP classifier' CONFIG_NET_CLS_RSVP
       tristate '    Special RSVP classifier for IPv6' CONFIG_NET_CLS_RSVP6
diff -Naur linux-2.4.22-rc2-stock/net/sched/Makefile linux-2.4.22-rc2/net/sched/Makefile
--- linux-2.4.22-rc2-stock/net/sched/Makefile	2003-08-14 16:50:59.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/Makefile	2003-08-15 21:57:27.000000000 -0500
@@ -31,5 +31,6 @@
 obj-$(CONFIG_NET_CLS_RSVP6)	+= cls_rsvp6.o
 obj-$(CONFIG_NET_CLS_ROUTE4)	+= cls_route.o
 obj-$(CONFIG_NET_CLS_FW)	+= cls_fw.o
+obj-$(CONFIG_NET_CLS_LAYER7)    += cls_layer7.o  regexp/regexp.o regexp/regsub.o
 
 include $(TOPDIR)/Rules.make
diff -Naur linux-2.4.22-rc2-stock/net/sched/cls_api.c linux-2.4.22-rc2/net/sched/cls_api.c
--- linux-2.4.22-rc2-stock/net/sched/cls_api.c	2003-08-14 16:51:00.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/cls_api.c	2003-08-15 16:32:19.000000000 -0500
@@ -466,5 +466,9 @@
 #ifdef CONFIG_NET_CLS_RSVP6
 	INIT_TC_FILTER(rsvp6);
 #endif
+#ifdef CONFIG_NET_CLS_LAYER7
+	INIT_TC_FILTER(layer7);
+	layer7_init_proc();
+#endif
 	return 0;
 }
diff -Naur linux-2.4.22-rc2-stock/net/sched/cls_layer7.c linux-2.4.22-rc2/net/sched/cls_layer7.c
--- linux-2.4.22-rc2-stock/net/sched/cls_layer7.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/cls_layer7.c	2003-08-17 21:43:03.000000000 -0500
@@ -0,0 +1,950 @@
+/*
+  net/sched/cls_layer7.c
+
+  Layer 7 (application layer) packet classifier.
+
+  Written by Matthew Strait, Ethan Sommer and Justin Levandoski, 2003.
+
+  Modeled after:
+  cls_tcindex.c:  Written 1998,1999 by Werner Almesberger, EPFL ICA
+
+  TODO:
+  -Support IPv6
+  -Do matching across multiple packets
+  -Get a better regexp implementation, preferably one with documentation
+  about what it can do!
+  -Improve module unloading support, if possible (this may be tc's fault...)
+  -Better support for connections with children (FTP, etc): ability to
+  classify children seperately from their parents.
+  -Finish writing the functions below that currently say "not implemented"
+*/
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/ip.h>
+#include <net/tcp.h>
+#include <linux/if_ether.h>
+#include <net/pkt_sched.h>
+#include <net/route.h>
+
+#include <linux/netfilter_ipv4/ip_conntrack.h>
+#include "regexp/regexp.h"
+#include <linux/proc_fs.h>
+
+/* this needs to be last (or at least after some of the other stuff, or
+ * else isprint() doesn't work! */
+#include <linux/ctype.h>
+
+/* uncomment the next line to to get debugging information printed, including
+dumps of the first few packets of each connection, classifications and warnings
+about incomplete functions. */
+#define LAYER7_DEBUG
+
+/* uncomment for more debugging info */
+//#define LAYER7_DEBUG_MORE
+
+#ifdef LAYER7_DEBUG
+  #define DPRINTK(format,args...) printk(format,##args)
+#else
+  #define DPRINTK(format,args...)
+#endif
+
+#ifdef LAYER7_DEBUG_MORE
+  #define DPRINTK2(format,args...) printk(format,##args)
+#else
+  #define DPRINTK2(format,args...)
+#endif
+
+#define LAYER7_MAX_PATTERN_DEF_SIZE 8192
+#define LAYER7_MAX_PROTOCOL_NAME_SIZE 256
+
+#define	PRIV(tp)	((struct layer7_data *) (tp)->root)
+
+struct layer7_filter_result {
+	struct tcf_police *police;
+	struct tcf_result res;
+};
+
+struct layer7_filter {
+	__u16 key;
+	struct layer7_filter_result result;
+	struct layer7_filter *next;
+};
+
+struct layer7_data {
+	struct layer7_filter_result * perfect; /* perfect hash; NULL if none */
+	struct layer7_filter ** h; /* imperfect hash; only used if !perfect;
+				      NULL if unused */
+	__u16 mask;		/* AND key with mask */
+	int shift;		/* shift ANDed key to the right */
+	int hash;		/* hash table size; 0 if undefined */
+	int alloc_hash;		/* allocated size */
+	int fall_through;	/* 0: only classify if explicit match */
+};
+
+/* one element in the classification hash table, in theory each "connection"
+ * (aka socket) should be remembered here so that it doesn't need to be
+ * reclassified for each packet once the "connection" has been identified */
+struct ct_hashElem {
+	u32 classid;
+	u32 hash;
+	int num_pkts_so_far;
+	int classified;
+};
+
+/* hash table that matches connections to the connection's state */
+struct ct_hashElem currentSockets[32768];
+
+/* a pattern defined by writing to /proc/net/layer7_protocols */
+struct layer7_pattern {
+	char * name;
+	regexp * pattern;
+	int patternsize;
+	char * uncomppattern;
+};
+
+/* pattern classification pair (aka filter rule) */
+struct layer7_patclas_pair {
+	regexp *pattern;
+	u32 classification;
+	u32 handle;
+	void *parent;
+};
+
+
+/* all the rules we are currently attempting to match on */
+struct layer7_patclas_pair *layer7_patclas_pairs = NULL;
+
+/* how many pairs we have so far */
+int layer7_num_patclas_pairs = 0;
+
+/* array of all the patterns which have been defined *
+ * and an int to keep track of how many we have      */
+struct layer7_pattern * layer7_patterns = NULL;
+int layer7_num_patterns = 0;
+
+/* the char* which holds the pattern definitions given to us 
+ * through the /proc filesystem */
+char * layer7_unparsed_patterns = NULL;
+
+/* clear out all of the patterns, so that new ones can be defined
+ * this is called each time someone writes to /proc/net/layer7_protocols
+ * before the new protocols are read in */
+void clear_layer7_patterns( void ) 
+{
+	int x;
+	for (x = 0; x < layer7_num_patterns; x++) {
+		kfree(layer7_patterns[x].name);
+		kfree(layer7_patterns[x].pattern);
+		kfree(layer7_patterns[x].uncomppattern);
+	}
+	kfree(layer7_patterns);
+	layer7_num_patterns = 0;
+}
+
+
+/* 
+ * Define a new pattern (which consists of a name (eg "http") and a regular
+ * expression (eg "http.*get"))
+ * 
+ * this is made to be memory efficent at the cost of time
+ * (it reallocates the memory each time so it only uses exactly
+ * the ammount necesary) because it will only be called one time per
+ * pattern definition */
+void add_layer7_pattern(const char *name, char *pattern) 
+{
+	struct layer7_pattern *newpatterns=NULL;
+	int x;
+	/* first see if we already have a pattern by that name */
+	for (x = 0; x < layer7_num_patterns; x++){
+		if (!strcmp(name, layer7_patterns[x].name)) {
+			/* keep a copy of the old regexp in case the new comp fails */
+			regexp * oldpattern = kmalloc(layer7_patterns[x].patternsize, GFP_KERNEL);
+			memcpy(oldpattern, layer7_patterns[x].pattern, layer7_patterns[x].patternsize);
+	
+			/* just recompile the regexp and return */
+			/* compile the pattern (we only want to do this once */
+			if (!(layer7_patterns[x].pattern =
+			     regcomp(pattern, &layer7_patterns[x].patternsize))) /* if regcomp fails */
+			{ 
+			    printk("<3>layer7: ERROR COMPILING REGEX \"%s\"\nold regex will be kept instead\n", pattern);
+			    /* go back to the old regex */
+			    layer7_patterns[x].pattern = oldpattern;
+			}
+			else
+			{			  
+				kfree(layer7_patterns[x].uncomppattern);
+				layer7_patterns[x].uncomppattern =
+					kmalloc(strlen(pattern)+1, GFP_KERNEL);
+				strcpy(layer7_patterns[x].uncomppattern, pattern);
+				kfree(oldpattern);
+			}
+
+			return;
+		}
+	}
+
+	/* if we have not found a pattern by that name add a new one*/
+
+	/* allocate the memory for the new array */
+	newpatterns = kmalloc( sizeof(struct layer7_pattern) * 
+			 (layer7_num_patterns + 1), GFP_KERNEL);
+
+	if (layer7_num_patterns > 0)
+	{
+		/* copy any previously declared patterns in */
+		memcpy(newpatterns, layer7_patterns,
+		       sizeof(struct layer7_pattern) * (layer7_num_patterns + 1));
+		/* free the memory the old patterns were using */
+		kfree(layer7_patterns);
+	}
+	layer7_num_patterns++;
+
+	/* set the newpatterns to be the authoritative patterns */
+	layer7_patterns = newpatterns;
+
+	/* copy the name */
+	layer7_patterns[layer7_num_patterns-1].name =
+		kmalloc(strlen(name)+1, GFP_KERNEL);
+
+	strcpy(layer7_patterns[layer7_num_patterns-1].name, name);
+	/* copy the uncomp pattern */
+	layer7_patterns[layer7_num_patterns-1].uncomppattern =
+		kmalloc(strlen(pattern)+1, GFP_KERNEL);
+
+	strcpy(layer7_patterns[layer7_num_patterns-1].uncomppattern, pattern);
+
+	/* compile the pattern (we only want to do this once) */
+	if (!(layer7_patterns[layer7_num_patterns-1].pattern =
+	     regcomp(pattern, &layer7_patterns[layer7_num_patterns-1].patternsize))) /* if regcomp fails */
+	{ 
+		printk("<3>layer7: ERROR COMPILING REGEX \"%s\"\n", pattern);
+		/* make sure we don't use this regexp, 
+		   if more are added they will just overwrite the bad regexp */
+		layer7_num_patterns--; 
+	}
+}
+
+/* add_layer7_filter_rule()
+ * defines a new filtering rule, for example "any packet which matches
+ * the pattern called http should be classified as 0x10001"
+ *
+ * this is made to be memory efficent at the cost of time (it reallocates the 
+ * memory each time so it only uses exactly the ammount necesary) because 
+ * it will only be called one time per pattern we are matching on per boot 
+ */
+void add_layer7_filter_rule(const char *name, const u32 classification, const u32 handle, void * parent) 
+{
+	int x;
+	/* loop through all the patterns */
+	for (x = 0; x < layer7_num_patterns; x++) 
+	{
+		if (!strcmp(name, layer7_patterns[x].name)) {
+
+			/* allocate the memory for the new array */
+			struct layer7_patclas_pair * newpairs = 
+				kmalloc( sizeof(struct layer7_patclas_pair) * 
+					 (layer7_num_patclas_pairs + 1), GFP_KERNEL);
+
+			/* don't copy or free things if they don't exist yet*/
+			if (layer7_num_patclas_pairs > 0) {
+				/* copy any previously declared patterns in */
+				memcpy(newpairs, layer7_patclas_pairs,
+				       sizeof(struct layer7_patclas_pair) *
+				       (layer7_num_patclas_pairs+1));
+				
+				/* free the memory the old patterns were using */
+				kfree(layer7_patclas_pairs);
+			}
+			layer7_num_patclas_pairs++;
+
+			/* set the newpatterns to be the authoritative patterns */
+			layer7_patclas_pairs = newpairs;	
+			
+			/* copy in the pattern so that if it is freed we don't crash */
+
+			layer7_patclas_pairs[layer7_num_patclas_pairs - 1].pattern =
+				kmalloc(layer7_patterns[x].patternsize, GFP_KERNEL);
+			memcpy(layer7_patclas_pairs[layer7_num_patclas_pairs-1].pattern,
+			       layer7_patterns[x].pattern, layer7_patterns[x].patternsize);
+
+			layer7_patclas_pairs[layer7_num_patclas_pairs-1].classification = 
+				classification;
+			layer7_patclas_pairs[layer7_num_patclas_pairs-1].handle = 
+				handle;
+			layer7_patclas_pairs[layer7_num_patclas_pairs-1].parent = 
+				parent;
+			return;
+		}
+	}
+	printk("<3>layer-7: There is no rule for \"%s\"\n", name);
+}
+
+/* this is a hash function which acts on the timespec to get a relatively 
+ * good hash. It uses 15 bit chunks and XORs them. 
+ * TODO: make the chunk size user defined so that the hash table 
+ * can be bigger/smaller? */
+static int layer7_hash(struct timespec ts) 
+{
+	int hash = (ts.tv_nsec&32767) ^ 
+	  ((ts.tv_nsec>>15)&32767) ^ 
+	  ((ts.tv_nsec>>30)&32767) ^ 
+	  (ts.tv_sec&32767) ^ 
+	  ((ts.tv_sec>>15)&32767) ^ 
+	  ((ts.tv_sec>>30)&32767);
+	return hash;
+}
+
+/* for debugging only -- enable and call this if you want lots of debug info */
+#if 0 
+static void print_pkt_stuff(struct sk_buff *skb)
+{
+	int x;
+	
+	/* set this pointer to the beginning of the TCP data,
+	which is what we care about. */
+	unsigned char * tcpdata = skb->data + app_data_offset(skb);
+
+	printk("Here's the IP header in hex:\n");
+	for(x = 0; x < sizeof(struct iphdr); x++)
+		printk(" %.2x", ((unsigned char *)skb->nh.iph)[x]);
+	printk("\n");
+
+	/* when routing, this will just print the IP header again */
+	printk("Here's the TCP header in hex:\n");
+	for(x = 0; x < sizeof(struct tcphdr); x++)
+		printk(" %.2x", ((unsigned char *)skb->h.th)[x]);
+	printk("\n");
+
+	printk("TCP header detail: This packet is coming from %d.%d.%d.%d:%d\n", 
+		(skb->nh.iph->saddr&0x000000FF),       (skb->nh.iph->saddr&0x0000FF00) >> 8,  
+		(skb->nh.iph->saddr&0x00FF0000) >> 16, (skb->nh.iph->saddr&0xFF000000) >> 24, skb->h.th->source);
+	printk("and going to %d.%d.%d.%d:%d\n",        
+		(skb->nh.iph->daddr&0x000000FF),       (skb->nh.iph->daddr&0x0000FF00) >> 8,  
+		(skb->nh.iph->daddr&0x00FF0000) >> 16,	(skb->nh.iph->daddr&0xFF000000) >> 24, skb->h.th->dest);
+
+	printk("syn=%d ack=%d rst=%d fin=%d psh=%d urg=%d.\n",
+	skb->h.th->syn, skb->h.th->ack, skb->h.th->rst, 
+	skb->h.th->fin, skb->h.th->psh, skb->h.th->urg);
+
+	printk("TCP data ('X' == non-printable) is:\n");
+	for(x = 0; x < ( (int)skb->tail - (int)tcpdata ); x++)
+        {
+                if (isprint(tcpdata[x])) printk("%c", tcpdata[x]);
+		else			 printk("X");
+        }
+	printk("\nAnd here's the same thing in hex:\n");
+	for(x = 0; x < ( (int)skb->tail - (int)tcpdata ); x++)
+        {
+		printk(" %.2x", tcpdata[x]);
+        }
+	printk("\n\n");
+
+	printk("data ('X' == non-printable) is:\n");
+	for(x = 0; x < ( (int)skb->tail - (int)skb->data ); x++)
+        {
+                if (isprint(skb->data[x])) printk("%c", skb->data[x]);
+		else		 	   printk("X");
+        }
+	printk("\nAnd here's the same thing in hex:\n");
+	for(x = 0; x < ( (int)skb->tail - (int)skb->data ); x++)
+        {
+		printk(" %.2x", skb->data[x]);
+        }
+	printk("\n\n");
+}
+#endif
+
+
+/* These functions test what kind of packet we're dealing with.
+include/linux/if_ether.h suggests that all packets are treated as
+Ethernet, but I'm not absolutely sure, and the presence of *raw in
+skb->mac troubles me.  I depend on the IP header always starting at the
+same offset, so if this is wrong, there's trouble. -MLS */
+
+static int is_ipv4(struct sk_buff * skb)
+{
+	/* I'm also not convinced that this code ever gets run if
+	it isn't IP, since running dhclient (which should send ARPs or 
+	RARPs) doesn't cause this to print numbers other than 800. 
+	I'm not sure what other testing I can do. */
+
+	/* the htons is important.  It fixes the endianness */
+	if(htons(skb->protocol) != ETH_P_IP){
+ 		return 0;
+	}
+
+	return 1;
+}
+
+/* I'd rather just call this "is_tcp", except it depends on it being IPv4 and
+TCP could be used on top of other protocols */
+static inline int is_tcp_over_ipv4(struct sk_buff * skb)
+{
+	/* I don't want to depend on skb->nh.iph->protocol being set, because
+	I bet it isn't when we are acting as a switch, just like skb->h.th isn't
+	when acting as a router. */
+	#define IP_PROTO_OFFSET 9
+
+	/* If it's not TCP */
+	if(skb->data[ETH_HLEN + IP_PROTO_OFFSET] != IPPROTO_TCP){
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Again, I'd rather just call this "is_udp"... */
+static inline int is_udp_over_ipv4(struct sk_buff * skb)
+{
+	#define IP_PROTO_OFFSET 9
+
+	if(skb->data[ETH_HLEN + IP_PROTO_OFFSET] != IPPROTO_UDP){
+		return 0;
+	}
+
+	return 1;
+}
+
+/* Returns the number of bytes into the skb->data that the application
+data starts. This is a kludge because we don't know how to do it right,
+or even if there really is a right way of doing it.  This fact is why 
+we are not currently attempting to classify anything except TCP and UDP over 
+IPv4. */
+/* HLEN == hl == header length.  4 == bytes/word */
+static int app_data_offset(struct sk_buff *skb)
+{
+	/* 12 == offset into TCP header for the header length field. We can't get this
+	with skb->h.th->doff because the tcphdr struct doesn't get set when routing */
+	#define TCP_DOFF_OFF 12
+
+	/* ip_hl = 4*skb->nh.iph->ihl would usually work, but I bet the
+	iph struct isn't set when acting as a switch! */
+	int ip_hl = 4*(skb->data[ETH_HLEN] & 0x0f);
+
+	if( is_udp_over_ipv4(skb) ){
+		return ETH_HLEN + ip_hl + 8; /* UDP header is always 8 bytes */
+	}
+	else{ /* is_tcp_over_ipv4 */
+		int tcp_hl = 4*(skb->data[ETH_HLEN + ip_hl + TCP_DOFF_OFF] >> 4);
+		return ETH_HLEN + ip_hl + tcp_hl;
+	}
+}
+
+/* This function is only called until the connection is classified or for the 
+ * first few packets (whichever limit comes first.) The classification happens 
+ * here. After a connection has been identified it continues to be of that 
+ * type. */
+static int layer7_really_classify(struct sk_buff *skb, struct tcf_result *res, int hash, void* parent)
+{
+	int x, y = 0;
+	int match = 0; 
+
+	/* the application layer data */
+        unsigned char * app_data = skb->data + app_data_offset(skb);  
+
+	/* get the data segment of the packet and, removing any	nulls in it, 
+	put it into buf, then we null terminate buf so it's a happy string */
+	char * buf = (char *)kmalloc((int)skb->tail - (int)app_data + 1, GFP_KERNEL);
+	if(!buf)
+	{
+		printk("<3>layer7: kmalloc failed to allocate %d bytes for buf!\n", 
+			(int)skb->tail - (int)app_data + 1);
+                return TC_POLICE_UNSPEC; /* give up */
+	}
+
+	/* this looks slow, but changing it to a memcpy (which loses the ability to
+	strip out nulls and do tolower) does not make a noticable difference in speed,
+	so we suspect that this is not a bottleneck. Even if we avoided copying altogether
+	it doesn't seem like it would get much faster.  */
+	y = 0;
+	for(x = 0; x < ( (int)skb->tail - (int)app_data ); x++)
+        {
+	  if (app_data[x] != 0) buf[y++] = tolower(app_data[x]);
+        }
+        buf[y] = '\0'; // make it into a null-terminated string 
+
+#ifdef LAYER7_DEBUG
+	if (strlen(buf)!=0) {
+	  printk("buf: (non-printable chars are printed as '.'\n");
+	  for (x=0;x<strlen(buf);x++){
+	    if (isprint(buf[x])) printk("%c",buf[x]);
+	    else  	         printk(".");
+	  }
+	  printk("\n");
+	}
+#endif
+
+	/* loop through all the patclas pairs to see if we can match it */
+	for (x = 0; x < layer7_num_patclas_pairs; x++){
+		match = (layer7_patclas_pairs[x].parent==parent) && 
+			regexec(layer7_patclas_pairs[x].pattern, buf);
+		if (match) 
+		{
+			DPRINTK2("layer 7: layer7_really_classify found a match!\n");
+			break;
+		}
+	}
+
+	kfree(buf);
+
+	if(match){
+		/* classify it */
+		res->classid = layer7_patclas_pairs[x].classification; 
+
+		/* we are a "generic filter", so class is always set to 0.  
+		See "Linux Network Traffic Control -- Implementation Overview,
+		4 Feb 2001, section 5.3 */
+		res->class = 0; 
+
+		/* record how we classified it */
+		currentSockets[hash].classid = layer7_patclas_pairs[x].classification; 
+		currentSockets[hash].hash = hash;
+		currentSockets[hash].classified = 1;
+		return TC_POLICE_OK;
+	}
+	else{
+		res->class = 0;
+	
+		/* remember to use the default in the futrure */
+		currentSockets[hash].classid=res->classid; 
+		currentSockets[hash].hash = hash;
+
+		/* this is the "unclassified" case, so leave
+		currentSockets[hash].classified alone */
+		return TC_POLICE_UNSPEC;
+	}
+}
+
+
+static int layer7_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
+{
+	enum ip_conntrack_info ctinfo;
+	struct ip_conntrack *conntrack;
+	int hash;
+
+	/* check if we can deal with the protocol */
+	if( is_ipv4(skb) ){
+		DPRINTK2("layer7: Is IPv4, going on.\n");	
+
+		if ( is_udp_over_ipv4(skb)){
+			DPRINTK2("    layer7: Is UDP/IPv4, going on.\n"); }
+		else if( is_tcp_over_ipv4(skb)){
+			DPRINTK2("    layer7: Is TCP/IPv4, going on.\n"); }
+		else{
+			DPRINTK2("    layer7: Not UDP or TCP, leaving.\n");
+			return TC_POLICE_UNSPEC;
+		}
+	}
+	else{
+		DPRINTK2("layer7: Not IPv4, leaving.\n");
+		return TC_POLICE_UNSPEC;
+	}
+
+	/* get a ip_conntrack */
+	if(!(conntrack = ip_conntrack_get(skb, &ctinfo)))
+	{
+		printk("<3>layer7_classify: error getting conntrack, dropping to default.\n");
+		return TC_POLICE_UNSPEC;
+	}
+
+	/* see if we can get a master conntrack (and its master etc) 
+	   (for ftp etc) */
+	while (master_ct(conntrack) != NULL)  {
+		conntrack=master_ct(conntrack);
+	}
+
+	/* the conntrack got bzeroed somewhere, so that should be 0
+	   the first time around... */
+	if (conntrack->timestamp.tv_sec == 0){
+		jiffies_to_timespec(jiffies,&conntrack->timestamp); /* 2.4/2.6 difference */
+		hash = layer7_hash(conntrack->timestamp);
+		memset(&currentSockets[hash], 0, sizeof(struct ct_hashElem));
+	}
+
+	/* we hash on the timestamp we added to the conntrack */
+	hash = layer7_hash(conntrack->timestamp);
+
+	/* If we already know about this connection, this increments the 
+	   packet count. If not, this doesn't hurt anything. */
+	currentSockets[hash].num_pkts_so_far++;
+
+        /* If we've seen this connection before and we're not trying to 
+	   classify it anymore, either because we've given up (we've arbitrarily
+	   chosen to test the first 8 packets) or because we've found a match */
+	if ( currentSockets[hash].hash == hash && 
+	     (currentSockets[hash].num_pkts_so_far > 8 ||
+		currentSockets[hash].classified) )
+	{
+		if(currentSockets[hash].classified){
+			/* classify it as what we classified it as before */
+			res->classid = currentSockets[hash].classid;
+			res->class = 0;
+			return TC_POLICE_OK;
+		}
+		else{
+			return TC_POLICE_UNSPEC;
+		}
+	}
+	/* if we've seen it before, but we still need to classify it */
+	else if(currentSockets[hash].hash == hash){
+		int retval = layer7_really_classify(skb, res, hash,tp->root);		
+		if(retval == TC_POLICE_UNSPEC)
+			DPRINTK("layer7: seen before, still unmatched. Classified as %x for now\n", currentSockets[hash].classid);
+		else
+			DPRINTK("layer7: found match. Classified as %x\n", currentSockets[hash].classid);
+		return retval;
+	}
+	/* otherwise this is the first packet of a new connection */
+	else{
+		int retval;
+	
+		currentSockets[hash].num_pkts_so_far = 1;
+		currentSockets[hash].classified = 0;
+
+		retval = layer7_really_classify(skb, res, hash,tp->root);
+		if(retval == TC_POLICE_UNSPEC)
+			DPRINTK("layer7: first packet, still unmatched. Classified as %x for now\n", currentSockets[hash].classid);
+		else
+			DPRINTK("layer7: first packet. Classified as %x\n", currentSockets[hash].classid);
+		return retval;
+	}
+
+	return TC_POLICE_OK; /* == 0 */
+}
+
+/* returns the "internal id" (the index into the patclas array) of the 
+   rule corresponding to handle.  Untested. */
+static unsigned long layer7_get(struct tcf_proto *tp, u32 handle)
+{
+	int x;
+	/* loop through to find the corresponding rule */
+	for (x = 0; x < layer7_num_patclas_pairs; x++) {
+		if (layer7_patclas_pairs[x].handle == handle) 
+			return x;
+	}
+	/* otherwise return layer7_num_patclas_pairs */
+	return layer7_num_patclas_pairs;
+}
+
+
+/* This doesn't do anything in other filters either... 
+(but this is one of the required functions) */
+static void layer7_put(struct tcf_proto *tp, unsigned long f)
+{
+	DPRINTK("layer7_put called.  Not implemented.\n");
+}
+
+/* This actually does something, but we're not sure what.
+Or rather, we know that it sets tp and that it makes tc crash if tp isn't
+set, but we don't know why.  */
+static int layer7_init(struct tcf_proto *tp)
+{
+	struct layer7_data *p;
+
+	DPRINTK("layer7_init called: (tp %p). Might not be doing the right thing.\n", tp);
+	
+	MOD_INC_USE_COUNT;
+	p = kmalloc(sizeof(struct layer7_data), GFP_KERNEL);
+	if (!p) {
+		MOD_DEC_USE_COUNT;
+		return -ENOMEM;
+	}
+	tp->root = p;
+	p->perfect = NULL;
+	p->h = NULL;
+	p->hash = 0;
+	p->mask = 0xffff;
+	p->shift = 0;
+	p->fall_through = 1;
+
+	return 0;
+}
+
+/* XXX More info needed here. 
+We're not sure exactly what this is supposed to do.  We're copying what 
+cls_tcindex.c does and nothing appears to be broken because of this approach. */
+static int layer7_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct layer7_filter_result *r = (struct layer7_filter_result *) arg;
+	unsigned long cl;
+
+	DPRINTK("layer7_delete called: might not be doing the right thing.\n");
+
+	cl = __cls_set_class(&r->res.class,0);
+	if (cl)
+		tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
+
+#ifdef CONFIG_NET_CLS_POLICE
+	tcf_police_release(r->police);
+#endif
+	return 0;
+}
+
+
+/* There are no parameters for layer7_init, so we overload layer7_change */
+static int layer7_change(struct tcf_proto * tp, unsigned long base, u32 handle,
+    struct rtattr ** tca, unsigned long * arg)
+{
+	struct layer7_filter_result new_filter_result = {
+		NULL,		/* no policing */
+		{ 0,0 },	/* no classification */
+	};
+	struct rtattr * opt = tca[TCA_OPTIONS-1];
+	struct rtattr * tb[TCA_LAYER7_MAX];
+	struct layer7_filter_result * r = (struct layer7_filter_result *) * arg;
+	char* protocol = NULL;
+	u32 classid = 0;
+
+
+	//if (arg)
+	//	DPRINTK("layer7_change: *arg = 0x%lx\n",*arg);
+	if (!opt)
+		return 0;
+	if(rtattr_parse(tb, TCA_LAYER7_MAX,RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
+		return -EINVAL;
+
+	/* Get protocol here */	
+	if (tb[TCA_LAYER7_PROTOCOL - 1]) {
+		if (RTA_PAYLOAD(tb[TCA_LAYER7_PROTOCOL - 1]) < sizeof(int))
+			return -EINVAL;
+		
+		protocol = (char *)RTA_DATA(tb[TCA_LAYER7_PROTOCOL - 1]);
+	}
+	
+	/* vestigal comment from tcindex.c
+	 *
+	 * Note: this could be as restrictive as
+	 * if (handle & ~(mask >> shift))
+	 * but then, we'd fail handles that may become valid after some
+	 * future mask change. While this is extremely unlikely to ever
+	 * matter, the check below is safer (and also more
+	 * backwards-compatible).
+	 */
+
+	r = &new_filter_result;
+	
+	if (tb[TCA_LAYER7_CLASSID-1]) {
+		classid = *(__u32 *) RTA_DATA(tb[TCA_LAYER7_CLASSID - 1]);
+        }
+
+	DPRINTK2("add_layer7_filter_rule, protocol: %s, with classid: %x, handle %u\n", protocol, classid, handle);
+	add_layer7_filter_rule(protocol, classid, handle,tp->root);
+
+#ifdef CONFIG_NET_CLS_POLICE
+	{
+		struct tcf_police *police;
+
+		police = tb[TCA_LAYER7_POLICE - 1] ?
+		    tcf_police_locate(tb[TCA_LAYER7_POLICE - 1], NULL) : NULL;
+		tcf_tree_lock(tp);
+		police = xchg(&r->police, police);
+		tcf_tree_unlock(tp);
+		tcf_police_release(police);
+	}
+#endif
+	return 0;
+}
+
+/* XXX More information needed here.
+Can't find any documentation on what this function is supposed to do.
+The cls_tcindex.c version of this doesn't appear to do anything that applies
+to us and so far doing nothing appears to work fine. */
+static void layer7_walk(struct tcf_proto * tp, struct tcf_walker * walker)
+{
+	DPRINTK("layer7_walk called.  Not implemented.\n");
+}
+
+/* delete all the rules in the filter */
+static void layer7_destroy(struct tcf_proto *tp)
+{
+	int x;
+
+	/* clear the filter rules */
+	if (layer7_patclas_pairs != NULL) {
+		for (x=0;x<layer7_num_patclas_pairs;x++) 
+		{
+			kfree(layer7_patclas_pairs[x].pattern);
+		}
+		kfree(layer7_patclas_pairs);
+		layer7_patclas_pairs=NULL;
+		layer7_num_patclas_pairs=0;
+	}
+}
+
+
+/* XXX more information needed here.
+This gets called each time a filter is added, but we can't find any 
+documentation that defines what it is supposed to do or why it gets called
+when it does.  However, nothing seems to be broken because of our current
+approach. */
+static int layer7_dump(struct tcf_proto *tp, unsigned long fh,
+    struct sk_buff *skb, struct tcmsg *t)
+{
+	DPRINTK("layer7_dump called.  Might not be doing the right thing.\n");
+
+	return skb->len; /* cls_tcindex.c does this, don't know why... */
+}
+
+struct tcf_proto_ops cls_layer7_ops = {
+	NULL,
+	"layer7",
+	layer7_classify,
+	layer7_init,
+	layer7_destroy,
+
+	layer7_get,
+	layer7_put,
+	layer7_change,
+	layer7_delete,
+	layer7_walk,
+	layer7_dump
+};
+
+
+/* write out the patterns to userland. (yes, write reads and read writes.) */
+int layer7_read_proc(char* page, char ** start, off_t off, int count, 
+		     int* eof, void * data) 
+{
+	if (layer7_patterns == NULL){
+		/* there are no patterns yet */
+		*eof=1;
+		page='\0';
+		return 0;
+	}
+	else{
+		int x;
+		/* there are patterns */
+		page[0]='\0';
+		for (x=0;x<layer7_num_patterns;x++){
+			strcat(page,layer7_patterns[x].name);
+			strcat(page,"\n");
+			strcat(page,layer7_patterns[x].uncomppattern);
+			strcat(page,"\n");
+		}
+			
+		//strncpy(page,layer7_unparsed_patterns,count-1);
+		*eof=1;
+		return strlen(page);
+	}
+}
+
+/* Read in the protocols from userland */
+int layer7_write_proc(struct file* file, const char* buffer, 
+		      unsigned long count, void *data) 
+{
+	int x = 0, y;
+	char *patterns;
+	char *name;
+	char *pattern;
+	
+	/* free the old pattens if they exist */
+	if (layer7_unparsed_patterns != NULL)
+		kfree(layer7_unparsed_patterns);
+
+	/* don't clear the patterns so that many may be added one at a time */
+	//if (layer7_patterns != NULL)
+		//clear_layer7_patterns();
+
+	/* allocate space for the new ones */
+	layer7_unparsed_patterns=(char *)kmalloc(count + 1, GFP_KERNEL);
+	/* bail if it fails */
+	if (!layer7_unparsed_patterns)
+		return 0;
+
+	/* copy in the data from userland */
+	copy_from_user(layer7_unparsed_patterns, buffer, count);
+	layer7_unparsed_patterns[count]= '\0';
+
+
+	/* add the patterns to the pattern definitions table */
+	pattern = (char*)kmalloc(LAYER7_MAX_PATTERN_DEF_SIZE, GFP_KERNEL);
+	patterns = layer7_unparsed_patterns;
+	while (x < count) 
+	{
+		name = (char*)kmalloc(LAYER7_MAX_PROTOCOL_NAME_SIZE, GFP_KERNEL);
+		name[0] = '\0';
+		pattern[0] = '\0';
+		/* read past comment lines */
+		while (x < count && (patterns[x]=='#' || isspace(patterns[x]))){
+			if (patterns[x] == '#'){
+				while (x<count && patterns[x]!='\n'){
+				  x++;
+				}
+			}
+			x++;
+		}
+		
+		/* read in the name */
+		y = 0;
+		while (x < count && !isspace(patterns[x]) &&
+		       y < LAYER7_MAX_PROTOCOL_NAME_SIZE - 1){
+			name[y] = patterns[x];
+			x++; 
+			y++;
+		}
+		name[y] = '\0';
+		
+		/* skip over comments and blank lines */
+		while (x < count && (patterns[x] == '#' || isspace(patterns[x])))
+		{
+			if (patterns[x] == '#')
+				while (x < count && patterns[x] != '\n') 
+					{ x++; }
+			x++;
+		}
+		
+		/* read in the pattern */
+		y = 0;
+		while (x < count && patterns[x] != '\n' && patterns[x] != '\r' &&
+		       y < LAYER7_MAX_PATTERN_DEF_SIZE-1){
+			pattern[y]=patterns[x];
+			x++; 
+			y++;
+		}
+		pattern[y] = '\0';
+		/* if we now have both a pattern and name add it */
+		if (strlen(name) != 0 && strlen(pattern) != 0)
+		{
+			DPRINTK("layer 7: adding %s:%s\n", name, pattern);
+			add_layer7_pattern(name, pattern);
+		}
+	}
+
+	return count;
+}
+
+/* register the proc file */
+void layer7_init_proc(void)
+{
+	struct proc_dir_entry* entry;
+
+	/* create the file */
+	entry = create_proc_entry("layer7_protocols", 0644, proc_net);
+
+	/* set the callback functions */
+	entry->read_proc = layer7_read_proc;
+	entry->write_proc = layer7_write_proc;
+}
+
+#ifdef MODULE
+int init_module(void)
+{
+	DPRINTK2("layer7: init_module called\n");
+	layer7_init_proc();
+	return register_tcf_proto_ops(&cls_layer7_ops);
+}
+
+void cleanup_module(void) 
+{
+	DPRINTK2("layer7: cleanup_module called\n");
+	unregister_tcf_proto_ops(&cls_layer7_ops);
+}
+MODULE_LICENSE("GPL");
+#endif
+
diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regexp.c linux-2.4.22-rc2/net/sched/regexp/regexp.c
--- linux-2.4.22-rc2-stock/net/sched/regexp/regexp.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/regexp/regexp.c	2003-08-17 21:43:49.000000000 -0500
@@ -0,0 +1,1236 @@
+/*
+ * regcomp and regexec -- regsub and regerror are elsewhere
+ * @(#)regexp.c	1.3 of 18 April 87
+ *
+ *	Copyright (c) 1986 by University of Toronto.
+ *	Written by Henry Spencer.  Not derived from licensed software.
+ *
+ *	Permission is granted to anyone to use this software for any
+ *	purpose on any computer system, and to redistribute it freely,
+ *	subject to the following restrictions:
+ *
+ *	1. The author is not responsible for the consequences of use of
+ *		this software, no matter how awful, even if they arise
+ *		from defects in it.
+ *
+ *	2. The origin of this software must not be misrepresented, either
+ *		by explicit claim or by omission.
+ *
+ *	3. Altered versions must be plainly marked as such, and must not
+ *		be misrepresented as being the original software.
+ *
+ * Beware that some of this code is subtly aware of the way operator
+ * precedence is structured in regular expressions.  Serious changes in
+ * regular-expression syntax might require a total rethink.
+ *
+ * This code was modified by Ethan Sommer to work within the kernel
+ * (it now uses kmalloc etc..)
+ * 
+ * Modified slightly by Matthew Strait to use more modern C.
+ */
+#include "regexp.h"
+#include "regmagic.h"
+#include <linux/string.h>
+#include <linux/mm.h>
+
+/* added by ethan */
+#define malloc(foo) kmalloc(foo,GFP_KERNEL)
+
+/* added by MLS */
+#ifdef MODULE
+int init_module(void)
+{ return 0; }
+int cleanup_module(void)
+{ return 0; }
+#include <linux/kernel.h>
+#include <linux/module.h>
+MODULE_LICENSE("GPL"); /* I think this is ok based on the above text... */
+#endif
+
+void regerror(char * s)
+{
+	printk("regexp(3): %s\n", s);
+	/* NOTREACHED */
+}
+
+/*
+ * The "internal use only" fields in regexp.h are present to pass info from
+ * compile to execute that permits the execute phase to run lots faster on
+ * simple cases.  They are:
+ *
+ * regstart	char that must begin a match; '\0' if none obvious
+ * reganch	is the match anchored (at beginning-of-line only)?
+ * regmust	string (pointer into program) that match must include, or NULL
+ * regmlen	length of regmust string
+ *
+ * Regstart and reganch permit very fast decisions on suitable starting points
+ * for a match, cutting down the work a lot.  Regmust permits fast rejection
+ * of lines that cannot possibly match.  The regmust tests are costly enough
+ * that regcomp() supplies a regmust only if the r.e. contains something
+ * potentially expensive (at present, the only such thing detected is * or +
+ * at the start of the r.e., which can involve a lot of backup).  Regmlen is
+ * supplied because the test in regexec() needs it and regcomp() is computing
+ * it anyway.
+ */
+
+/*
+ * Structure for regexp "program".  This is essentially a linear encoding
+ * of a nondeterministic finite-state machine (aka syntax charts or
+ * "railroad normal form" in parsing technology).  Each node is an opcode
+ * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
+ * all nodes except BRANCH implement concatenation; a "next" pointer with
+ * a BRANCH on both ends of it is connecting two alternatives.  (Here we
+ * have one of the subtle syntax dependencies:  an individual BRANCH (as
+ * opposed to a collection of them) is never concatenated with anything
+ * because of operator precedence.)  The operand of some types of node is
+ * a literal string; for others, it is a node leading into a sub-FSM.  In
+ * particular, the operand of a BRANCH node is the first node of the branch.
+ * (NB this is *not* a tree structure:  the tail of the branch connects
+ * to the thing following the set of BRANCHes.)  The opcodes are:
+ */
+
+/* definition	number	opnd?	meaning */
+#define	END	0	/* no	End of program. */
+#define	BOL	1	/* no	Match "" at beginning of line. */
+#define	EOL	2	/* no	Match "" at end of line. */
+#define	ANY	3	/* no	Match any one character. */
+#define	ANYOF	4	/* str	Match any character in this string. */
+#define	ANYBUT	5	/* str	Match any character not in this string. */
+#define	BRANCH	6	/* node	Match this alternative, or the next... */
+#define	BACK	7	/* no	Match "", "next" ptr points backward. */
+#define	EXACTLY	8	/* str	Match this string. */
+#define	NOTHING	9	/* no	Match empty string. */
+#define	STAR	10	/* node	Match this (simple) thing 0 or more times. */
+#define	PLUS	11	/* node	Match this (simple) thing 1 or more times. */
+#define	OPEN	20	/* no	Mark this point in input as start of #n. */
+			/*	OPEN+1 is number 1, etc. */
+#define	CLOSE	30	/* no	Analogous to OPEN. */
+
+/*
+ * Opcode notes:
+ *
+ * BRANCH	The set of branches constituting a single choice are hooked
+ *		together with their "next" pointers, since precedence prevents
+ *		anything being concatenated to any individual branch.  The
+ *		"next" pointer of the last BRANCH in a choice points to the
+ *		thing following the whole choice.  This is also where the
+ *		final "next" pointer of each individual branch points; each
+ *		branch starts with the operand node of a BRANCH node.
+ *
+ * BACK		Normal "next" pointers all implicitly point forward; BACK
+ *		exists to make loop structures possible.
+ *
+ * STAR,PLUS	'?', and complex '*' and '+', are implemented as circular
+ *		BRANCH structures using BACK.  Simple cases (one character
+ *		per match) are implemented with STAR and PLUS for speed
+ *		and to minimize recursive plunges.
+ *
+ * OPEN,CLOSE	...are numbered at compile time.
+ */
+
+/*
+ * A node is one char of opcode followed by two chars of "next" pointer.
+ * "Next" pointers are stored as two 8-bit pieces, high order first.  The
+ * value is a positive offset from the opcode of the node containing it.
+ * An operand, if any, simply follows the node.  (Note that much of the
+ * code generation knows about this implicit relationship.)
+ *
+ * Using two bytes for the "next" pointer is vast overkill for most things,
+ * but allows patterns to get big without disasters.
+ */
+#define	OP(p)	(*(p))
+#define	NEXT(p)	(((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
+#define	OPERAND(p)	((p) + 3)
+
+/*
+ * See regmagic.h for one further detail of program structure.
+ */
+
+
+/*
+ * Utility definitions.
+ */
+#ifndef CHARBITS
+#define	UCHARAT(p)	((int)*(unsigned char *)(p))
+#else
+#define	UCHARAT(p)	((int)*(p)&CHARBITS)
+#endif
+
+#define	FAIL(m)	{ regerror(m); return(NULL); }
+#define	ISMULT(c)	((c) == '*' || (c) == '+' || (c) == '?')
+#define	META	"^$.[()|?+*\\"
+
+/*
+ * Flags to be passed up and down.
+ */
+#define	HASWIDTH	01	/* Known never to match null string. */
+#define	SIMPLE		02	/* Simple enough to be STAR/PLUS operand. */
+#define	SPSTART		04	/* Starts with * or +. */
+#define	WORST		0	/* Worst case. */
+
+/*
+ * Global work variables for regcomp().
+ */
+static char *regparse;		/* Input-scan pointer. */
+static int regnpar;		/* () count. */
+static char regdummy;
+static char *regcode;		/* Code-emit pointer; &regdummy = don't. */
+static long regsize;		/* Code size. */
+
+/*
+ * Forward declarations for regcomp()'s friends.
+ */
+#ifndef STATIC
+#define	STATIC	static
+#endif
+STATIC char *reg(int paren,int *flagp);
+STATIC char *regbranch(int *flagp);
+STATIC char *regpiece(int *flagp);
+STATIC char *regatom(int *flagp);
+STATIC char *regnode(char op);
+STATIC char *regnext(char *p);
+STATIC void regc(char b);
+STATIC void reginsert(char op, char *opnd);
+STATIC void regtail(char *p, char *val);
+STATIC void regoptail(char *p, char *val);
+STATIC int strcspn(char *s1, char *s2);
+
+/*
+ - regcomp - compile a regular expression into internal code
+ *
+ * We can't allocate space until we know how big the compiled form will be,
+ * but we can't compile it (and thus know how big it is) until we've got a
+ * place to put the code.  So we cheat:  we compile it twice, once with code
+ * generation turned off and size counting turned on, and once "for real".
+ * This also means that we don't allocate space until we are sure that the
+ * thing really will compile successfully, and we never have to move the
+ * code and thus invalidate pointers into it.  (Note that it has to be in
+ * one piece because free() must be able to free it all.)
+ *
+ * Beware that the optimization-preparation code in here knows about some
+ * of the structure of the compiled regexp.
+ */
+regexp *
+regcomp(char *exp,int *patternsize)
+{
+	register regexp *r;
+	register char *scan;
+	register char *longest;
+	register int len;
+	int flags;
+	/* commented out by ethan
+	   extern char *malloc();
+	*/
+
+	if (exp == NULL)
+		FAIL("NULL argument");
+
+	/* First pass: determine size, legality. */
+	regparse = exp;
+	regnpar = 1;
+	regsize = 0L;
+	regcode = &regdummy;
+	regc(MAGIC);
+	if (reg(0, &flags) == NULL)
+		return(NULL);
+
+	/* Small enough for pointer-storage convention? */
+	if (regsize >= 32767L)		/* Probably could be 65535L. */
+		FAIL("regexp too big");
+
+	/* Allocate space. */
+	*patternsize=sizeof(regexp) + (unsigned)regsize;
+	r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
+	if (r == NULL)
+		FAIL("out of space");
+
+	/* Second pass: emit code. */
+	regparse = exp;
+	regnpar = 1;
+	regcode = r->program;
+	regc(MAGIC);
+	if (reg(0, &flags) == NULL)
+		return(NULL);
+
+	/* Dig out information for optimizations. */
+	r->regstart = '\0';	/* Worst-case defaults. */
+	r->reganch = 0;
+	r->regmust = NULL;
+	r->regmlen = 0;
+	scan = r->program+1;			/* First BRANCH. */
+	if (OP(regnext(scan)) == END) {		/* Only one top-level choice. */
+		scan = OPERAND(scan);
+
+		/* Starting-point info. */
+		if (OP(scan) == EXACTLY)
+			r->regstart = *OPERAND(scan);
+		else if (OP(scan) == BOL)
+			r->reganch++;
+
+		/*
+		 * If there's something expensive in the r.e., find the
+		 * longest literal string that must appear and make it the
+		 * regmust.  Resolve ties in favor of later strings, since
+		 * the regstart check works with the beginning of the r.e.
+		 * and avoiding duplication strengthens checking.  Not a
+		 * strong reason, but sufficient in the absence of others.
+		 */
+		if (flags&SPSTART) {
+			longest = NULL;
+			len = 0;
+			for (; scan != NULL; scan = regnext(scan))
+				if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
+					longest = OPERAND(scan);
+					len = strlen(OPERAND(scan));
+				}
+			r->regmust = longest;
+			r->regmlen = len;
+		}
+	}
+
+	return(r);
+}
+
+/*
+ - reg - regular expression, i.e. main body or parenthesized thing
+ *
+ * Caller must absorb opening parenthesis.
+ *
+ * Combining parenthesis handling with the base level of regular expression
+ * is a trifle forced, but the need to tie the tails of the branches to what
+ * follows makes it hard to avoid.
+ */
+static char *
+reg(paren, flagp)
+int paren;			/* Parenthesized? */
+int *flagp;
+{
+	register char *ret;
+	register char *br;
+	register char *ender;
+	register int parno;
+	int flags;
+
+	*flagp = HASWIDTH;	/* Tentatively. */
+
+	/* Make an OPEN node, if parenthesized. */
+	if (paren) {
+		if (regnpar >= NSUBEXP)
+			FAIL("too many ()");
+		parno = regnpar;
+		regnpar++;
+		ret = regnode(OPEN+parno);
+	} else
+		ret = NULL;
+
+	/* Pick up the branches, linking them together. */
+	br = regbranch(&flags);
+	if (br == NULL)
+		return(NULL);
+	if (ret != NULL)
+		regtail(ret, br);	/* OPEN -> first. */
+	else
+		ret = br;
+	if (!(flags&HASWIDTH))
+		*flagp &= ~HASWIDTH;
+	*flagp |= flags&SPSTART;
+	while (*regparse == '|') {
+		regparse++;
+		br = regbranch(&flags);
+		if (br == NULL)
+			return(NULL);
+		regtail(ret, br);	/* BRANCH -> BRANCH. */
+		if (!(flags&HASWIDTH))
+			*flagp &= ~HASWIDTH;
+		*flagp |= flags&SPSTART;
+	}
+
+	/* Make a closing node, and hook it on the end. */
+	ender = regnode((paren) ? CLOSE+parno : END);	
+	regtail(ret, ender);
+
+	/* Hook the tails of the branches to the closing node. */
+	for (br = ret; br != NULL; br = regnext(br))
+		regoptail(br, ender);
+
+	/* Check for proper termination. */
+	if (paren && *regparse++ != ')') {
+		FAIL("unmatched ()");
+	} else if (!paren && *regparse != '\0') {
+		if (*regparse == ')') {
+			FAIL("unmatched ()");
+		} else
+			FAIL("junk on end");	/* "Can't happen". */
+		/* NOTREACHED */
+	}
+
+	return(ret);
+}
+
+/*
+ - regbranch - one alternative of an | operator
+ *
+ * Implements the concatenation operator.
+ */
+static char *
+regbranch(flagp)
+int *flagp;
+{
+	register char *ret;
+	register char *chain;
+	register char *latest;
+	int flags;
+
+	*flagp = WORST;		/* Tentatively. */
+
+	ret = regnode(BRANCH);
+	chain = NULL;
+	while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
+		latest = regpiece(&flags);
+		if (latest == NULL)
+			return(NULL);
+		*flagp |= flags&HASWIDTH;
+		if (chain == NULL)	/* First piece. */
+			*flagp |= flags&SPSTART;
+		else
+			regtail(chain, latest);
+		chain = latest;
+	}
+	if (chain == NULL)	/* Loop ran zero times. */
+		(void) regnode(NOTHING);
+
+	return(ret);
+}
+
+/*
+ - regpiece - something followed by possible [*+?]
+ *
+ * Note that the branching code sequences used for ? and the general cases
+ * of * and + are somewhat optimized:  they use the same NOTHING node as
+ * both the endmarker for their branch list and the body of the last branch.
+ * It might seem that this node could be dispensed with entirely, but the
+ * endmarker role is not redundant.
+ */
+static char *
+regpiece(flagp)
+int *flagp;
+{
+	register char *ret;
+	register char op;
+	register char *next;
+	int flags;
+
+	ret = regatom(&flags);
+	if (ret == NULL)
+		return(NULL);
+
+	op = *regparse;
+	if (!ISMULT(op)) {
+		*flagp = flags;
+		return(ret);
+	}
+
+	if (!(flags&HASWIDTH) && op != '?')
+		FAIL("*+ operand could be empty");
+	*flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
+
+	if (op == '*' && (flags&SIMPLE))
+		reginsert(STAR, ret);
+	else if (op == '*') {
+		/* Emit x* as (x&|), where & means "self". */
+		reginsert(BRANCH, ret);			/* Either x */
+		regoptail(ret, regnode(BACK));		/* and loop */
+		regoptail(ret, ret);			/* back */
+		regtail(ret, regnode(BRANCH));		/* or */
+		regtail(ret, regnode(NOTHING));		/* null. */
+	} else if (op == '+' && (flags&SIMPLE))
+		reginsert(PLUS, ret);
+	else if (op == '+') {
+		/* Emit x+ as x(&|), where & means "self". */
+		next = regnode(BRANCH);			/* Either */
+		regtail(ret, next);
+		regtail(regnode(BACK), ret);		/* loop back */
+		regtail(next, regnode(BRANCH));		/* or */
+		regtail(ret, regnode(NOTHING));		/* null. */
+	} else if (op == '?') {
+		/* Emit x? as (x|) */
+		reginsert(BRANCH, ret);			/* Either x */
+		regtail(ret, regnode(BRANCH));		/* or */
+		next = regnode(NOTHING);		/* null. */
+		regtail(ret, next);
+		regoptail(ret, next);
+	}
+	regparse++;
+	if (ISMULT(*regparse))
+		FAIL("nested *?+");
+
+	return(ret);
+}
+
+/*
+ - regatom - the lowest level
+ *
+ * Optimization:  gobbles an entire sequence of ordinary characters so that
+ * it can turn them into a single node, which is smaller to store and
+ * faster to run.  Backslashed characters are exceptions, each becoming a
+ * separate node; the code is simpler that way and it's not worth fixing.
+ */
+static char *
+regatom(flagp)
+int *flagp;
+{
+	register char *ret;
+	int flags;
+
+	*flagp = WORST;		/* Tentatively. */
+
+	switch (*regparse++) {
+	case '^':
+		ret = regnode(BOL);
+		break;
+	case '$':
+		ret = regnode(EOL);
+		break;
+	case '.':
+		ret = regnode(ANY);
+		*flagp |= HASWIDTH|SIMPLE;
+		break;
+	case '[': {
+			register int class;
+			register int classend;
+
+			if (*regparse == '^') {	/* Complement of range. */
+				ret = regnode(ANYBUT);
+				regparse++;
+			} else
+				ret = regnode(ANYOF);
+			if (*regparse == ']' || *regparse == '-')
+				regc(*regparse++);
+			while (*regparse != '\0' && *regparse != ']') {
+				if (*regparse == '-') {
+					regparse++;
+					if (*regparse == ']' || *regparse == '\0')
+						regc('-');
+					else {
+						class = UCHARAT(regparse-2)+1;
+						classend = UCHARAT(regparse);
+						if (class > classend+1)
+							FAIL("invalid [] range");
+						for (; class <= classend; class++)
+							regc(class);
+						regparse++;
+					}
+				} else
+					regc(*regparse++);
+			}
+			regc('\0');
+			if (*regparse != ']')
+				FAIL("unmatched []");
+			regparse++;
+			*flagp |= HASWIDTH|SIMPLE;
+		}
+		break;
+	case '(':
+		ret = reg(1, &flags);
+		if (ret == NULL)
+			return(NULL);
+		*flagp |= flags&(HASWIDTH|SPSTART);
+		break;
+	case '\0':
+	case '|':
+	case ')':
+		FAIL("internal urp");	/* Supposed to be caught earlier. */
+		break;
+	case '?':
+	case '+':
+	case '*':
+		FAIL("?+* follows nothing");
+		break;
+	case '\\':
+		if (*regparse == '\0')
+			FAIL("trailing \\");
+		ret = regnode(EXACTLY);
+		regc(*regparse++);
+		regc('\0');
+		*flagp |= HASWIDTH|SIMPLE;
+		break;
+	default: {
+			register int len;
+			register char ender;
+
+			regparse--;
+			len = strcspn(regparse, META);
+			if (len <= 0)
+				FAIL("internal disaster");
+			ender = *(regparse+len);
+			if (len > 1 && ISMULT(ender))
+				len--;		/* Back off clear of ?+* operand. */
+			*flagp |= HASWIDTH;
+			if (len == 1)
+				*flagp |= SIMPLE;
+			ret = regnode(EXACTLY);
+			while (len > 0) {
+				regc(*regparse++);
+				len--;
+			}
+			regc('\0');
+		}
+		break;
+	}
+
+	return(ret);
+}
+
+/*
+ - regnode - emit a node
+ */
+static char *			/* Location. */
+regnode(op)
+char op;
+{
+	register char *ret;
+	register char *ptr;
+
+	ret = regcode;
+	if (ret == &regdummy) {
+		regsize += 3;
+		return(ret);
+	}
+
+	ptr = ret;
+	*ptr++ = op;
+	*ptr++ = '\0';		/* Null "next" pointer. */
+	*ptr++ = '\0';
+	regcode = ptr;
+
+	return(ret);
+}
+
+/*
+ - regc - emit (if appropriate) a byte of code
+ */
+static void
+regc(b)
+char b;
+{
+	if (regcode != &regdummy)
+		*regcode++ = b;
+	else
+		regsize++;
+}
+
+/*
+ - reginsert - insert an operator in front of already-emitted operand
+ *
+ * Means relocating the operand.
+ */
+static void
+reginsert(op, opnd)
+char op;
+char *opnd;
+{
+	register char *src;
+	register char *dst;
+	register char *place;
+
+	if (regcode == &regdummy) {
+		regsize += 3;
+		return;
+	}
+
+	src = regcode;
+	regcode += 3;
+	dst = regcode;
+	while (src > opnd)
+		*--dst = *--src;
+
+	place = opnd;		/* Op node, where operand used to be. */
+	*place++ = op;
+	*place++ = '\0';
+	*place++ = '\0';
+}
+
+/*
+ - regtail - set the next-pointer at the end of a node chain
+ */
+static void
+regtail(p, val)
+char *p;
+char *val;
+{
+	register char *scan;
+	register char *temp;
+	register int offset;
+
+	if (p == &regdummy)
+		return;
+
+	/* Find last node. */
+	scan = p;
+	for (;;) {
+		temp = regnext(scan);
+		if (temp == NULL)
+			break;
+		scan = temp;
+	}
+
+	if (OP(scan) == BACK)
+		offset = scan - val;
+	else
+		offset = val - scan;
+	*(scan+1) = (offset>>8)&0377;
+	*(scan+2) = offset&0377;
+}
+
+/*
+ - regoptail - regtail on operand of first argument; nop if operandless
+ */
+static void
+regoptail(p, val)
+char *p;
+char *val;
+{
+	/* "Operandless" and "op != BRANCH" are synonymous in practice. */
+	if (p == NULL || p == &regdummy || OP(p) != BRANCH)
+		return;
+	regtail(OPERAND(p), val);
+}
+
+/*
+ * regexec and friends
+ */
+
+/*
+ * Global work variables for regexec().
+ */
+static char *reginput;		/* String-input pointer. */
+static char *regbol;		/* Beginning of input, for ^ check. */
+static char **regstartp;	/* Pointer to startp array. */
+static char **regendp;		/* Ditto for endp. */
+
+/*
+ * Forwards.
+ */
+STATIC int regtry(regexp *prog, char *string);
+STATIC int regmatch(char *prog);
+STATIC int regrepeat(char *p);
+
+#ifdef DEBUG
+int regnarrate = 0;
+void regdump();
+STATIC char *regprop(char *op);
+#endif
+
+/*
+ - regexec - match a regexp against a string
+ */
+int
+regexec(prog, string)
+register regexp *prog;
+register char *string;
+{
+	register char *s;
+
+	/* Be paranoid... */
+	if (prog == NULL || string == NULL) {
+		regerror("NULL parameter");
+		return(0);
+	}
+
+	/* Check validity of program. */
+	if (UCHARAT(prog->program) != MAGIC) {
+		regerror("corrupted program");
+		return(0);
+	}
+
+	/* If there is a "must appear" string, look for it. */
+	if (prog->regmust != NULL) {
+		s = string;
+		while ((s = strchr(s, prog->regmust[0])) != NULL) {
+			if (strncmp(s, prog->regmust, prog->regmlen) == 0)
+				break;	/* Found it. */
+			s++;
+		}
+		if (s == NULL)	/* Not present. */
+			return(0);
+	}
+
+	/* Mark beginning of line for ^ . */
+	regbol = string;
+
+	/* Simplest case:  anchored match need be tried only once. */
+	if (prog->reganch)
+		return(regtry(prog, string));
+
+	/* Messy cases:  unanchored match. */
+	s = string;
+	if (prog->regstart != '\0')
+		/* We know what char it must start with. */
+		while ((s = strchr(s, prog->regstart)) != NULL) {
+			if (regtry(prog, s))
+				return(1);
+			s++;
+		}
+	else
+		/* We don't -- general case. */
+		do {
+			if (regtry(prog, s))
+				return(1);
+		} while (*s++ != '\0');
+
+	/* Failure. */
+	return(0);
+}
+
+/*
+ - regtry - try match at specific point
+ */
+static int			/* 0 failure, 1 success */
+regtry(prog, string)
+regexp *prog;
+char *string;
+{
+	register int i;
+	register char **sp;
+	register char **ep;
+
+	reginput = string;
+	regstartp = prog->startp;
+	regendp = prog->endp;
+
+	sp = prog->startp;
+	ep = prog->endp;
+	for (i = NSUBEXP; i > 0; i--) {
+		*sp++ = NULL;
+		*ep++ = NULL;
+	}
+	if (regmatch(prog->program + 1)) {
+		prog->startp[0] = string;
+		prog->endp[0] = reginput;
+		return(1);
+	} else
+		return(0);
+}
+
+/*
+ - regmatch - main matching routine
+ *
+ * Conceptually the strategy is simple:  check to see whether the current
+ * node matches, call self recursively to see whether the rest matches,
+ * and then act accordingly.  In practice we make some effort to avoid
+ * recursion, in particular by going through "ordinary" nodes (that don't
+ * need to know whether the rest of the match failed) by a loop instead of
+ * by recursion.
+ */
+static int			/* 0 failure, 1 success */
+regmatch(prog)
+char *prog;
+{
+	register char *scan;	/* Current node. */
+	char *next;		/* Next node. */
+
+	scan = prog;
+#ifdef DEBUG
+	if (scan != NULL && regnarrate)
+		fprintf(stderr, "%s(\n", regprop(scan));
+#endif
+	while (scan != NULL) {
+#ifdef DEBUG
+		if (regnarrate)
+			fprintf(stderr, "%s...\n", regprop(scan));
+#endif
+		next = regnext(scan);
+
+		switch (OP(scan)) {
+		case BOL:
+			if (reginput != regbol)
+				return(0);
+			break;
+		case EOL:
+			if (*reginput != '\0')
+				return(0);
+			break;
+		case ANY:
+			if (*reginput == '\0')
+				return(0);
+			reginput++;
+			break;
+		case EXACTLY: {
+				register int len;
+				register char *opnd;
+
+				opnd = OPERAND(scan);
+				/* Inline the first character, for speed. */
+				if (*opnd != *reginput)
+					return(0);
+				len = strlen(opnd);
+				if (len > 1 && strncmp(opnd, reginput, len) != 0)
+					return(0);
+				reginput += len;
+			}
+			break;
+		case ANYOF:
+			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
+				return(0);
+			reginput++;
+			break;
+		case ANYBUT:
+			if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
+				return(0);
+			reginput++;
+			break;
+		case NOTHING:
+			break;
+		case BACK:
+			break;
+		case OPEN+1:
+		case OPEN+2:
+		case OPEN+3:
+		case OPEN+4:
+		case OPEN+5:
+		case OPEN+6:
+		case OPEN+7:
+		case OPEN+8:
+		case OPEN+9: {
+				register int no;
+				register char *save;
+
+				no = OP(scan) - OPEN;
+				save = reginput;
+
+				if (regmatch(next)) {
+					/*
+					 * Don't set startp if some later
+					 * invocation of the same parentheses
+					 * already has.
+					 */
+					if (regstartp[no] == NULL)
+						regstartp[no] = save;
+					return(1);
+				} else
+					return(0);
+			}
+			break;
+		case CLOSE+1:
+		case CLOSE+2:
+		case CLOSE+3:
+		case CLOSE+4:
+		case CLOSE+5:
+		case CLOSE+6:
+		case CLOSE+7:
+		case CLOSE+8:
+		case CLOSE+9: {
+				register int no;
+				register char *save;
+
+				no = OP(scan) - CLOSE;
+				save = reginput;
+
+				if (regmatch(next)) {
+					/*
+					 * Don't set endp if some later
+					 * invocation of the same parentheses
+					 * already has.
+					 */
+					if (regendp[no] == NULL)
+						regendp[no] = save;
+					return(1);
+				} else
+					return(0);
+			}
+			break;
+		case BRANCH: {
+				register char *save;
+
+				if (OP(next) != BRANCH)		/* No choice. */
+					next = OPERAND(scan);	/* Avoid recursion. */
+				else {
+					do {
+						save = reginput;
+						if (regmatch(OPERAND(scan)))
+							return(1);
+						reginput = save;
+						scan = regnext(scan);
+					} while (scan != NULL && OP(scan) == BRANCH);
+					return(0);
+					/* NOTREACHED */
+				}
+			}
+			break;
+		case STAR:
+		case PLUS: {
+				register char nextch;
+				register int no;
+				register char *save;
+				register int min;
+
+				/*
+				 * Lookahead to avoid useless match attempts
+				 * when we know what character comes next.
+				 */
+				nextch = '\0';
+				if (OP(next) == EXACTLY)
+					nextch = *OPERAND(next);
+				min = (OP(scan) == STAR) ? 0 : 1;
+				save = reginput;
+				no = regrepeat(OPERAND(scan));
+				while (no >= min) {
+					/* If it could work, try it. */
+					if (nextch == '\0' || *reginput == nextch)
+						if (regmatch(next))
+							return(1);
+					/* Couldn't or didn't -- back up. */
+					no--;
+					reginput = save + no;
+				}
+				return(0);
+			}
+			break;
+		case END:
+			return(1);	/* Success! */
+			break;
+		default:
+			regerror("memory corruption");
+			return(0);
+			break;
+		}
+
+		scan = next;
+	}
+
+	/*
+	 * We get here only if there's trouble -- normally "case END" is
+	 * the terminating point.
+	 */
+	regerror("corrupted pointers");
+	return(0);
+}
+
+/*
+ - regrepeat - repeatedly match something simple, report how many
+ */
+static int
+regrepeat(p)
+char *p;
+{
+	register int count = 0;
+	register char *scan;
+	register char *opnd;
+
+	scan = reginput;
+	opnd = OPERAND(p);
+	switch (OP(p)) {
+	case ANY:
+		count = strlen(scan);
+		scan += count;
+		break;
+	case EXACTLY:
+		while (*opnd == *scan) {
+			count++;
+			scan++;
+		}
+		break;
+	case ANYOF:
+		while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
+			count++;
+			scan++;
+		}
+		break;
+	case ANYBUT:
+		while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
+			count++;
+			scan++;
+		}
+		break;
+	default:		/* Oh dear.  Called inappropriately. */
+		regerror("internal foulup");
+		count = 0;	/* Best compromise. */
+		break;
+	}
+	reginput = scan;
+
+	return(count);
+}
+
+/*
+ - regnext - dig the "next" pointer out of a node
+ */
+static char *
+regnext(p)
+register char *p;
+{
+	register int offset;
+
+	if (p == &regdummy)
+		return(NULL);
+
+	offset = NEXT(p);
+	if (offset == 0)
+		return(NULL);
+
+	if (OP(p) == BACK)
+		return(p-offset);
+	else
+		return(p+offset);
+}
+
+#ifdef DEBUG
+
+STATIC char *regprop();
+
+/*
+ - regdump - dump a regexp onto stdout in vaguely comprehensible form
+ */
+void
+regdump(r)
+regexp *r;
+{
+	register char *s;
+	register char op = EXACTLY;	/* Arbitrary non-END op. */
+	register char *next;
+	extern char *strchr();
+
+
+	s = r->program + 1;
+	while (op != END) {	/* While that wasn't END last time... */
+		op = OP(s);
+		printf("%2d%s", s-r->program, regprop(s));	/* Where, what. */
+		next = regnext(s);
+		if (next == NULL)		/* Next ptr. */
+			printf("(0)");
+		else 
+			printf("(%d)", (s-r->program)+(next-s));
+		s += 3;
+		if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
+			/* Literal string, where present. */
+			while (*s != '\0') {
+				putchar(*s);
+				s++;
+			}
+			s++;
+		}
+		putchar('\n');
+	}
+
+	/* Header fields of interest. */
+	if (r->regstart != '\0')
+		printf("start `%c' ", r->regstart);
+	if (r->reganch)
+		printf("anchored ");
+	if (r->regmust != NULL)
+		printf("must have \"%s\"", r->regmust);
+	printf("\n");
+}
+
+/*
+ - regprop - printable representation of opcode
+ */
+static char *
+regprop(op)
+char *op;
+{
+	register char *p;
+	static char buf[50];
+
+	(void) strcpy(buf, ":");
+
+	switch (OP(op)) {
+	case BOL:
+		p = "BOL";
+		break;
+	case EOL:
+		p = "EOL";
+		break;
+	case ANY:
+		p = "ANY";
+		break;
+	case ANYOF:
+		p = "ANYOF";
+		break;
+	case ANYBUT:
+		p = "ANYBUT";
+		break;
+	case BRANCH:
+		p = "BRANCH";
+		break;
+	case EXACTLY:
+		p = "EXACTLY";
+		break;
+	case NOTHING:
+		p = "NOTHING";
+		break;
+	case BACK:
+		p = "BACK";
+		break;
+	case END:
+		p = "END";
+		break;
+	case OPEN+1:
+	case OPEN+2:
+	case OPEN+3:
+	case OPEN+4:
+	case OPEN+5:
+	case OPEN+6:
+	case OPEN+7:
+	case OPEN+8:
+	case OPEN+9:
+		sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
+		p = NULL;
+		break;
+	case CLOSE+1:
+	case CLOSE+2:
+	case CLOSE+3:
+	case CLOSE+4:
+	case CLOSE+5:
+	case CLOSE+6:
+	case CLOSE+7:
+	case CLOSE+8:
+	case CLOSE+9:
+		sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
+		p = NULL;
+		break;
+	case STAR:
+		p = "STAR";
+		break;
+	case PLUS:
+		p = "PLUS";
+		break;
+	default:
+		regerror("corrupted opcode");
+		break;
+	}
+	if (p != NULL)
+		(void) strcat(buf, p);
+	return(buf);
+}
+#endif
+
+/*
+ * The following is provided for those people who do not have strcspn() in
+ * their C libraries.  They should get off their butts and do something
+ * about it; at least one public-domain implementation of those (highly
+ * useful) string routines has been published on Usenet.
+ */
+/*
+ * strcspn - find length of initial segment of s1 consisting entirely
+ * of characters not from s2
+ */
+
+static int
+strcspn(s1, s2)
+char *s1;
+char *s2;
+{
+	register char *scan1;
+	register char *scan2;
+	register int count;
+
+	count = 0;
+	for (scan1 = s1; *scan1 != '\0'; scan1++) {
+		for (scan2 = s2; *scan2 != '\0';)	/* ++ moved down. */
+			if (*scan1 == *scan2++)
+				return(count);
+		count++;
+	}
+	return(count);
+}
diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regexp.h linux-2.4.22-rc2/net/sched/regexp/regexp.h
--- linux-2.4.22-rc2-stock/net/sched/regexp/regexp.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/regexp/regexp.h	2003-08-15 21:56:33.000000000 -0500
@@ -0,0 +1,20 @@
+/*
+ * Definitions etc. for regexp(3) routines.
+ *
+ * Caveat:  this is V8 regexp(3) [actually, a reimplementation thereof],
+ * not the System V one.
+ */
+#define NSUBEXP  10
+typedef struct regexp {
+	char *startp[NSUBEXP];
+	char *endp[NSUBEXP];
+	char regstart;		/* Internal use only. */
+	char reganch;		/* Internal use only. */
+	char *regmust;		/* Internal use only. */
+	int regmlen;		/* Internal use only. */
+	char program[1];	/* Unwarranted chumminess with compiler. */
+} regexp;
+
+extern regexp *regcomp(char *exp, int *patternlength);
+int regexec(regexp *prog, char *string);
+void regsub(regexp *prog, char *source, char *dest);
diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regmagic.h linux-2.4.22-rc2/net/sched/regexp/regmagic.h
--- linux-2.4.22-rc2-stock/net/sched/regexp/regmagic.h	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/regexp/regmagic.h	2003-08-15 16:30:21.000000000 -0500
@@ -0,0 +1,5 @@
+/*
+ * The first byte of the regexp internal "program" is actually this magic
+ * number; the start node begins in the second byte.
+ */
+#define	MAGIC	0234
diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regsub.c linux-2.4.22-rc2/net/sched/regexp/regsub.c
--- linux-2.4.22-rc2-stock/net/sched/regexp/regsub.c	1969-12-31 19:00:00.000000000 -0500
+++ linux-2.4.22-rc2/net/sched/regexp/regsub.c	2003-08-18 12:33:24.000000000 -0500
@@ -0,0 +1,88 @@
+/*
+ * regsub
+ * @(#)regsub.c	1.3 of 2 April 86
+ *
+ *	Copyright (c) 1986 by University of Toronto.
+ *	Written by Henry Spencer.  Not derived from licensed software.
+ *
+ *	Permission is granted to anyone to use this software for any
+ *	purpose on any computer system, and to redistribute it freely,
+ *	subject to the following restrictions:
+ *
+ *	1. The author is not responsible for the consequences of use of
+ *		this software, no matter how awful, even if they arise
+ *		from defects in it.
+ *
+ *	2. The origin of this software must not be misrepresented, either
+ *		by explicit claim or by omission.
+ *
+ *	3. Altered versions must be plainly marked as such, and must not
+ *		be misrepresented as being the original software.
+ *
+ *
+ * This code was modified by Ethan Sommer to work within the kernel
+ * (it now uses kmalloc etc..)
+ *
+ */
+#include "regexp.h"
+#include "regmagic.h"
+#include <linux/string.h>
+
+#ifndef CHARBITS
+#define	UCHARAT(p)	((int)*(unsigned char *)(p))
+#else
+#define	UCHARAT(p)	((int)*(p)&CHARBITS)
+#endif
+
+extern void regerror(char * s);
+
+/*
+ - regsub - perform substitutions after a regexp match
+ */
+void
+regsub(regexp * prog, char * source, char * dest)
+{
+	register char *src;
+	register char *dst;
+	register char c;
+	register int no;
+	register int len;
+	
+	/* Not necessary and gcc doesn't like it -MLS */
+	/*extern char *strncpy();*/
+
+	if (prog == NULL || source == NULL || dest == NULL) {
+		regerror("NULL parm to regsub");
+		return;
+	}
+	if (UCHARAT(prog->program) != MAGIC) {
+		regerror("damaged regexp fed to regsub");
+		return;
+	}
+
+	src = source;
+	dst = dest;
+	while ((c = *src++) != '\0') {
+		if (c == '&')
+			no = 0;
+		else if (c == '\\' && '0' <= *src && *src <= '9')
+			no = *src++ - '0';
+		else
+			no = -1;
+
+		if (no < 0) {	/* Ordinary character. */
+			if (c == '\\' && (*src == '\\' || *src == '&'))
+				c = *src++;
+			*dst++ = c;
+		} else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
+			len = prog->endp[no] - prog->startp[no];
+			(void) strncpy(dst, prog->startp[no], len);
+			dst += len;
+			if (len != 0 && *(dst-1) == '\0') {	/* strncpy hit NUL. */
+				regerror("damaged match string");
+				return;
+			}
+		}
+	}
+	*dst++ = '\0';
+}