]> git.pld-linux.org Git - packages/kernel.git/blame - layer7-kernel2.4patch-v0.1.4.patch
- added description of djurban's branch
[packages/kernel.git] / layer7-kernel2.4patch-v0.1.4.patch
CommitLineData
c6ded00f
AM
1diff -Naur linux-2.4.22-rc2-stock/Documentation/Configure.help linux-2.4.22-rc2/Documentation/Configure.help
2--- linux-2.4.22-rc2-stock/Documentation/Configure.help 2003-08-14 16:52:09.000000000 -0500
3+++ linux-2.4.22-rc2/Documentation/Configure.help 2003-08-18 12:41:10.000000000 -0500
4@@ -10241,6 +10241,19 @@
5 whenever you want). If you want to compile it as a module, say M
6 here and read <file:Documentation/modules.txt>.
7
8+Layer7 classifier
9+CONFIG_NET_CLS_LAYER7
10+ Say Y if you want to be able to classify connetions (and their
11+ packets) based on application layer data. This is necessary if
12+ you wish to classify applications such as peer-to-peer filesharing
13+ systems that do not always use the same port. Say N if unsure.
14+
15+ This code is also available as a module called cls_layer7 ( = code
16+ which can be inserted in and removed from the running kernel
17+ whenever you want). If you want to compile it as a module, say M
18+ here and read <file:Documentation/modules.txt>.
19+
20+
21 Special RSVP classifier
22 CONFIG_NET_CLS_RSVP
23 The Resource Reservation Protocol (RSVP) permits end systems to
24diff -Naur linux-2.4.22-rc2-stock/include/linux/netfilter_ipv4/ip_conntrack.h linux-2.4.22-rc2/include/linux/netfilter_ipv4/ip_conntrack.h
25--- linux-2.4.22-rc2-stock/include/linux/netfilter_ipv4/ip_conntrack.h 2003-08-14 16:50:41.000000000 -0500
26+++ linux-2.4.22-rc2/include/linux/netfilter_ipv4/ip_conntrack.h 2003-08-18 12:32:42.000000000 -0500
27@@ -189,6 +189,10 @@
28 packet has to the conntrack */
29 struct nf_ct_info infos[IP_CT_NUMBER];
30
31+#if defined(CONFIG_NET_CLS_LAYER7) || defined(CONFIG_NET_CLS_LAYER7_MODULE)
32+ struct timespec timestamp;
33+#endif
34+
35 /* Storage reserved for other modules: */
36
37 union ip_conntrack_proto proto;
38diff -Naur linux-2.4.22-rc2-stock/include/linux/pkt_cls.h linux-2.4.22-rc2/include/linux/pkt_cls.h
39--- linux-2.4.22-rc2-stock/include/linux/pkt_cls.h 2003-08-14 16:50:41.000000000 -0500
40+++ linux-2.4.22-rc2/include/linux/pkt_cls.h 2003-08-18 12:26:32.000000000 -0500
41@@ -158,4 +158,20 @@
42
43 #define TCA_TCINDEX_MAX TCA_TCINDEX_POLICE
44
45+/* Added by Justin */
46+/* Layer 7 filter */
47+enum
48+{
49+ TCA_LAYER7_UNSPEC,
50+ TCA_LAYER7_HASH,
51+ TCA_LAYER7_MASK,
52+ TCA_LAYER7_SHIFT,
53+ TCA_LAYER7_PROTOCOL,
54+ TCA_LAYER7_CLASSID,
55+ TCA_LAYER7_POLICE,
56+};
57+
58+#define TCA_LAYER7_MAX TCA_LAYER7_POLICE
59+
60 #endif
61+
62diff -Naur linux-2.4.22-rc2-stock/net/ipv4/netfilter/Config.in linux-2.4.22-rc2/net/ipv4/netfilter/Config.in
63--- linux-2.4.22-rc2-stock/net/ipv4/netfilter/Config.in 2003-08-14 16:50:58.000000000 -0500
64+++ linux-2.4.22-rc2/net/ipv4/netfilter/Config.in 2003-08-14 19:34:45.000000000 -0500
65@@ -4,7 +4,7 @@
66 mainmenu_option next_comment
67 comment ' IP: Netfilter Configuration'
68
69-tristate 'Connection tracking (required for masq/NAT)' CONFIG_IP_NF_CONNTRACK
70+tristate 'Connection tracking (required for masq/NAT + layer7)' CONFIG_IP_NF_CONNTRACK
71 if [ "$CONFIG_IP_NF_CONNTRACK" != "n" ]; then
72 dep_tristate ' FTP protocol support' CONFIG_IP_NF_FTP $CONFIG_IP_NF_CONNTRACK
73 dep_tristate ' Amanda protocol support' CONFIG_IP_NF_AMANDA $CONFIG_IP_NF_CONNTRACK
74diff -Naur linux-2.4.22-rc2-stock/net/sched/Config.in linux-2.4.22-rc2/net/sched/Config.in
75--- linux-2.4.22-rc2-stock/net/sched/Config.in 2003-08-14 16:51:00.000000000 -0500
76+++ linux-2.4.22-rc2/net/sched/Config.in 2003-08-15 16:32:19.000000000 -0500
77@@ -32,6 +32,7 @@
78 fi
79 tristate ' Firewall based classifier' CONFIG_NET_CLS_FW
80 tristate ' U32 classifier' CONFIG_NET_CLS_U32
81+ dep_tristate ' Layer7 classifier' CONFIG_NET_CLS_LAYER7 $CONFIG_IP_NF_CONNTRACK $CONFIG_NETFILTER $CONFIG_EXPERIMENTAL
82 if [ "$CONFIG_NET_QOS" = "y" ]; then
83 tristate ' Special RSVP classifier' CONFIG_NET_CLS_RSVP
84 tristate ' Special RSVP classifier for IPv6' CONFIG_NET_CLS_RSVP6
85diff -Naur linux-2.4.22-rc2-stock/net/sched/Makefile linux-2.4.22-rc2/net/sched/Makefile
86--- linux-2.4.22-rc2-stock/net/sched/Makefile 2003-08-14 16:50:59.000000000 -0500
87+++ linux-2.4.22-rc2/net/sched/Makefile 2003-08-15 21:57:27.000000000 -0500
88@@ -31,5 +31,6 @@
89 obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
90 obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
91 obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
92+obj-$(CONFIG_NET_CLS_LAYER7) += cls_layer7.o regexp/regexp.o regexp/regsub.o
93
94 include $(TOPDIR)/Rules.make
95diff -Naur linux-2.4.22-rc2-stock/net/sched/cls_api.c linux-2.4.22-rc2/net/sched/cls_api.c
96--- linux-2.4.22-rc2-stock/net/sched/cls_api.c 2003-08-14 16:51:00.000000000 -0500
97+++ linux-2.4.22-rc2/net/sched/cls_api.c 2003-08-15 16:32:19.000000000 -0500
98@@ -466,5 +466,9 @@
99 #ifdef CONFIG_NET_CLS_RSVP6
100 INIT_TC_FILTER(rsvp6);
101 #endif
102+#ifdef CONFIG_NET_CLS_LAYER7
103+ INIT_TC_FILTER(layer7);
104+ layer7_init_proc();
105+#endif
106 return 0;
107 }
108diff -Naur linux-2.4.22-rc2-stock/net/sched/cls_layer7.c linux-2.4.22-rc2/net/sched/cls_layer7.c
109--- linux-2.4.22-rc2-stock/net/sched/cls_layer7.c 1969-12-31 19:00:00.000000000 -0500
110+++ linux-2.4.22-rc2/net/sched/cls_layer7.c 2003-08-17 21:43:03.000000000 -0500
111@@ -0,0 +1,950 @@
112+/*
113+ net/sched/cls_layer7.c
114+
115+ Layer 7 (application layer) packet classifier.
116+
117+ Written by Matthew Strait, Ethan Sommer and Justin Levandoski, 2003.
118+
119+ Modeled after:
120+ cls_tcindex.c: Written 1998,1999 by Werner Almesberger, EPFL ICA
121+
122+ TODO:
123+ -Support IPv6
124+ -Do matching across multiple packets
125+ -Get a better regexp implementation, preferably one with documentation
126+ about what it can do!
127+ -Improve module unloading support, if possible (this may be tc's fault...)
128+ -Better support for connections with children (FTP, etc): ability to
129+ classify children seperately from their parents.
130+ -Finish writing the functions below that currently say "not implemented"
131+*/
132+
133+#include <linux/config.h>
134+#include <linux/module.h>
135+#include <linux/types.h>
136+#include <linux/kernel.h>
137+#include <linux/skbuff.h>
138+#include <linux/errno.h>
139+#include <linux/netdevice.h>
140+#include <net/ip.h>
141+#include <net/tcp.h>
142+#include <linux/if_ether.h>
143+#include <net/pkt_sched.h>
144+#include <net/route.h>
145+
146+#include <linux/netfilter_ipv4/ip_conntrack.h>
147+#include "regexp/regexp.h"
148+#include <linux/proc_fs.h>
149+
150+/* this needs to be last (or at least after some of the other stuff, or
151+ * else isprint() doesn't work! */
152+#include <linux/ctype.h>
153+
154+/* uncomment the next line to to get debugging information printed, including
155+dumps of the first few packets of each connection, classifications and warnings
156+about incomplete functions. */
157+#define LAYER7_DEBUG
158+
159+/* uncomment for more debugging info */
160+//#define LAYER7_DEBUG_MORE
161+
162+#ifdef LAYER7_DEBUG
163+ #define DPRINTK(format,args...) printk(format,##args)
164+#else
165+ #define DPRINTK(format,args...)
166+#endif
167+
168+#ifdef LAYER7_DEBUG_MORE
169+ #define DPRINTK2(format,args...) printk(format,##args)
170+#else
171+ #define DPRINTK2(format,args...)
172+#endif
173+
174+#define LAYER7_MAX_PATTERN_DEF_SIZE 8192
175+#define LAYER7_MAX_PROTOCOL_NAME_SIZE 256
176+
177+#define PRIV(tp) ((struct layer7_data *) (tp)->root)
178+
179+struct layer7_filter_result {
180+ struct tcf_police *police;
181+ struct tcf_result res;
182+};
183+
184+struct layer7_filter {
185+ __u16 key;
186+ struct layer7_filter_result result;
187+ struct layer7_filter *next;
188+};
189+
190+struct layer7_data {
191+ struct layer7_filter_result * perfect; /* perfect hash; NULL if none */
192+ struct layer7_filter ** h; /* imperfect hash; only used if !perfect;
193+ NULL if unused */
194+ __u16 mask; /* AND key with mask */
195+ int shift; /* shift ANDed key to the right */
196+ int hash; /* hash table size; 0 if undefined */
197+ int alloc_hash; /* allocated size */
198+ int fall_through; /* 0: only classify if explicit match */
199+};
200+
201+/* one element in the classification hash table, in theory each "connection"
202+ * (aka socket) should be remembered here so that it doesn't need to be
203+ * reclassified for each packet once the "connection" has been identified */
204+struct ct_hashElem {
205+ u32 classid;
206+ u32 hash;
207+ int num_pkts_so_far;
208+ int classified;
209+};
210+
211+/* hash table that matches connections to the connection's state */
212+struct ct_hashElem currentSockets[32768];
213+
214+/* a pattern defined by writing to /proc/net/layer7_protocols */
215+struct layer7_pattern {
216+ char * name;
217+ regexp * pattern;
218+ int patternsize;
219+ char * uncomppattern;
220+};
221+
222+/* pattern classification pair (aka filter rule) */
223+struct layer7_patclas_pair {
224+ regexp *pattern;
225+ u32 classification;
226+ u32 handle;
227+ void *parent;
228+};
229+
230+
231+/* all the rules we are currently attempting to match on */
232+struct layer7_patclas_pair *layer7_patclas_pairs = NULL;
233+
234+/* how many pairs we have so far */
235+int layer7_num_patclas_pairs = 0;
236+
237+/* array of all the patterns which have been defined *
238+ * and an int to keep track of how many we have */
239+struct layer7_pattern * layer7_patterns = NULL;
240+int layer7_num_patterns = 0;
241+
242+/* the char* which holds the pattern definitions given to us
243+ * through the /proc filesystem */
244+char * layer7_unparsed_patterns = NULL;
245+
246+/* clear out all of the patterns, so that new ones can be defined
247+ * this is called each time someone writes to /proc/net/layer7_protocols
248+ * before the new protocols are read in */
249+void clear_layer7_patterns( void )
250+{
251+ int x;
252+ for (x = 0; x < layer7_num_patterns; x++) {
253+ kfree(layer7_patterns[x].name);
254+ kfree(layer7_patterns[x].pattern);
255+ kfree(layer7_patterns[x].uncomppattern);
256+ }
257+ kfree(layer7_patterns);
258+ layer7_num_patterns = 0;
259+}
260+
261+
262+/*
263+ * Define a new pattern (which consists of a name (eg "http") and a regular
264+ * expression (eg "http.*get"))
265+ *
266+ * this is made to be memory efficent at the cost of time
267+ * (it reallocates the memory each time so it only uses exactly
268+ * the ammount necesary) because it will only be called one time per
269+ * pattern definition */
270+void add_layer7_pattern(const char *name, char *pattern)
271+{
272+ struct layer7_pattern *newpatterns=NULL;
273+ int x;
274+ /* first see if we already have a pattern by that name */
275+ for (x = 0; x < layer7_num_patterns; x++){
276+ if (!strcmp(name, layer7_patterns[x].name)) {
277+ /* keep a copy of the old regexp in case the new comp fails */
278+ regexp * oldpattern = kmalloc(layer7_patterns[x].patternsize, GFP_KERNEL);
279+ memcpy(oldpattern, layer7_patterns[x].pattern, layer7_patterns[x].patternsize);
280+
281+ /* just recompile the regexp and return */
282+ /* compile the pattern (we only want to do this once */
283+ if (!(layer7_patterns[x].pattern =
284+ regcomp(pattern, &layer7_patterns[x].patternsize))) /* if regcomp fails */
285+ {
286+ printk("<3>layer7: ERROR COMPILING REGEX \"%s\"\nold regex will be kept instead\n", pattern);
287+ /* go back to the old regex */
288+ layer7_patterns[x].pattern = oldpattern;
289+ }
290+ else
291+ {
292+ kfree(layer7_patterns[x].uncomppattern);
293+ layer7_patterns[x].uncomppattern =
294+ kmalloc(strlen(pattern)+1, GFP_KERNEL);
295+ strcpy(layer7_patterns[x].uncomppattern, pattern);
296+ kfree(oldpattern);
297+ }
298+
299+ return;
300+ }
301+ }
302+
303+ /* if we have not found a pattern by that name add a new one*/
304+
305+ /* allocate the memory for the new array */
306+ newpatterns = kmalloc( sizeof(struct layer7_pattern) *
307+ (layer7_num_patterns + 1), GFP_KERNEL);
308+
309+ if (layer7_num_patterns > 0)
310+ {
311+ /* copy any previously declared patterns in */
312+ memcpy(newpatterns, layer7_patterns,
313+ sizeof(struct layer7_pattern) * (layer7_num_patterns + 1));
314+ /* free the memory the old patterns were using */
315+ kfree(layer7_patterns);
316+ }
317+ layer7_num_patterns++;
318+
319+ /* set the newpatterns to be the authoritative patterns */
320+ layer7_patterns = newpatterns;
321+
322+ /* copy the name */
323+ layer7_patterns[layer7_num_patterns-1].name =
324+ kmalloc(strlen(name)+1, GFP_KERNEL);
325+
326+ strcpy(layer7_patterns[layer7_num_patterns-1].name, name);
327+ /* copy the uncomp pattern */
328+ layer7_patterns[layer7_num_patterns-1].uncomppattern =
329+ kmalloc(strlen(pattern)+1, GFP_KERNEL);
330+
331+ strcpy(layer7_patterns[layer7_num_patterns-1].uncomppattern, pattern);
332+
333+ /* compile the pattern (we only want to do this once) */
334+ if (!(layer7_patterns[layer7_num_patterns-1].pattern =
335+ regcomp(pattern, &layer7_patterns[layer7_num_patterns-1].patternsize))) /* if regcomp fails */
336+ {
337+ printk("<3>layer7: ERROR COMPILING REGEX \"%s\"\n", pattern);
338+ /* make sure we don't use this regexp,
339+ if more are added they will just overwrite the bad regexp */
340+ layer7_num_patterns--;
341+ }
342+}
343+
344+/* add_layer7_filter_rule()
345+ * defines a new filtering rule, for example "any packet which matches
346+ * the pattern called http should be classified as 0x10001"
347+ *
348+ * this is made to be memory efficent at the cost of time (it reallocates the
349+ * memory each time so it only uses exactly the ammount necesary) because
350+ * it will only be called one time per pattern we are matching on per boot
351+ */
352+void add_layer7_filter_rule(const char *name, const u32 classification, const u32 handle, void * parent)
353+{
354+ int x;
355+ /* loop through all the patterns */
356+ for (x = 0; x < layer7_num_patterns; x++)
357+ {
358+ if (!strcmp(name, layer7_patterns[x].name)) {
359+
360+ /* allocate the memory for the new array */
361+ struct layer7_patclas_pair * newpairs =
362+ kmalloc( sizeof(struct layer7_patclas_pair) *
363+ (layer7_num_patclas_pairs + 1), GFP_KERNEL);
364+
365+ /* don't copy or free things if they don't exist yet*/
366+ if (layer7_num_patclas_pairs > 0) {
367+ /* copy any previously declared patterns in */
368+ memcpy(newpairs, layer7_patclas_pairs,
369+ sizeof(struct layer7_patclas_pair) *
370+ (layer7_num_patclas_pairs+1));
371+
372+ /* free the memory the old patterns were using */
373+ kfree(layer7_patclas_pairs);
374+ }
375+ layer7_num_patclas_pairs++;
376+
377+ /* set the newpatterns to be the authoritative patterns */
378+ layer7_patclas_pairs = newpairs;
379+
380+ /* copy in the pattern so that if it is freed we don't crash */
381+
382+ layer7_patclas_pairs[layer7_num_patclas_pairs - 1].pattern =
383+ kmalloc(layer7_patterns[x].patternsize, GFP_KERNEL);
384+ memcpy(layer7_patclas_pairs[layer7_num_patclas_pairs-1].pattern,
385+ layer7_patterns[x].pattern, layer7_patterns[x].patternsize);
386+
387+ layer7_patclas_pairs[layer7_num_patclas_pairs-1].classification =
388+ classification;
389+ layer7_patclas_pairs[layer7_num_patclas_pairs-1].handle =
390+ handle;
391+ layer7_patclas_pairs[layer7_num_patclas_pairs-1].parent =
392+ parent;
393+ return;
394+ }
395+ }
396+ printk("<3>layer-7: There is no rule for \"%s\"\n", name);
397+}
398+
399+/* this is a hash function which acts on the timespec to get a relatively
400+ * good hash. It uses 15 bit chunks and XORs them.
401+ * TODO: make the chunk size user defined so that the hash table
402+ * can be bigger/smaller? */
403+static int layer7_hash(struct timespec ts)
404+{
405+ int hash = (ts.tv_nsec&32767) ^
406+ ((ts.tv_nsec>>15)&32767) ^
407+ ((ts.tv_nsec>>30)&32767) ^
408+ (ts.tv_sec&32767) ^
409+ ((ts.tv_sec>>15)&32767) ^
410+ ((ts.tv_sec>>30)&32767);
411+ return hash;
412+}
413+
414+/* for debugging only -- enable and call this if you want lots of debug info */
415+#if 0
416+static void print_pkt_stuff(struct sk_buff *skb)
417+{
418+ int x;
419+
420+ /* set this pointer to the beginning of the TCP data,
421+ which is what we care about. */
422+ unsigned char * tcpdata = skb->data + app_data_offset(skb);
423+
424+ printk("Here's the IP header in hex:\n");
425+ for(x = 0; x < sizeof(struct iphdr); x++)
426+ printk(" %.2x", ((unsigned char *)skb->nh.iph)[x]);
427+ printk("\n");
428+
429+ /* when routing, this will just print the IP header again */
430+ printk("Here's the TCP header in hex:\n");
431+ for(x = 0; x < sizeof(struct tcphdr); x++)
432+ printk(" %.2x", ((unsigned char *)skb->h.th)[x]);
433+ printk("\n");
434+
435+ printk("TCP header detail: This packet is coming from %d.%d.%d.%d:%d\n",
436+ (skb->nh.iph->saddr&0x000000FF), (skb->nh.iph->saddr&0x0000FF00) >> 8,
437+ (skb->nh.iph->saddr&0x00FF0000) >> 16, (skb->nh.iph->saddr&0xFF000000) >> 24, skb->h.th->source);
438+ printk("and going to %d.%d.%d.%d:%d\n",
439+ (skb->nh.iph->daddr&0x000000FF), (skb->nh.iph->daddr&0x0000FF00) >> 8,
440+ (skb->nh.iph->daddr&0x00FF0000) >> 16, (skb->nh.iph->daddr&0xFF000000) >> 24, skb->h.th->dest);
441+
442+ printk("syn=%d ack=%d rst=%d fin=%d psh=%d urg=%d.\n",
443+ skb->h.th->syn, skb->h.th->ack, skb->h.th->rst,
444+ skb->h.th->fin, skb->h.th->psh, skb->h.th->urg);
445+
446+ printk("TCP data ('X' == non-printable) is:\n");
447+ for(x = 0; x < ( (int)skb->tail - (int)tcpdata ); x++)
448+ {
449+ if (isprint(tcpdata[x])) printk("%c", tcpdata[x]);
450+ else printk("X");
451+ }
452+ printk("\nAnd here's the same thing in hex:\n");
453+ for(x = 0; x < ( (int)skb->tail - (int)tcpdata ); x++)
454+ {
455+ printk(" %.2x", tcpdata[x]);
456+ }
457+ printk("\n\n");
458+
459+ printk("data ('X' == non-printable) is:\n");
460+ for(x = 0; x < ( (int)skb->tail - (int)skb->data ); x++)
461+ {
462+ if (isprint(skb->data[x])) printk("%c", skb->data[x]);
463+ else printk("X");
464+ }
465+ printk("\nAnd here's the same thing in hex:\n");
466+ for(x = 0; x < ( (int)skb->tail - (int)skb->data ); x++)
467+ {
468+ printk(" %.2x", skb->data[x]);
469+ }
470+ printk("\n\n");
471+}
472+#endif
473+
474+
475+/* These functions test what kind of packet we're dealing with.
476+include/linux/if_ether.h suggests that all packets are treated as
477+Ethernet, but I'm not absolutely sure, and the presence of *raw in
478+skb->mac troubles me. I depend on the IP header always starting at the
479+same offset, so if this is wrong, there's trouble. -MLS */
480+
481+static int is_ipv4(struct sk_buff * skb)
482+{
483+ /* I'm also not convinced that this code ever gets run if
484+ it isn't IP, since running dhclient (which should send ARPs or
485+ RARPs) doesn't cause this to print numbers other than 800.
486+ I'm not sure what other testing I can do. */
487+
488+ /* the htons is important. It fixes the endianness */
489+ if(htons(skb->protocol) != ETH_P_IP){
490+ return 0;
491+ }
492+
493+ return 1;
494+}
495+
496+/* I'd rather just call this "is_tcp", except it depends on it being IPv4 and
497+TCP could be used on top of other protocols */
498+static inline int is_tcp_over_ipv4(struct sk_buff * skb)
499+{
500+ /* I don't want to depend on skb->nh.iph->protocol being set, because
501+ I bet it isn't when we are acting as a switch, just like skb->h.th isn't
502+ when acting as a router. */
503+ #define IP_PROTO_OFFSET 9
504+
505+ /* If it's not TCP */
506+ if(skb->data[ETH_HLEN + IP_PROTO_OFFSET] != IPPROTO_TCP){
507+ return 0;
508+ }
509+
510+ return 1;
511+}
512+
513+/* Again, I'd rather just call this "is_udp"... */
514+static inline int is_udp_over_ipv4(struct sk_buff * skb)
515+{
516+ #define IP_PROTO_OFFSET 9
517+
518+ if(skb->data[ETH_HLEN + IP_PROTO_OFFSET] != IPPROTO_UDP){
519+ return 0;
520+ }
521+
522+ return 1;
523+}
524+
525+/* Returns the number of bytes into the skb->data that the application
526+data starts. This is a kludge because we don't know how to do it right,
527+or even if there really is a right way of doing it. This fact is why
528+we are not currently attempting to classify anything except TCP and UDP over
529+IPv4. */
530+/* HLEN == hl == header length. 4 == bytes/word */
531+static int app_data_offset(struct sk_buff *skb)
532+{
533+ /* 12 == offset into TCP header for the header length field. We can't get this
534+ with skb->h.th->doff because the tcphdr struct doesn't get set when routing */
535+ #define TCP_DOFF_OFF 12
536+
537+ /* ip_hl = 4*skb->nh.iph->ihl would usually work, but I bet the
538+ iph struct isn't set when acting as a switch! */
539+ int ip_hl = 4*(skb->data[ETH_HLEN] & 0x0f);
540+
541+ if( is_udp_over_ipv4(skb) ){
542+ return ETH_HLEN + ip_hl + 8; /* UDP header is always 8 bytes */
543+ }
544+ else{ /* is_tcp_over_ipv4 */
545+ int tcp_hl = 4*(skb->data[ETH_HLEN + ip_hl + TCP_DOFF_OFF] >> 4);
546+ return ETH_HLEN + ip_hl + tcp_hl;
547+ }
548+}
549+
550+/* This function is only called until the connection is classified or for the
551+ * first few packets (whichever limit comes first.) The classification happens
552+ * here. After a connection has been identified it continues to be of that
553+ * type. */
554+static int layer7_really_classify(struct sk_buff *skb, struct tcf_result *res, int hash, void* parent)
555+{
556+ int x, y = 0;
557+ int match = 0;
558+
559+ /* the application layer data */
560+ unsigned char * app_data = skb->data + app_data_offset(skb);
561+
562+ /* get the data segment of the packet and, removing any nulls in it,
563+ put it into buf, then we null terminate buf so it's a happy string */
564+ char * buf = (char *)kmalloc((int)skb->tail - (int)app_data + 1, GFP_KERNEL);
565+ if(!buf)
566+ {
567+ printk("<3>layer7: kmalloc failed to allocate %d bytes for buf!\n",
568+ (int)skb->tail - (int)app_data + 1);
569+ return TC_POLICE_UNSPEC; /* give up */
570+ }
571+
572+ /* this looks slow, but changing it to a memcpy (which loses the ability to
573+ strip out nulls and do tolower) does not make a noticable difference in speed,
574+ so we suspect that this is not a bottleneck. Even if we avoided copying altogether
575+ it doesn't seem like it would get much faster. */
576+ y = 0;
577+ for(x = 0; x < ( (int)skb->tail - (int)app_data ); x++)
578+ {
579+ if (app_data[x] != 0) buf[y++] = tolower(app_data[x]);
580+ }
581+ buf[y] = '\0'; // make it into a null-terminated string
582+
583+#ifdef LAYER7_DEBUG
584+ if (strlen(buf)!=0) {
585+ printk("buf: (non-printable chars are printed as '.'\n");
586+ for (x=0;x<strlen(buf);x++){
587+ if (isprint(buf[x])) printk("%c",buf[x]);
588+ else printk(".");
589+ }
590+ printk("\n");
591+ }
592+#endif
593+
594+ /* loop through all the patclas pairs to see if we can match it */
595+ for (x = 0; x < layer7_num_patclas_pairs; x++){
596+ match = (layer7_patclas_pairs[x].parent==parent) &&
597+ regexec(layer7_patclas_pairs[x].pattern, buf);
598+ if (match)
599+ {
600+ DPRINTK2("layer 7: layer7_really_classify found a match!\n");
601+ break;
602+ }
603+ }
604+
605+ kfree(buf);
606+
607+ if(match){
608+ /* classify it */
609+ res->classid = layer7_patclas_pairs[x].classification;
610+
611+ /* we are a "generic filter", so class is always set to 0.
612+ See "Linux Network Traffic Control -- Implementation Overview,
613+ 4 Feb 2001, section 5.3 */
614+ res->class = 0;
615+
616+ /* record how we classified it */
617+ currentSockets[hash].classid = layer7_patclas_pairs[x].classification;
618+ currentSockets[hash].hash = hash;
619+ currentSockets[hash].classified = 1;
620+ return TC_POLICE_OK;
621+ }
622+ else{
623+ res->class = 0;
624+
625+ /* remember to use the default in the futrure */
626+ currentSockets[hash].classid=res->classid;
627+ currentSockets[hash].hash = hash;
628+
629+ /* this is the "unclassified" case, so leave
630+ currentSockets[hash].classified alone */
631+ return TC_POLICE_UNSPEC;
632+ }
633+}
634+
635+
636+static int layer7_classify(struct sk_buff *skb, struct tcf_proto *tp, struct tcf_result *res)
637+{
638+ enum ip_conntrack_info ctinfo;
639+ struct ip_conntrack *conntrack;
640+ int hash;
641+
642+ /* check if we can deal with the protocol */
643+ if( is_ipv4(skb) ){
644+ DPRINTK2("layer7: Is IPv4, going on.\n");
645+
646+ if ( is_udp_over_ipv4(skb)){
647+ DPRINTK2(" layer7: Is UDP/IPv4, going on.\n"); }
648+ else if( is_tcp_over_ipv4(skb)){
649+ DPRINTK2(" layer7: Is TCP/IPv4, going on.\n"); }
650+ else{
651+ DPRINTK2(" layer7: Not UDP or TCP, leaving.\n");
652+ return TC_POLICE_UNSPEC;
653+ }
654+ }
655+ else{
656+ DPRINTK2("layer7: Not IPv4, leaving.\n");
657+ return TC_POLICE_UNSPEC;
658+ }
659+
660+ /* get a ip_conntrack */
661+ if(!(conntrack = ip_conntrack_get(skb, &ctinfo)))
662+ {
663+ printk("<3>layer7_classify: error getting conntrack, dropping to default.\n");
664+ return TC_POLICE_UNSPEC;
665+ }
666+
667+ /* see if we can get a master conntrack (and its master etc)
668+ (for ftp etc) */
669+ while (master_ct(conntrack) != NULL) {
670+ conntrack=master_ct(conntrack);
671+ }
672+
673+ /* the conntrack got bzeroed somewhere, so that should be 0
674+ the first time around... */
675+ if (conntrack->timestamp.tv_sec == 0){
676+ jiffies_to_timespec(jiffies,&conntrack->timestamp); /* 2.4/2.6 difference */
677+ hash = layer7_hash(conntrack->timestamp);
678+ memset(&currentSockets[hash], 0, sizeof(struct ct_hashElem));
679+ }
680+
681+ /* we hash on the timestamp we added to the conntrack */
682+ hash = layer7_hash(conntrack->timestamp);
683+
684+ /* If we already know about this connection, this increments the
685+ packet count. If not, this doesn't hurt anything. */
686+ currentSockets[hash].num_pkts_so_far++;
687+
688+ /* If we've seen this connection before and we're not trying to
689+ classify it anymore, either because we've given up (we've arbitrarily
690+ chosen to test the first 8 packets) or because we've found a match */
691+ if ( currentSockets[hash].hash == hash &&
692+ (currentSockets[hash].num_pkts_so_far > 8 ||
693+ currentSockets[hash].classified) )
694+ {
695+ if(currentSockets[hash].classified){
696+ /* classify it as what we classified it as before */
697+ res->classid = currentSockets[hash].classid;
698+ res->class = 0;
699+ return TC_POLICE_OK;
700+ }
701+ else{
702+ return TC_POLICE_UNSPEC;
703+ }
704+ }
705+ /* if we've seen it before, but we still need to classify it */
706+ else if(currentSockets[hash].hash == hash){
707+ int retval = layer7_really_classify(skb, res, hash,tp->root);
708+ if(retval == TC_POLICE_UNSPEC)
709+ DPRINTK("layer7: seen before, still unmatched. Classified as %x for now\n", currentSockets[hash].classid);
710+ else
711+ DPRINTK("layer7: found match. Classified as %x\n", currentSockets[hash].classid);
712+ return retval;
713+ }
714+ /* otherwise this is the first packet of a new connection */
715+ else{
716+ int retval;
717+
718+ currentSockets[hash].num_pkts_so_far = 1;
719+ currentSockets[hash].classified = 0;
720+
721+ retval = layer7_really_classify(skb, res, hash,tp->root);
722+ if(retval == TC_POLICE_UNSPEC)
723+ DPRINTK("layer7: first packet, still unmatched. Classified as %x for now\n", currentSockets[hash].classid);
724+ else
725+ DPRINTK("layer7: first packet. Classified as %x\n", currentSockets[hash].classid);
726+ return retval;
727+ }
728+
729+ return TC_POLICE_OK; /* == 0 */
730+}
731+
732+/* returns the "internal id" (the index into the patclas array) of the
733+ rule corresponding to handle. Untested. */
734+static unsigned long layer7_get(struct tcf_proto *tp, u32 handle)
735+{
736+ int x;
737+ /* loop through to find the corresponding rule */
738+ for (x = 0; x < layer7_num_patclas_pairs; x++) {
739+ if (layer7_patclas_pairs[x].handle == handle)
740+ return x;
741+ }
742+ /* otherwise return layer7_num_patclas_pairs */
743+ return layer7_num_patclas_pairs;
744+}
745+
746+
747+/* This doesn't do anything in other filters either...
748+(but this is one of the required functions) */
749+static void layer7_put(struct tcf_proto *tp, unsigned long f)
750+{
751+ DPRINTK("layer7_put called. Not implemented.\n");
752+}
753+
754+/* This actually does something, but we're not sure what.
755+Or rather, we know that it sets tp and that it makes tc crash if tp isn't
756+set, but we don't know why. */
757+static int layer7_init(struct tcf_proto *tp)
758+{
759+ struct layer7_data *p;
760+
761+ DPRINTK("layer7_init called: (tp %p). Might not be doing the right thing.\n", tp);
762+
763+ MOD_INC_USE_COUNT;
764+ p = kmalloc(sizeof(struct layer7_data), GFP_KERNEL);
765+ if (!p) {
766+ MOD_DEC_USE_COUNT;
767+ return -ENOMEM;
768+ }
769+ tp->root = p;
770+ p->perfect = NULL;
771+ p->h = NULL;
772+ p->hash = 0;
773+ p->mask = 0xffff;
774+ p->shift = 0;
775+ p->fall_through = 1;
776+
777+ return 0;
778+}
779+
780+/* XXX More info needed here.
781+We're not sure exactly what this is supposed to do. We're copying what
782+cls_tcindex.c does and nothing appears to be broken because of this approach. */
783+static int layer7_delete(struct tcf_proto *tp, unsigned long arg)
784+{
785+ struct layer7_filter_result *r = (struct layer7_filter_result *) arg;
786+ unsigned long cl;
787+
788+ DPRINTK("layer7_delete called: might not be doing the right thing.\n");
789+
790+ cl = __cls_set_class(&r->res.class,0);
791+ if (cl)
792+ tp->q->ops->cl_ops->unbind_tcf(tp->q,cl);
793+
794+#ifdef CONFIG_NET_CLS_POLICE
795+ tcf_police_release(r->police);
796+#endif
797+ return 0;
798+}
799+
800+
801+/* There are no parameters for layer7_init, so we overload layer7_change */
802+static int layer7_change(struct tcf_proto * tp, unsigned long base, u32 handle,
803+ struct rtattr ** tca, unsigned long * arg)
804+{
805+ struct layer7_filter_result new_filter_result = {
806+ NULL, /* no policing */
807+ { 0,0 }, /* no classification */
808+ };
809+ struct rtattr * opt = tca[TCA_OPTIONS-1];
810+ struct rtattr * tb[TCA_LAYER7_MAX];
811+ struct layer7_filter_result * r = (struct layer7_filter_result *) * arg;
812+ char* protocol = NULL;
813+ u32 classid = 0;
814+
815+
816+ //if (arg)
817+ // DPRINTK("layer7_change: *arg = 0x%lx\n",*arg);
818+ if (!opt)
819+ return 0;
820+ if(rtattr_parse(tb, TCA_LAYER7_MAX,RTA_DATA(opt), RTA_PAYLOAD(opt)) < 0)
821+ return -EINVAL;
822+
823+ /* Get protocol here */
824+ if (tb[TCA_LAYER7_PROTOCOL - 1]) {
825+ if (RTA_PAYLOAD(tb[TCA_LAYER7_PROTOCOL - 1]) < sizeof(int))
826+ return -EINVAL;
827+
828+ protocol = (char *)RTA_DATA(tb[TCA_LAYER7_PROTOCOL - 1]);
829+ }
830+
831+ /* vestigal comment from tcindex.c
832+ *
833+ * Note: this could be as restrictive as
834+ * if (handle & ~(mask >> shift))
835+ * but then, we'd fail handles that may become valid after some
836+ * future mask change. While this is extremely unlikely to ever
837+ * matter, the check below is safer (and also more
838+ * backwards-compatible).
839+ */
840+
841+ r = &new_filter_result;
842+
843+ if (tb[TCA_LAYER7_CLASSID-1]) {
844+ classid = *(__u32 *) RTA_DATA(tb[TCA_LAYER7_CLASSID - 1]);
845+ }
846+
847+ DPRINTK2("add_layer7_filter_rule, protocol: %s, with classid: %x, handle %u\n", protocol, classid, handle);
848+ add_layer7_filter_rule(protocol, classid, handle,tp->root);
849+
850+#ifdef CONFIG_NET_CLS_POLICE
851+ {
852+ struct tcf_police *police;
853+
854+ police = tb[TCA_LAYER7_POLICE - 1] ?
855+ tcf_police_locate(tb[TCA_LAYER7_POLICE - 1], NULL) : NULL;
856+ tcf_tree_lock(tp);
857+ police = xchg(&r->police, police);
858+ tcf_tree_unlock(tp);
859+ tcf_police_release(police);
860+ }
861+#endif
862+ return 0;
863+}
864+
865+/* XXX More information needed here.
866+Can't find any documentation on what this function is supposed to do.
867+The cls_tcindex.c version of this doesn't appear to do anything that applies
868+to us and so far doing nothing appears to work fine. */
869+static void layer7_walk(struct tcf_proto * tp, struct tcf_walker * walker)
870+{
871+ DPRINTK("layer7_walk called. Not implemented.\n");
872+}
873+
874+/* delete all the rules in the filter */
875+static void layer7_destroy(struct tcf_proto *tp)
876+{
877+ int x;
878+
879+ /* clear the filter rules */
880+ if (layer7_patclas_pairs != NULL) {
881+ for (x=0;x<layer7_num_patclas_pairs;x++)
882+ {
883+ kfree(layer7_patclas_pairs[x].pattern);
884+ }
885+ kfree(layer7_patclas_pairs);
886+ layer7_patclas_pairs=NULL;
887+ layer7_num_patclas_pairs=0;
888+ }
889+}
890+
891+
892+/* XXX more information needed here.
893+This gets called each time a filter is added, but we can't find any
894+documentation that defines what it is supposed to do or why it gets called
895+when it does. However, nothing seems to be broken because of our current
896+approach. */
897+static int layer7_dump(struct tcf_proto *tp, unsigned long fh,
898+ struct sk_buff *skb, struct tcmsg *t)
899+{
900+ DPRINTK("layer7_dump called. Might not be doing the right thing.\n");
901+
902+ return skb->len; /* cls_tcindex.c does this, don't know why... */
903+}
904+
905+struct tcf_proto_ops cls_layer7_ops = {
906+ NULL,
907+ "layer7",
908+ layer7_classify,
909+ layer7_init,
910+ layer7_destroy,
911+
912+ layer7_get,
913+ layer7_put,
914+ layer7_change,
915+ layer7_delete,
916+ layer7_walk,
917+ layer7_dump
918+};
919+
920+
921+/* write out the patterns to userland. (yes, write reads and read writes.) */
922+int layer7_read_proc(char* page, char ** start, off_t off, int count,
923+ int* eof, void * data)
924+{
925+ if (layer7_patterns == NULL){
926+ /* there are no patterns yet */
927+ *eof=1;
928+ page='\0';
929+ return 0;
930+ }
931+ else{
932+ int x;
933+ /* there are patterns */
934+ page[0]='\0';
935+ for (x=0;x<layer7_num_patterns;x++){
936+ strcat(page,layer7_patterns[x].name);
937+ strcat(page,"\n");
938+ strcat(page,layer7_patterns[x].uncomppattern);
939+ strcat(page,"\n");
940+ }
941+
942+ //strncpy(page,layer7_unparsed_patterns,count-1);
943+ *eof=1;
944+ return strlen(page);
945+ }
946+}
947+
948+/* Read in the protocols from userland */
949+int layer7_write_proc(struct file* file, const char* buffer,
950+ unsigned long count, void *data)
951+{
952+ int x = 0, y;
953+ char *patterns;
954+ char *name;
955+ char *pattern;
956+
957+ /* free the old pattens if they exist */
958+ if (layer7_unparsed_patterns != NULL)
959+ kfree(layer7_unparsed_patterns);
960+
961+ /* don't clear the patterns so that many may be added one at a time */
962+ //if (layer7_patterns != NULL)
963+ //clear_layer7_patterns();
964+
965+ /* allocate space for the new ones */
966+ layer7_unparsed_patterns=(char *)kmalloc(count + 1, GFP_KERNEL);
967+ /* bail if it fails */
968+ if (!layer7_unparsed_patterns)
969+ return 0;
970+
971+ /* copy in the data from userland */
972+ copy_from_user(layer7_unparsed_patterns, buffer, count);
973+ layer7_unparsed_patterns[count]= '\0';
974+
975+
976+ /* add the patterns to the pattern definitions table */
977+ pattern = (char*)kmalloc(LAYER7_MAX_PATTERN_DEF_SIZE, GFP_KERNEL);
978+ patterns = layer7_unparsed_patterns;
979+ while (x < count)
980+ {
981+ name = (char*)kmalloc(LAYER7_MAX_PROTOCOL_NAME_SIZE, GFP_KERNEL);
982+ name[0] = '\0';
983+ pattern[0] = '\0';
984+ /* read past comment lines */
985+ while (x < count && (patterns[x]=='#' || isspace(patterns[x]))){
986+ if (patterns[x] == '#'){
987+ while (x<count && patterns[x]!='\n'){
988+ x++;
989+ }
990+ }
991+ x++;
992+ }
993+
994+ /* read in the name */
995+ y = 0;
996+ while (x < count && !isspace(patterns[x]) &&
997+ y < LAYER7_MAX_PROTOCOL_NAME_SIZE - 1){
998+ name[y] = patterns[x];
999+ x++;
1000+ y++;
1001+ }
1002+ name[y] = '\0';
1003+
1004+ /* skip over comments and blank lines */
1005+ while (x < count && (patterns[x] == '#' || isspace(patterns[x])))
1006+ {
1007+ if (patterns[x] == '#')
1008+ while (x < count && patterns[x] != '\n')
1009+ { x++; }
1010+ x++;
1011+ }
1012+
1013+ /* read in the pattern */
1014+ y = 0;
1015+ while (x < count && patterns[x] != '\n' && patterns[x] != '\r' &&
1016+ y < LAYER7_MAX_PATTERN_DEF_SIZE-1){
1017+ pattern[y]=patterns[x];
1018+ x++;
1019+ y++;
1020+ }
1021+ pattern[y] = '\0';
1022+ /* if we now have both a pattern and name add it */
1023+ if (strlen(name) != 0 && strlen(pattern) != 0)
1024+ {
1025+ DPRINTK("layer 7: adding %s:%s\n", name, pattern);
1026+ add_layer7_pattern(name, pattern);
1027+ }
1028+ }
1029+
1030+ return count;
1031+}
1032+
1033+/* register the proc file */
1034+void layer7_init_proc(void)
1035+{
1036+ struct proc_dir_entry* entry;
1037+
1038+ /* create the file */
1039+ entry = create_proc_entry("layer7_protocols", 0644, proc_net);
1040+
1041+ /* set the callback functions */
1042+ entry->read_proc = layer7_read_proc;
1043+ entry->write_proc = layer7_write_proc;
1044+}
1045+
1046+#ifdef MODULE
1047+int init_module(void)
1048+{
1049+ DPRINTK2("layer7: init_module called\n");
1050+ layer7_init_proc();
1051+ return register_tcf_proto_ops(&cls_layer7_ops);
1052+}
1053+
1054+void cleanup_module(void)
1055+{
1056+ DPRINTK2("layer7: cleanup_module called\n");
1057+ unregister_tcf_proto_ops(&cls_layer7_ops);
1058+}
1059+MODULE_LICENSE("GPL");
1060+#endif
1061+
1062diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regexp.c linux-2.4.22-rc2/net/sched/regexp/regexp.c
1063--- linux-2.4.22-rc2-stock/net/sched/regexp/regexp.c 1969-12-31 19:00:00.000000000 -0500
1064+++ linux-2.4.22-rc2/net/sched/regexp/regexp.c 2003-08-17 21:43:49.000000000 -0500
1065@@ -0,0 +1,1236 @@
1066+/*
1067+ * regcomp and regexec -- regsub and regerror are elsewhere
1068+ * @(#)regexp.c 1.3 of 18 April 87
1069+ *
1070+ * Copyright (c) 1986 by University of Toronto.
1071+ * Written by Henry Spencer. Not derived from licensed software.
1072+ *
1073+ * Permission is granted to anyone to use this software for any
1074+ * purpose on any computer system, and to redistribute it freely,
1075+ * subject to the following restrictions:
1076+ *
1077+ * 1. The author is not responsible for the consequences of use of
1078+ * this software, no matter how awful, even if they arise
1079+ * from defects in it.
1080+ *
1081+ * 2. The origin of this software must not be misrepresented, either
1082+ * by explicit claim or by omission.
1083+ *
1084+ * 3. Altered versions must be plainly marked as such, and must not
1085+ * be misrepresented as being the original software.
1086+ *
1087+ * Beware that some of this code is subtly aware of the way operator
1088+ * precedence is structured in regular expressions. Serious changes in
1089+ * regular-expression syntax might require a total rethink.
1090+ *
1091+ * This code was modified by Ethan Sommer to work within the kernel
1092+ * (it now uses kmalloc etc..)
1093+ *
1094+ * Modified slightly by Matthew Strait to use more modern C.
1095+ */
1096+#include "regexp.h"
1097+#include "regmagic.h"
1098+#include <linux/string.h>
1099+#include <linux/mm.h>
1100+
1101+/* added by ethan */
1102+#define malloc(foo) kmalloc(foo,GFP_KERNEL)
1103+
1104+/* added by MLS */
1105+#ifdef MODULE
1106+int init_module(void)
1107+{ return 0; }
1108+int cleanup_module(void)
1109+{ return 0; }
1110+#include <linux/kernel.h>
1111+#include <linux/module.h>
1112+MODULE_LICENSE("GPL"); /* I think this is ok based on the above text... */
1113+#endif
1114+
1115+void regerror(char * s)
1116+{
1117+ printk("regexp(3): %s\n", s);
1118+ /* NOTREACHED */
1119+}
1120+
1121+/*
1122+ * The "internal use only" fields in regexp.h are present to pass info from
1123+ * compile to execute that permits the execute phase to run lots faster on
1124+ * simple cases. They are:
1125+ *
1126+ * regstart char that must begin a match; '\0' if none obvious
1127+ * reganch is the match anchored (at beginning-of-line only)?
1128+ * regmust string (pointer into program) that match must include, or NULL
1129+ * regmlen length of regmust string
1130+ *
1131+ * Regstart and reganch permit very fast decisions on suitable starting points
1132+ * for a match, cutting down the work a lot. Regmust permits fast rejection
1133+ * of lines that cannot possibly match. The regmust tests are costly enough
1134+ * that regcomp() supplies a regmust only if the r.e. contains something
1135+ * potentially expensive (at present, the only such thing detected is * or +
1136+ * at the start of the r.e., which can involve a lot of backup). Regmlen is
1137+ * supplied because the test in regexec() needs it and regcomp() is computing
1138+ * it anyway.
1139+ */
1140+
1141+/*
1142+ * Structure for regexp "program". This is essentially a linear encoding
1143+ * of a nondeterministic finite-state machine (aka syntax charts or
1144+ * "railroad normal form" in parsing technology). Each node is an opcode
1145+ * plus a "next" pointer, possibly plus an operand. "Next" pointers of
1146+ * all nodes except BRANCH implement concatenation; a "next" pointer with
1147+ * a BRANCH on both ends of it is connecting two alternatives. (Here we
1148+ * have one of the subtle syntax dependencies: an individual BRANCH (as
1149+ * opposed to a collection of them) is never concatenated with anything
1150+ * because of operator precedence.) The operand of some types of node is
1151+ * a literal string; for others, it is a node leading into a sub-FSM. In
1152+ * particular, the operand of a BRANCH node is the first node of the branch.
1153+ * (NB this is *not* a tree structure: the tail of the branch connects
1154+ * to the thing following the set of BRANCHes.) The opcodes are:
1155+ */
1156+
1157+/* definition number opnd? meaning */
1158+#define END 0 /* no End of program. */
1159+#define BOL 1 /* no Match "" at beginning of line. */
1160+#define EOL 2 /* no Match "" at end of line. */
1161+#define ANY 3 /* no Match any one character. */
1162+#define ANYOF 4 /* str Match any character in this string. */
1163+#define ANYBUT 5 /* str Match any character not in this string. */
1164+#define BRANCH 6 /* node Match this alternative, or the next... */
1165+#define BACK 7 /* no Match "", "next" ptr points backward. */
1166+#define EXACTLY 8 /* str Match this string. */
1167+#define NOTHING 9 /* no Match empty string. */
1168+#define STAR 10 /* node Match this (simple) thing 0 or more times. */
1169+#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
1170+#define OPEN 20 /* no Mark this point in input as start of #n. */
1171+ /* OPEN+1 is number 1, etc. */
1172+#define CLOSE 30 /* no Analogous to OPEN. */
1173+
1174+/*
1175+ * Opcode notes:
1176+ *
1177+ * BRANCH The set of branches constituting a single choice are hooked
1178+ * together with their "next" pointers, since precedence prevents
1179+ * anything being concatenated to any individual branch. The
1180+ * "next" pointer of the last BRANCH in a choice points to the
1181+ * thing following the whole choice. This is also where the
1182+ * final "next" pointer of each individual branch points; each
1183+ * branch starts with the operand node of a BRANCH node.
1184+ *
1185+ * BACK Normal "next" pointers all implicitly point forward; BACK
1186+ * exists to make loop structures possible.
1187+ *
1188+ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
1189+ * BRANCH structures using BACK. Simple cases (one character
1190+ * per match) are implemented with STAR and PLUS for speed
1191+ * and to minimize recursive plunges.
1192+ *
1193+ * OPEN,CLOSE ...are numbered at compile time.
1194+ */
1195+
1196+/*
1197+ * A node is one char of opcode followed by two chars of "next" pointer.
1198+ * "Next" pointers are stored as two 8-bit pieces, high order first. The
1199+ * value is a positive offset from the opcode of the node containing it.
1200+ * An operand, if any, simply follows the node. (Note that much of the
1201+ * code generation knows about this implicit relationship.)
1202+ *
1203+ * Using two bytes for the "next" pointer is vast overkill for most things,
1204+ * but allows patterns to get big without disasters.
1205+ */
1206+#define OP(p) (*(p))
1207+#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
1208+#define OPERAND(p) ((p) + 3)
1209+
1210+/*
1211+ * See regmagic.h for one further detail of program structure.
1212+ */
1213+
1214+
1215+/*
1216+ * Utility definitions.
1217+ */
1218+#ifndef CHARBITS
1219+#define UCHARAT(p) ((int)*(unsigned char *)(p))
1220+#else
1221+#define UCHARAT(p) ((int)*(p)&CHARBITS)
1222+#endif
1223+
1224+#define FAIL(m) { regerror(m); return(NULL); }
1225+#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
1226+#define META "^$.[()|?+*\\"
1227+
1228+/*
1229+ * Flags to be passed up and down.
1230+ */
1231+#define HASWIDTH 01 /* Known never to match null string. */
1232+#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
1233+#define SPSTART 04 /* Starts with * or +. */
1234+#define WORST 0 /* Worst case. */
1235+
1236+/*
1237+ * Global work variables for regcomp().
1238+ */
1239+static char *regparse; /* Input-scan pointer. */
1240+static int regnpar; /* () count. */
1241+static char regdummy;
1242+static char *regcode; /* Code-emit pointer; &regdummy = don't. */
1243+static long regsize; /* Code size. */
1244+
1245+/*
1246+ * Forward declarations for regcomp()'s friends.
1247+ */
1248+#ifndef STATIC
1249+#define STATIC static
1250+#endif
1251+STATIC char *reg(int paren,int *flagp);
1252+STATIC char *regbranch(int *flagp);
1253+STATIC char *regpiece(int *flagp);
1254+STATIC char *regatom(int *flagp);
1255+STATIC char *regnode(char op);
1256+STATIC char *regnext(char *p);
1257+STATIC void regc(char b);
1258+STATIC void reginsert(char op, char *opnd);
1259+STATIC void regtail(char *p, char *val);
1260+STATIC void regoptail(char *p, char *val);
1261+STATIC int strcspn(char *s1, char *s2);
1262+
1263+/*
1264+ - regcomp - compile a regular expression into internal code
1265+ *
1266+ * We can't allocate space until we know how big the compiled form will be,
1267+ * but we can't compile it (and thus know how big it is) until we've got a
1268+ * place to put the code. So we cheat: we compile it twice, once with code
1269+ * generation turned off and size counting turned on, and once "for real".
1270+ * This also means that we don't allocate space until we are sure that the
1271+ * thing really will compile successfully, and we never have to move the
1272+ * code and thus invalidate pointers into it. (Note that it has to be in
1273+ * one piece because free() must be able to free it all.)
1274+ *
1275+ * Beware that the optimization-preparation code in here knows about some
1276+ * of the structure of the compiled regexp.
1277+ */
1278+regexp *
1279+regcomp(char *exp,int *patternsize)
1280+{
1281+ register regexp *r;
1282+ register char *scan;
1283+ register char *longest;
1284+ register int len;
1285+ int flags;
1286+ /* commented out by ethan
1287+ extern char *malloc();
1288+ */
1289+
1290+ if (exp == NULL)
1291+ FAIL("NULL argument");
1292+
1293+ /* First pass: determine size, legality. */
1294+ regparse = exp;
1295+ regnpar = 1;
1296+ regsize = 0L;
1297+ regcode = &regdummy;
1298+ regc(MAGIC);
1299+ if (reg(0, &flags) == NULL)
1300+ return(NULL);
1301+
1302+ /* Small enough for pointer-storage convention? */
1303+ if (regsize >= 32767L) /* Probably could be 65535L. */
1304+ FAIL("regexp too big");
1305+
1306+ /* Allocate space. */
1307+ *patternsize=sizeof(regexp) + (unsigned)regsize;
1308+ r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize);
1309+ if (r == NULL)
1310+ FAIL("out of space");
1311+
1312+ /* Second pass: emit code. */
1313+ regparse = exp;
1314+ regnpar = 1;
1315+ regcode = r->program;
1316+ regc(MAGIC);
1317+ if (reg(0, &flags) == NULL)
1318+ return(NULL);
1319+
1320+ /* Dig out information for optimizations. */
1321+ r->regstart = '\0'; /* Worst-case defaults. */
1322+ r->reganch = 0;
1323+ r->regmust = NULL;
1324+ r->regmlen = 0;
1325+ scan = r->program+1; /* First BRANCH. */
1326+ if (OP(regnext(scan)) == END) { /* Only one top-level choice. */
1327+ scan = OPERAND(scan);
1328+
1329+ /* Starting-point info. */
1330+ if (OP(scan) == EXACTLY)
1331+ r->regstart = *OPERAND(scan);
1332+ else if (OP(scan) == BOL)
1333+ r->reganch++;
1334+
1335+ /*
1336+ * If there's something expensive in the r.e., find the
1337+ * longest literal string that must appear and make it the
1338+ * regmust. Resolve ties in favor of later strings, since
1339+ * the regstart check works with the beginning of the r.e.
1340+ * and avoiding duplication strengthens checking. Not a
1341+ * strong reason, but sufficient in the absence of others.
1342+ */
1343+ if (flags&SPSTART) {
1344+ longest = NULL;
1345+ len = 0;
1346+ for (; scan != NULL; scan = regnext(scan))
1347+ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
1348+ longest = OPERAND(scan);
1349+ len = strlen(OPERAND(scan));
1350+ }
1351+ r->regmust = longest;
1352+ r->regmlen = len;
1353+ }
1354+ }
1355+
1356+ return(r);
1357+}
1358+
1359+/*
1360+ - reg - regular expression, i.e. main body or parenthesized thing
1361+ *
1362+ * Caller must absorb opening parenthesis.
1363+ *
1364+ * Combining parenthesis handling with the base level of regular expression
1365+ * is a trifle forced, but the need to tie the tails of the branches to what
1366+ * follows makes it hard to avoid.
1367+ */
1368+static char *
1369+reg(paren, flagp)
1370+int paren; /* Parenthesized? */
1371+int *flagp;
1372+{
1373+ register char *ret;
1374+ register char *br;
1375+ register char *ender;
1376+ register int parno;
1377+ int flags;
1378+
1379+ *flagp = HASWIDTH; /* Tentatively. */
1380+
1381+ /* Make an OPEN node, if parenthesized. */
1382+ if (paren) {
1383+ if (regnpar >= NSUBEXP)
1384+ FAIL("too many ()");
1385+ parno = regnpar;
1386+ regnpar++;
1387+ ret = regnode(OPEN+parno);
1388+ } else
1389+ ret = NULL;
1390+
1391+ /* Pick up the branches, linking them together. */
1392+ br = regbranch(&flags);
1393+ if (br == NULL)
1394+ return(NULL);
1395+ if (ret != NULL)
1396+ regtail(ret, br); /* OPEN -> first. */
1397+ else
1398+ ret = br;
1399+ if (!(flags&HASWIDTH))
1400+ *flagp &= ~HASWIDTH;
1401+ *flagp |= flags&SPSTART;
1402+ while (*regparse == '|') {
1403+ regparse++;
1404+ br = regbranch(&flags);
1405+ if (br == NULL)
1406+ return(NULL);
1407+ regtail(ret, br); /* BRANCH -> BRANCH. */
1408+ if (!(flags&HASWIDTH))
1409+ *flagp &= ~HASWIDTH;
1410+ *flagp |= flags&SPSTART;
1411+ }
1412+
1413+ /* Make a closing node, and hook it on the end. */
1414+ ender = regnode((paren) ? CLOSE+parno : END);
1415+ regtail(ret, ender);
1416+
1417+ /* Hook the tails of the branches to the closing node. */
1418+ for (br = ret; br != NULL; br = regnext(br))
1419+ regoptail(br, ender);
1420+
1421+ /* Check for proper termination. */
1422+ if (paren && *regparse++ != ')') {
1423+ FAIL("unmatched ()");
1424+ } else if (!paren && *regparse != '\0') {
1425+ if (*regparse == ')') {
1426+ FAIL("unmatched ()");
1427+ } else
1428+ FAIL("junk on end"); /* "Can't happen". */
1429+ /* NOTREACHED */
1430+ }
1431+
1432+ return(ret);
1433+}
1434+
1435+/*
1436+ - regbranch - one alternative of an | operator
1437+ *
1438+ * Implements the concatenation operator.
1439+ */
1440+static char *
1441+regbranch(flagp)
1442+int *flagp;
1443+{
1444+ register char *ret;
1445+ register char *chain;
1446+ register char *latest;
1447+ int flags;
1448+
1449+ *flagp = WORST; /* Tentatively. */
1450+
1451+ ret = regnode(BRANCH);
1452+ chain = NULL;
1453+ while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
1454+ latest = regpiece(&flags);
1455+ if (latest == NULL)
1456+ return(NULL);
1457+ *flagp |= flags&HASWIDTH;
1458+ if (chain == NULL) /* First piece. */
1459+ *flagp |= flags&SPSTART;
1460+ else
1461+ regtail(chain, latest);
1462+ chain = latest;
1463+ }
1464+ if (chain == NULL) /* Loop ran zero times. */
1465+ (void) regnode(NOTHING);
1466+
1467+ return(ret);
1468+}
1469+
1470+/*
1471+ - regpiece - something followed by possible [*+?]
1472+ *
1473+ * Note that the branching code sequences used for ? and the general cases
1474+ * of * and + are somewhat optimized: they use the same NOTHING node as
1475+ * both the endmarker for their branch list and the body of the last branch.
1476+ * It might seem that this node could be dispensed with entirely, but the
1477+ * endmarker role is not redundant.
1478+ */
1479+static char *
1480+regpiece(flagp)
1481+int *flagp;
1482+{
1483+ register char *ret;
1484+ register char op;
1485+ register char *next;
1486+ int flags;
1487+
1488+ ret = regatom(&flags);
1489+ if (ret == NULL)
1490+ return(NULL);
1491+
1492+ op = *regparse;
1493+ if (!ISMULT(op)) {
1494+ *flagp = flags;
1495+ return(ret);
1496+ }
1497+
1498+ if (!(flags&HASWIDTH) && op != '?')
1499+ FAIL("*+ operand could be empty");
1500+ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
1501+
1502+ if (op == '*' && (flags&SIMPLE))
1503+ reginsert(STAR, ret);
1504+ else if (op == '*') {
1505+ /* Emit x* as (x&|), where & means "self". */
1506+ reginsert(BRANCH, ret); /* Either x */
1507+ regoptail(ret, regnode(BACK)); /* and loop */
1508+ regoptail(ret, ret); /* back */
1509+ regtail(ret, regnode(BRANCH)); /* or */
1510+ regtail(ret, regnode(NOTHING)); /* null. */
1511+ } else if (op == '+' && (flags&SIMPLE))
1512+ reginsert(PLUS, ret);
1513+ else if (op == '+') {
1514+ /* Emit x+ as x(&|), where & means "self". */
1515+ next = regnode(BRANCH); /* Either */
1516+ regtail(ret, next);
1517+ regtail(regnode(BACK), ret); /* loop back */
1518+ regtail(next, regnode(BRANCH)); /* or */
1519+ regtail(ret, regnode(NOTHING)); /* null. */
1520+ } else if (op == '?') {
1521+ /* Emit x? as (x|) */
1522+ reginsert(BRANCH, ret); /* Either x */
1523+ regtail(ret, regnode(BRANCH)); /* or */
1524+ next = regnode(NOTHING); /* null. */
1525+ regtail(ret, next);
1526+ regoptail(ret, next);
1527+ }
1528+ regparse++;
1529+ if (ISMULT(*regparse))
1530+ FAIL("nested *?+");
1531+
1532+ return(ret);
1533+}
1534+
1535+/*
1536+ - regatom - the lowest level
1537+ *
1538+ * Optimization: gobbles an entire sequence of ordinary characters so that
1539+ * it can turn them into a single node, which is smaller to store and
1540+ * faster to run. Backslashed characters are exceptions, each becoming a
1541+ * separate node; the code is simpler that way and it's not worth fixing.
1542+ */
1543+static char *
1544+regatom(flagp)
1545+int *flagp;
1546+{
1547+ register char *ret;
1548+ int flags;
1549+
1550+ *flagp = WORST; /* Tentatively. */
1551+
1552+ switch (*regparse++) {
1553+ case '^':
1554+ ret = regnode(BOL);
1555+ break;
1556+ case '$':
1557+ ret = regnode(EOL);
1558+ break;
1559+ case '.':
1560+ ret = regnode(ANY);
1561+ *flagp |= HASWIDTH|SIMPLE;
1562+ break;
1563+ case '[': {
1564+ register int class;
1565+ register int classend;
1566+
1567+ if (*regparse == '^') { /* Complement of range. */
1568+ ret = regnode(ANYBUT);
1569+ regparse++;
1570+ } else
1571+ ret = regnode(ANYOF);
1572+ if (*regparse == ']' || *regparse == '-')
1573+ regc(*regparse++);
1574+ while (*regparse != '\0' && *regparse != ']') {
1575+ if (*regparse == '-') {
1576+ regparse++;
1577+ if (*regparse == ']' || *regparse == '\0')
1578+ regc('-');
1579+ else {
1580+ class = UCHARAT(regparse-2)+1;
1581+ classend = UCHARAT(regparse);
1582+ if (class > classend+1)
1583+ FAIL("invalid [] range");
1584+ for (; class <= classend; class++)
1585+ regc(class);
1586+ regparse++;
1587+ }
1588+ } else
1589+ regc(*regparse++);
1590+ }
1591+ regc('\0');
1592+ if (*regparse != ']')
1593+ FAIL("unmatched []");
1594+ regparse++;
1595+ *flagp |= HASWIDTH|SIMPLE;
1596+ }
1597+ break;
1598+ case '(':
1599+ ret = reg(1, &flags);
1600+ if (ret == NULL)
1601+ return(NULL);
1602+ *flagp |= flags&(HASWIDTH|SPSTART);
1603+ break;
1604+ case '\0':
1605+ case '|':
1606+ case ')':
1607+ FAIL("internal urp"); /* Supposed to be caught earlier. */
1608+ break;
1609+ case '?':
1610+ case '+':
1611+ case '*':
1612+ FAIL("?+* follows nothing");
1613+ break;
1614+ case '\\':
1615+ if (*regparse == '\0')
1616+ FAIL("trailing \\");
1617+ ret = regnode(EXACTLY);
1618+ regc(*regparse++);
1619+ regc('\0');
1620+ *flagp |= HASWIDTH|SIMPLE;
1621+ break;
1622+ default: {
1623+ register int len;
1624+ register char ender;
1625+
1626+ regparse--;
1627+ len = strcspn(regparse, META);
1628+ if (len <= 0)
1629+ FAIL("internal disaster");
1630+ ender = *(regparse+len);
1631+ if (len > 1 && ISMULT(ender))
1632+ len--; /* Back off clear of ?+* operand. */
1633+ *flagp |= HASWIDTH;
1634+ if (len == 1)
1635+ *flagp |= SIMPLE;
1636+ ret = regnode(EXACTLY);
1637+ while (len > 0) {
1638+ regc(*regparse++);
1639+ len--;
1640+ }
1641+ regc('\0');
1642+ }
1643+ break;
1644+ }
1645+
1646+ return(ret);
1647+}
1648+
1649+/*
1650+ - regnode - emit a node
1651+ */
1652+static char * /* Location. */
1653+regnode(op)
1654+char op;
1655+{
1656+ register char *ret;
1657+ register char *ptr;
1658+
1659+ ret = regcode;
1660+ if (ret == &regdummy) {
1661+ regsize += 3;
1662+ return(ret);
1663+ }
1664+
1665+ ptr = ret;
1666+ *ptr++ = op;
1667+ *ptr++ = '\0'; /* Null "next" pointer. */
1668+ *ptr++ = '\0';
1669+ regcode = ptr;
1670+
1671+ return(ret);
1672+}
1673+
1674+/*
1675+ - regc - emit (if appropriate) a byte of code
1676+ */
1677+static void
1678+regc(b)
1679+char b;
1680+{
1681+ if (regcode != &regdummy)
1682+ *regcode++ = b;
1683+ else
1684+ regsize++;
1685+}
1686+
1687+/*
1688+ - reginsert - insert an operator in front of already-emitted operand
1689+ *
1690+ * Means relocating the operand.
1691+ */
1692+static void
1693+reginsert(op, opnd)
1694+char op;
1695+char *opnd;
1696+{
1697+ register char *src;
1698+ register char *dst;
1699+ register char *place;
1700+
1701+ if (regcode == &regdummy) {
1702+ regsize += 3;
1703+ return;
1704+ }
1705+
1706+ src = regcode;
1707+ regcode += 3;
1708+ dst = regcode;
1709+ while (src > opnd)
1710+ *--dst = *--src;
1711+
1712+ place = opnd; /* Op node, where operand used to be. */
1713+ *place++ = op;
1714+ *place++ = '\0';
1715+ *place++ = '\0';
1716+}
1717+
1718+/*
1719+ - regtail - set the next-pointer at the end of a node chain
1720+ */
1721+static void
1722+regtail(p, val)
1723+char *p;
1724+char *val;
1725+{
1726+ register char *scan;
1727+ register char *temp;
1728+ register int offset;
1729+
1730+ if (p == &regdummy)
1731+ return;
1732+
1733+ /* Find last node. */
1734+ scan = p;
1735+ for (;;) {
1736+ temp = regnext(scan);
1737+ if (temp == NULL)
1738+ break;
1739+ scan = temp;
1740+ }
1741+
1742+ if (OP(scan) == BACK)
1743+ offset = scan - val;
1744+ else
1745+ offset = val - scan;
1746+ *(scan+1) = (offset>>8)&0377;
1747+ *(scan+2) = offset&0377;
1748+}
1749+
1750+/*
1751+ - regoptail - regtail on operand of first argument; nop if operandless
1752+ */
1753+static void
1754+regoptail(p, val)
1755+char *p;
1756+char *val;
1757+{
1758+ /* "Operandless" and "op != BRANCH" are synonymous in practice. */
1759+ if (p == NULL || p == &regdummy || OP(p) != BRANCH)
1760+ return;
1761+ regtail(OPERAND(p), val);
1762+}
1763+
1764+/*
1765+ * regexec and friends
1766+ */
1767+
1768+/*
1769+ * Global work variables for regexec().
1770+ */
1771+static char *reginput; /* String-input pointer. */
1772+static char *regbol; /* Beginning of input, for ^ check. */
1773+static char **regstartp; /* Pointer to startp array. */
1774+static char **regendp; /* Ditto for endp. */
1775+
1776+/*
1777+ * Forwards.
1778+ */
1779+STATIC int regtry(regexp *prog, char *string);
1780+STATIC int regmatch(char *prog);
1781+STATIC int regrepeat(char *p);
1782+
1783+#ifdef DEBUG
1784+int regnarrate = 0;
1785+void regdump();
1786+STATIC char *regprop(char *op);
1787+#endif
1788+
1789+/*
1790+ - regexec - match a regexp against a string
1791+ */
1792+int
1793+regexec(prog, string)
1794+register regexp *prog;
1795+register char *string;
1796+{
1797+ register char *s;
1798+
1799+ /* Be paranoid... */
1800+ if (prog == NULL || string == NULL) {
1801+ regerror("NULL parameter");
1802+ return(0);
1803+ }
1804+
1805+ /* Check validity of program. */
1806+ if (UCHARAT(prog->program) != MAGIC) {
1807+ regerror("corrupted program");
1808+ return(0);
1809+ }
1810+
1811+ /* If there is a "must appear" string, look for it. */
1812+ if (prog->regmust != NULL) {
1813+ s = string;
1814+ while ((s = strchr(s, prog->regmust[0])) != NULL) {
1815+ if (strncmp(s, prog->regmust, prog->regmlen) == 0)
1816+ break; /* Found it. */
1817+ s++;
1818+ }
1819+ if (s == NULL) /* Not present. */
1820+ return(0);
1821+ }
1822+
1823+ /* Mark beginning of line for ^ . */
1824+ regbol = string;
1825+
1826+ /* Simplest case: anchored match need be tried only once. */
1827+ if (prog->reganch)
1828+ return(regtry(prog, string));
1829+
1830+ /* Messy cases: unanchored match. */
1831+ s = string;
1832+ if (prog->regstart != '\0')
1833+ /* We know what char it must start with. */
1834+ while ((s = strchr(s, prog->regstart)) != NULL) {
1835+ if (regtry(prog, s))
1836+ return(1);
1837+ s++;
1838+ }
1839+ else
1840+ /* We don't -- general case. */
1841+ do {
1842+ if (regtry(prog, s))
1843+ return(1);
1844+ } while (*s++ != '\0');
1845+
1846+ /* Failure. */
1847+ return(0);
1848+}
1849+
1850+/*
1851+ - regtry - try match at specific point
1852+ */
1853+static int /* 0 failure, 1 success */
1854+regtry(prog, string)
1855+regexp *prog;
1856+char *string;
1857+{
1858+ register int i;
1859+ register char **sp;
1860+ register char **ep;
1861+
1862+ reginput = string;
1863+ regstartp = prog->startp;
1864+ regendp = prog->endp;
1865+
1866+ sp = prog->startp;
1867+ ep = prog->endp;
1868+ for (i = NSUBEXP; i > 0; i--) {
1869+ *sp++ = NULL;
1870+ *ep++ = NULL;
1871+ }
1872+ if (regmatch(prog->program + 1)) {
1873+ prog->startp[0] = string;
1874+ prog->endp[0] = reginput;
1875+ return(1);
1876+ } else
1877+ return(0);
1878+}
1879+
1880+/*
1881+ - regmatch - main matching routine
1882+ *
1883+ * Conceptually the strategy is simple: check to see whether the current
1884+ * node matches, call self recursively to see whether the rest matches,
1885+ * and then act accordingly. In practice we make some effort to avoid
1886+ * recursion, in particular by going through "ordinary" nodes (that don't
1887+ * need to know whether the rest of the match failed) by a loop instead of
1888+ * by recursion.
1889+ */
1890+static int /* 0 failure, 1 success */
1891+regmatch(prog)
1892+char *prog;
1893+{
1894+ register char *scan; /* Current node. */
1895+ char *next; /* Next node. */
1896+
1897+ scan = prog;
1898+#ifdef DEBUG
1899+ if (scan != NULL && regnarrate)
1900+ fprintf(stderr, "%s(\n", regprop(scan));
1901+#endif
1902+ while (scan != NULL) {
1903+#ifdef DEBUG
1904+ if (regnarrate)
1905+ fprintf(stderr, "%s...\n", regprop(scan));
1906+#endif
1907+ next = regnext(scan);
1908+
1909+ switch (OP(scan)) {
1910+ case BOL:
1911+ if (reginput != regbol)
1912+ return(0);
1913+ break;
1914+ case EOL:
1915+ if (*reginput != '\0')
1916+ return(0);
1917+ break;
1918+ case ANY:
1919+ if (*reginput == '\0')
1920+ return(0);
1921+ reginput++;
1922+ break;
1923+ case EXACTLY: {
1924+ register int len;
1925+ register char *opnd;
1926+
1927+ opnd = OPERAND(scan);
1928+ /* Inline the first character, for speed. */
1929+ if (*opnd != *reginput)
1930+ return(0);
1931+ len = strlen(opnd);
1932+ if (len > 1 && strncmp(opnd, reginput, len) != 0)
1933+ return(0);
1934+ reginput += len;
1935+ }
1936+ break;
1937+ case ANYOF:
1938+ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL)
1939+ return(0);
1940+ reginput++;
1941+ break;
1942+ case ANYBUT:
1943+ if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL)
1944+ return(0);
1945+ reginput++;
1946+ break;
1947+ case NOTHING:
1948+ break;
1949+ case BACK:
1950+ break;
1951+ case OPEN+1:
1952+ case OPEN+2:
1953+ case OPEN+3:
1954+ case OPEN+4:
1955+ case OPEN+5:
1956+ case OPEN+6:
1957+ case OPEN+7:
1958+ case OPEN+8:
1959+ case OPEN+9: {
1960+ register int no;
1961+ register char *save;
1962+
1963+ no = OP(scan) - OPEN;
1964+ save = reginput;
1965+
1966+ if (regmatch(next)) {
1967+ /*
1968+ * Don't set startp if some later
1969+ * invocation of the same parentheses
1970+ * already has.
1971+ */
1972+ if (regstartp[no] == NULL)
1973+ regstartp[no] = save;
1974+ return(1);
1975+ } else
1976+ return(0);
1977+ }
1978+ break;
1979+ case CLOSE+1:
1980+ case CLOSE+2:
1981+ case CLOSE+3:
1982+ case CLOSE+4:
1983+ case CLOSE+5:
1984+ case CLOSE+6:
1985+ case CLOSE+7:
1986+ case CLOSE+8:
1987+ case CLOSE+9: {
1988+ register int no;
1989+ register char *save;
1990+
1991+ no = OP(scan) - CLOSE;
1992+ save = reginput;
1993+
1994+ if (regmatch(next)) {
1995+ /*
1996+ * Don't set endp if some later
1997+ * invocation of the same parentheses
1998+ * already has.
1999+ */
2000+ if (regendp[no] == NULL)
2001+ regendp[no] = save;
2002+ return(1);
2003+ } else
2004+ return(0);
2005+ }
2006+ break;
2007+ case BRANCH: {
2008+ register char *save;
2009+
2010+ if (OP(next) != BRANCH) /* No choice. */
2011+ next = OPERAND(scan); /* Avoid recursion. */
2012+ else {
2013+ do {
2014+ save = reginput;
2015+ if (regmatch(OPERAND(scan)))
2016+ return(1);
2017+ reginput = save;
2018+ scan = regnext(scan);
2019+ } while (scan != NULL && OP(scan) == BRANCH);
2020+ return(0);
2021+ /* NOTREACHED */
2022+ }
2023+ }
2024+ break;
2025+ case STAR:
2026+ case PLUS: {
2027+ register char nextch;
2028+ register int no;
2029+ register char *save;
2030+ register int min;
2031+
2032+ /*
2033+ * Lookahead to avoid useless match attempts
2034+ * when we know what character comes next.
2035+ */
2036+ nextch = '\0';
2037+ if (OP(next) == EXACTLY)
2038+ nextch = *OPERAND(next);
2039+ min = (OP(scan) == STAR) ? 0 : 1;
2040+ save = reginput;
2041+ no = regrepeat(OPERAND(scan));
2042+ while (no >= min) {
2043+ /* If it could work, try it. */
2044+ if (nextch == '\0' || *reginput == nextch)
2045+ if (regmatch(next))
2046+ return(1);
2047+ /* Couldn't or didn't -- back up. */
2048+ no--;
2049+ reginput = save + no;
2050+ }
2051+ return(0);
2052+ }
2053+ break;
2054+ case END:
2055+ return(1); /* Success! */
2056+ break;
2057+ default:
2058+ regerror("memory corruption");
2059+ return(0);
2060+ break;
2061+ }
2062+
2063+ scan = next;
2064+ }
2065+
2066+ /*
2067+ * We get here only if there's trouble -- normally "case END" is
2068+ * the terminating point.
2069+ */
2070+ regerror("corrupted pointers");
2071+ return(0);
2072+}
2073+
2074+/*
2075+ - regrepeat - repeatedly match something simple, report how many
2076+ */
2077+static int
2078+regrepeat(p)
2079+char *p;
2080+{
2081+ register int count = 0;
2082+ register char *scan;
2083+ register char *opnd;
2084+
2085+ scan = reginput;
2086+ opnd = OPERAND(p);
2087+ switch (OP(p)) {
2088+ case ANY:
2089+ count = strlen(scan);
2090+ scan += count;
2091+ break;
2092+ case EXACTLY:
2093+ while (*opnd == *scan) {
2094+ count++;
2095+ scan++;
2096+ }
2097+ break;
2098+ case ANYOF:
2099+ while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
2100+ count++;
2101+ scan++;
2102+ }
2103+ break;
2104+ case ANYBUT:
2105+ while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
2106+ count++;
2107+ scan++;
2108+ }
2109+ break;
2110+ default: /* Oh dear. Called inappropriately. */
2111+ regerror("internal foulup");
2112+ count = 0; /* Best compromise. */
2113+ break;
2114+ }
2115+ reginput = scan;
2116+
2117+ return(count);
2118+}
2119+
2120+/*
2121+ - regnext - dig the "next" pointer out of a node
2122+ */
2123+static char *
2124+regnext(p)
2125+register char *p;
2126+{
2127+ register int offset;
2128+
2129+ if (p == &regdummy)
2130+ return(NULL);
2131+
2132+ offset = NEXT(p);
2133+ if (offset == 0)
2134+ return(NULL);
2135+
2136+ if (OP(p) == BACK)
2137+ return(p-offset);
2138+ else
2139+ return(p+offset);
2140+}
2141+
2142+#ifdef DEBUG
2143+
2144+STATIC char *regprop();
2145+
2146+/*
2147+ - regdump - dump a regexp onto stdout in vaguely comprehensible form
2148+ */
2149+void
2150+regdump(r)
2151+regexp *r;
2152+{
2153+ register char *s;
2154+ register char op = EXACTLY; /* Arbitrary non-END op. */
2155+ register char *next;
2156+ extern char *strchr();
2157+
2158+
2159+ s = r->program + 1;
2160+ while (op != END) { /* While that wasn't END last time... */
2161+ op = OP(s);
2162+ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
2163+ next = regnext(s);
2164+ if (next == NULL) /* Next ptr. */
2165+ printf("(0)");
2166+ else
2167+ printf("(%d)", (s-r->program)+(next-s));
2168+ s += 3;
2169+ if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
2170+ /* Literal string, where present. */
2171+ while (*s != '\0') {
2172+ putchar(*s);
2173+ s++;
2174+ }
2175+ s++;
2176+ }
2177+ putchar('\n');
2178+ }
2179+
2180+ /* Header fields of interest. */
2181+ if (r->regstart != '\0')
2182+ printf("start `%c' ", r->regstart);
2183+ if (r->reganch)
2184+ printf("anchored ");
2185+ if (r->regmust != NULL)
2186+ printf("must have \"%s\"", r->regmust);
2187+ printf("\n");
2188+}
2189+
2190+/*
2191+ - regprop - printable representation of opcode
2192+ */
2193+static char *
2194+regprop(op)
2195+char *op;
2196+{
2197+ register char *p;
2198+ static char buf[50];
2199+
2200+ (void) strcpy(buf, ":");
2201+
2202+ switch (OP(op)) {
2203+ case BOL:
2204+ p = "BOL";
2205+ break;
2206+ case EOL:
2207+ p = "EOL";
2208+ break;
2209+ case ANY:
2210+ p = "ANY";
2211+ break;
2212+ case ANYOF:
2213+ p = "ANYOF";
2214+ break;
2215+ case ANYBUT:
2216+ p = "ANYBUT";
2217+ break;
2218+ case BRANCH:
2219+ p = "BRANCH";
2220+ break;
2221+ case EXACTLY:
2222+ p = "EXACTLY";
2223+ break;
2224+ case NOTHING:
2225+ p = "NOTHING";
2226+ break;
2227+ case BACK:
2228+ p = "BACK";
2229+ break;
2230+ case END:
2231+ p = "END";
2232+ break;
2233+ case OPEN+1:
2234+ case OPEN+2:
2235+ case OPEN+3:
2236+ case OPEN+4:
2237+ case OPEN+5:
2238+ case OPEN+6:
2239+ case OPEN+7:
2240+ case OPEN+8:
2241+ case OPEN+9:
2242+ sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN);
2243+ p = NULL;
2244+ break;
2245+ case CLOSE+1:
2246+ case CLOSE+2:
2247+ case CLOSE+3:
2248+ case CLOSE+4:
2249+ case CLOSE+5:
2250+ case CLOSE+6:
2251+ case CLOSE+7:
2252+ case CLOSE+8:
2253+ case CLOSE+9:
2254+ sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE);
2255+ p = NULL;
2256+ break;
2257+ case STAR:
2258+ p = "STAR";
2259+ break;
2260+ case PLUS:
2261+ p = "PLUS";
2262+ break;
2263+ default:
2264+ regerror("corrupted opcode");
2265+ break;
2266+ }
2267+ if (p != NULL)
2268+ (void) strcat(buf, p);
2269+ return(buf);
2270+}
2271+#endif
2272+
2273+/*
2274+ * The following is provided for those people who do not have strcspn() in
2275+ * their C libraries. They should get off their butts and do something
2276+ * about it; at least one public-domain implementation of those (highly
2277+ * useful) string routines has been published on Usenet.
2278+ */
2279+/*
2280+ * strcspn - find length of initial segment of s1 consisting entirely
2281+ * of characters not from s2
2282+ */
2283+
2284+static int
2285+strcspn(s1, s2)
2286+char *s1;
2287+char *s2;
2288+{
2289+ register char *scan1;
2290+ register char *scan2;
2291+ register int count;
2292+
2293+ count = 0;
2294+ for (scan1 = s1; *scan1 != '\0'; scan1++) {
2295+ for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. */
2296+ if (*scan1 == *scan2++)
2297+ return(count);
2298+ count++;
2299+ }
2300+ return(count);
2301+}
2302diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regexp.h linux-2.4.22-rc2/net/sched/regexp/regexp.h
2303--- linux-2.4.22-rc2-stock/net/sched/regexp/regexp.h 1969-12-31 19:00:00.000000000 -0500
2304+++ linux-2.4.22-rc2/net/sched/regexp/regexp.h 2003-08-15 21:56:33.000000000 -0500
2305@@ -0,0 +1,20 @@
2306+/*
2307+ * Definitions etc. for regexp(3) routines.
2308+ *
2309+ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
2310+ * not the System V one.
2311+ */
2312+#define NSUBEXP 10
2313+typedef struct regexp {
2314+ char *startp[NSUBEXP];
2315+ char *endp[NSUBEXP];
2316+ char regstart; /* Internal use only. */
2317+ char reganch; /* Internal use only. */
2318+ char *regmust; /* Internal use only. */
2319+ int regmlen; /* Internal use only. */
2320+ char program[1]; /* Unwarranted chumminess with compiler. */
2321+} regexp;
2322+
2323+extern regexp *regcomp(char *exp, int *patternlength);
2324+int regexec(regexp *prog, char *string);
2325+void regsub(regexp *prog, char *source, char *dest);
2326diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regmagic.h linux-2.4.22-rc2/net/sched/regexp/regmagic.h
2327--- linux-2.4.22-rc2-stock/net/sched/regexp/regmagic.h 1969-12-31 19:00:00.000000000 -0500
2328+++ linux-2.4.22-rc2/net/sched/regexp/regmagic.h 2003-08-15 16:30:21.000000000 -0500
2329@@ -0,0 +1,5 @@
2330+/*
2331+ * The first byte of the regexp internal "program" is actually this magic
2332+ * number; the start node begins in the second byte.
2333+ */
2334+#define MAGIC 0234
2335diff -Naur linux-2.4.22-rc2-stock/net/sched/regexp/regsub.c linux-2.4.22-rc2/net/sched/regexp/regsub.c
2336--- linux-2.4.22-rc2-stock/net/sched/regexp/regsub.c 1969-12-31 19:00:00.000000000 -0500
2337+++ linux-2.4.22-rc2/net/sched/regexp/regsub.c 2003-08-18 12:33:24.000000000 -0500
2338@@ -0,0 +1,88 @@
2339+/*
2340+ * regsub
2341+ * @(#)regsub.c 1.3 of 2 April 86
2342+ *
2343+ * Copyright (c) 1986 by University of Toronto.
2344+ * Written by Henry Spencer. Not derived from licensed software.
2345+ *
2346+ * Permission is granted to anyone to use this software for any
2347+ * purpose on any computer system, and to redistribute it freely,
2348+ * subject to the following restrictions:
2349+ *
2350+ * 1. The author is not responsible for the consequences of use of
2351+ * this software, no matter how awful, even if they arise
2352+ * from defects in it.
2353+ *
2354+ * 2. The origin of this software must not be misrepresented, either
2355+ * by explicit claim or by omission.
2356+ *
2357+ * 3. Altered versions must be plainly marked as such, and must not
2358+ * be misrepresented as being the original software.
2359+ *
2360+ *
2361+ * This code was modified by Ethan Sommer to work within the kernel
2362+ * (it now uses kmalloc etc..)
2363+ *
2364+ */
2365+#include "regexp.h"
2366+#include "regmagic.h"
2367+#include <linux/string.h>
2368+
2369+#ifndef CHARBITS
2370+#define UCHARAT(p) ((int)*(unsigned char *)(p))
2371+#else
2372+#define UCHARAT(p) ((int)*(p)&CHARBITS)
2373+#endif
2374+
2375+extern void regerror(char * s);
2376+
2377+/*
2378+ - regsub - perform substitutions after a regexp match
2379+ */
2380+void
2381+regsub(regexp * prog, char * source, char * dest)
2382+{
2383+ register char *src;
2384+ register char *dst;
2385+ register char c;
2386+ register int no;
2387+ register int len;
2388+
2389+ /* Not necessary and gcc doesn't like it -MLS */
2390+ /*extern char *strncpy();*/
2391+
2392+ if (prog == NULL || source == NULL || dest == NULL) {
2393+ regerror("NULL parm to regsub");
2394+ return;
2395+ }
2396+ if (UCHARAT(prog->program) != MAGIC) {
2397+ regerror("damaged regexp fed to regsub");
2398+ return;
2399+ }
2400+
2401+ src = source;
2402+ dst = dest;
2403+ while ((c = *src++) != '\0') {
2404+ if (c == '&')
2405+ no = 0;
2406+ else if (c == '\\' && '0' <= *src && *src <= '9')
2407+ no = *src++ - '0';
2408+ else
2409+ no = -1;
2410+
2411+ if (no < 0) { /* Ordinary character. */
2412+ if (c == '\\' && (*src == '\\' || *src == '&'))
2413+ c = *src++;
2414+ *dst++ = c;
2415+ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
2416+ len = prog->endp[no] - prog->startp[no];
2417+ (void) strncpy(dst, prog->startp[no], len);
2418+ dst += len;
2419+ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
2420+ regerror("damaged match string");
2421+ return;
2422+ }
2423+ }
2424+ }
2425+ *dst++ = '\0';
2426+}
This page took 0.434121 seconds and 4 git commands to generate.