]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-layer7.patch
i686 updates
[packages/kernel.git] / kernel-layer7.patch
CommitLineData
e3fad6a6
AM
1diff -urNp -x '*.orig' linux-5.11/include/linux/netfilter/xt_layer7.h linux-5.11/include/linux/netfilter/xt_layer7.h
2--- linux-5.11/include/linux/netfilter/xt_layer7.h 1970-01-01 01:00:00.000000000 +0100
3+++ linux-5.11/include/linux/netfilter/xt_layer7.h 2021-02-24 13:21:48.338440845 +0100
4@@ -0,0 +1,13 @@
#ifndef _XT_LAYER7_H
#define _XT_LAYER7_H

/* Provides __u8, so this header can be included on its own. */
#include <linux/types.h>

/* Capacity, in bytes, of the two fixed-size text buffers below. */
#define MAX_PATTERN_LEN 8192
#define MAX_PROTOCOL_LEN 256

/*
 * Parameter block for the "layer7" xtables match.
 *
 * protocol - protocol name buffer (NOTE(review): presumably a
 *            NUL-terminated name such as "http" -- confirm against the
 *            match implementation).
 * pattern  - pattern buffer (NOTE(review): presumably the regular
 *            expression source text -- confirm).
 * invert   - one-byte flag (NOTE(review): presumably non-zero to negate
 *            the match result -- confirm).
 */
struct xt_layer7_info {
	char protocol[MAX_PROTOCOL_LEN];
	char pattern[MAX_PATTERN_LEN];
	__u8 invert;
};

#endif /* _XT_LAYER7_H */
18diff -urNp -x '*.orig' linux-5.11/include/net/netfilter/nf_conntrack.h linux-5.11/include/net/netfilter/nf_conntrack.h
19--- linux-5.11/include/net/netfilter/nf_conntrack.h 2021-02-14 23:32:24.000000000 +0100
20+++ linux-5.11/include/net/netfilter/nf_conntrack.h 2021-02-24 13:21:48.338440845 +0100
21@@ -103,6 +103,22 @@ struct nf_conn {
22 /* Extensions */
23 struct nf_ct_ext *ext;
24
25+#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || \
26+ defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
27+ struct {
28+ /*
29+ * e.g. "http". NULL before decision. "unknown" after decision
30+ * if no match.
31+ */
32+ char *app_proto;
33+ /*
34+ * application layer data so far. NULL after match decision.
35+ */
36+ char *app_data;
37+ unsigned int app_data_len;
38+ } layer7;
39+#endif
40+
41 /* Storage reserved for other modules, must be the last member */
42 union nf_conntrack_proto proto;
43 };
44diff -urNp -x '*.orig' linux-5.11/net/netfilter/Kconfig linux-5.11/net/netfilter/Kconfig
45--- linux-5.11/net/netfilter/Kconfig 2021-02-14 23:32:24.000000000 +0100
46+++ linux-5.11/net/netfilter/Kconfig 2021-02-24 13:21:48.335107407 +0100
47@@ -1562,6 +1562,27 @@ config NETFILTER_XT_MATCH_STATE
2380c486
JR
48
49 To compile it as a module, choose M here. If unsure, say N.
50
# The former "EXPERIMENTAL && (IP_NF_CONNTRACK || NF_CONNTRACK)" dependency
# referenced symbols that are not defined in this tree; an undefined symbol
# evaluates to "n", which made the option impossible to select.
config NETFILTER_XT_MATCH_LAYER7
	tristate '"layer7" match support'
	depends on NETFILTER_XTABLES
	depends on NF_CONNTRACK
	help
	  Say Y if you want to be able to classify connections (and their
	  packets) based on regular expression matching of their application
	  layer data.   This is one way to classify applications such as
	  peer-to-peer filesharing systems that do not always use the same
	  port.

	  To compile it as a module, choose M here.  If unsure, say N.
63+
425dfcfc 64+
2380c486
JR
# Optional debugging output for the layer7 match; only offered when the
# match itself is enabled.
config NETFILTER_XT_MATCH_LAYER7_DEBUG
	bool "Layer 7 debugging output"
	depends on NETFILTER_XT_MATCH_LAYER7
	help
	  Say Y to get lots of debugging output.
70+
71+
72 config NETFILTER_XT_MATCH_STATISTIC
73 tristate '"statistic" match support'
74 depends on NETFILTER_ADVANCED
e3fad6a6
AM
75diff -urNp -x '*.orig' linux-5.11/net/netfilter/Makefile linux-5.11/net/netfilter/Makefile
76--- linux-5.11/net/netfilter/Makefile 2021-02-14 23:32:24.000000000 +0100
77+++ linux-5.11/net/netfilter/Makefile 2021-02-24 13:21:48.335107407 +0100
78@@ -204,6 +204,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT)
2380c486
JR
79 obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
80 obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
81 obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
82+obj-$(CONFIG_NETFILTER_XT_MATCH_LAYER7) += xt_layer7.o
83 obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
84 obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
85 obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
e3fad6a6
AM
86diff -urNp -x '*.orig' linux-5.11/net/netfilter/nf_conntrack_core.c linux-5.11/net/netfilter/nf_conntrack_core.c
87--- linux-5.11/net/netfilter/nf_conntrack_core.c 2021-02-14 23:32:24.000000000 +0100
88+++ linux-5.11/net/netfilter/nf_conntrack_core.c 2021-02-24 13:21:48.335107407 +0100
89@@ -528,6 +528,14 @@ static void nf_ct_del_from_dying_or_unco
90 {
91 struct ct_pcpu *pcpu;
92
#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
	/*
	 * Release the layer7 per-connection buffers.  kfree(NULL) is a
	 * no-op, so no NULL guards are needed.  The pointers are cleared
	 * afterwards so a later pass over the same conntrack cannot free
	 * them a second time.
	 */
	kfree(ct->layer7.app_proto);
	ct->layer7.app_proto = NULL;
	kfree(ct->layer7.app_data);
	ct->layer7.app_data = NULL;
#endif
2380c486 99+
2380c486 100+
e3fad6a6
AM
101 /* We overload first tuple to link into unconfirmed or dying list.*/
102 pcpu = per_cpu_ptr(nf_ct_net(ct)->ct.pcpu_lists, ct->cpu);
103
104diff -urNp -x '*.orig' linux-5.11/net/netfilter/nf_conntrack_standalone.c linux-5.11/net/netfilter/nf_conntrack_standalone.c
105--- linux-5.11/net/netfilter/nf_conntrack_standalone.c 2021-02-14 23:32:24.000000000 +0100
106+++ linux-5.11/net/netfilter/nf_conntrack_standalone.c 2021-02-24 13:21:48.338440845 +0100
107@@ -366,6 +366,12 @@ static int ct_seq_show(struct seq_file *
108 ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
109 ct_show_delta_time(s, ct);
110
#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
	/*
	 * Print the layer7 protocol, if one has been recorded.
	 * seq_printf() returns void, so its result cannot be tested here;
	 * a full buffer is detected by the seq_has_overflowed() check
	 * further down in this function.
	 */
	if (ct->layer7.app_proto)
		seq_printf(s, "l7proto=%s ", ct->layer7.app_proto);
#endif
2380c486 116+
e3fad6a6
AM
117 seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use));
118
119 if (seq_has_overflowed(s))
120diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regexp.c linux-5.11/net/netfilter/regexp/regexp.c
121--- linux-5.11/net/netfilter/regexp/regexp.c 1970-01-01 01:00:00.000000000 +0100
122+++ linux-5.11/net/netfilter/regexp/regexp.c 2021-02-24 13:21:48.335107407 +0100
123@@ -0,0 +1,1197 @@
124+/*
125+ * regcomp and regexec -- regsub and regerror are elsewhere
126+ * @(#)regexp.c 1.3 of 18 April 87
127+ *
128+ * Copyright (c) 1986 by University of Toronto.
129+ * Written by Henry Spencer. Not derived from licensed software.
130+ *
131+ * Permission is granted to anyone to use this software for any
132+ * purpose on any computer system, and to redistribute it freely,
133+ * subject to the following restrictions:
134+ *
135+ * 1. The author is not responsible for the consequences of use of
136+ * this software, no matter how awful, even if they arise
137+ * from defects in it.
138+ *
139+ * 2. The origin of this software must not be misrepresented, either
140+ * by explicit claim or by omission.
141+ *
142+ * 3. Altered versions must be plainly marked as such, and must not
143+ * be misrepresented as being the original software.
144+ *
145+ * Beware that some of this code is subtly aware of the way operator
146+ * precedence is structured in regular expressions. Serious changes in
147+ * regular-expression syntax might require a total rethink.
148+ *
149+ * This code was modified by Ethan Sommer to work within the kernel
150+ * (it now uses kmalloc etc..)
151+ *
152+ * Modified slightly by Matthew Strait to use more modern C.
153+ */
2380c486 154+
e3fad6a6
AM
155+#include "regexp.h"
156+#include "regmagic.h"
2380c486 157+
e3fad6a6
AM
/*
 * Added by Ethan and Matt so that this file works in both kernel and user
 * space (so iptables can use it, for instance):
 *
 *  - in the kernel, malloc() is mapped onto kmalloc() with GFP_ATOMIC;
 *  - in user space, printk() is mapped onto printf().
 *
 * #ifdef is used rather than #if so an undefined __KERNEL__ is handled
 * without relying on the "undefined macro evaluates to 0" rule, and the
 * macro argument is parenthesised so any expression can be passed.
 */
#ifdef __KERNEL__
	#define malloc(foo) kmalloc((foo), GFP_ATOMIC)
#else
	#define printk(format,args...) printf(format,##args)
#endif
165+
/*
 * regerror - report a regexp error message
 *
 * s: text describing the problem.
 *
 * The message is only logged; the function returns normally, and the
 * FAIL() macro is what makes the caller bail out afterwards.
 *
 * In kernel builds the message is tagged with KERN_ERR; a literal "<3>"
 * at the start of the format is not a log-level marker for current
 * printk() and would be printed verbatim.  The user-space output is left
 * exactly as it was.
 */
void regerror(char * s)
{
#ifdef __KERNEL__
	printk(KERN_ERR "Regexp: %s\n", s);
#else
	printk("<3>Regexp: %s\n", s);
#endif
}
171+
e3fad6a6
AM
172+/*
173+ * The "internal use only" fields in regexp.h are present to pass info from
174+ * compile to execute that permits the execute phase to run lots faster on
175+ * simple cases. They are:
176+ *
177+ * regstart char that must begin a match; '\0' if none obvious
178+ * reganch is the match anchored (at beginning-of-line only)?
179+ * regmust string (pointer into program) that match must include, or NULL
180+ * regmlen length of regmust string
181+ *
182+ * Regstart and reganch permit very fast decisions on suitable starting points
183+ * for a match, cutting down the work a lot. Regmust permits fast rejection
184+ * of lines that cannot possibly match. The regmust tests are costly enough
185+ * that regcomp() supplies a regmust only if the r.e. contains something
186+ * potentially expensive (at present, the only such thing detected is * or +
187+ * at the start of the r.e., which can involve a lot of backup). Regmlen is
188+ * supplied because the test in regexec() needs it and regcomp() is computing
189+ * it anyway.
190+ */
2380c486 191+
e3fad6a6
AM
192+/*
193+ * Structure for regexp "program". This is essentially a linear encoding
194+ * of a nondeterministic finite-state machine (aka syntax charts or
195+ * "railroad normal form" in parsing technology). Each node is an opcode
196+ * plus a "next" pointer, possibly plus an operand. "Next" pointers of
197+ * all nodes except BRANCH implement concatenation; a "next" pointer with
198+ * a BRANCH on both ends of it is connecting two alternatives. (Here we
199+ * have one of the subtle syntax dependencies: an individual BRANCH (as
200+ * opposed to a collection of them) is never concatenated with anything
201+ * because of operator precedence.) The operand of some types of node is
202+ * a literal string; for others, it is a node leading into a sub-FSM. In
203+ * particular, the operand of a BRANCH node is the first node of the branch.
204+ * (NB this is *not* a tree structure: the tail of the branch connects
205+ * to the thing following the set of BRANCHes.) The opcodes are:
206+ */
2380c486 207+
e3fad6a6
AM
208+/* definition number opnd? meaning */
209+#define END 0 /* no End of program. */
210+#define BOL 1 /* no Match "" at beginning of line. */
211+#define EOL 2 /* no Match "" at end of line. */
212+#define ANY 3 /* no Match any one character. */
213+#define ANYOF 4 /* str Match any character in this string. */
214+#define ANYBUT 5 /* str Match any character not in this string. */
215+#define BRANCH 6 /* node Match this alternative, or the next... */
216+#define BACK 7 /* no Match "", "next" ptr points backward. */
217+#define EXACTLY 8 /* str Match this string. */
218+#define NOTHING 9 /* no Match empty string. */
219+#define STAR 10 /* node Match this (simple) thing 0 or more times. */
220+#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
221+#define OPEN 20 /* no Mark this point in input as start of #n. */
222+ /* OPEN+1 is number 1, etc. */
223+#define CLOSE 30 /* no Analogous to OPEN. */
2380c486 224+
e3fad6a6
AM
225+/*
226+ * Opcode notes:
227+ *
228+ * BRANCH The set of branches constituting a single choice are hooked
229+ * together with their "next" pointers, since precedence prevents
230+ * anything being concatenated to any individual branch. The
231+ * "next" pointer of the last BRANCH in a choice points to the
232+ * thing following the whole choice. This is also where the
233+ * final "next" pointer of each individual branch points; each
234+ * branch starts with the operand node of a BRANCH node.
235+ *
236+ * BACK Normal "next" pointers all implicitly point forward; BACK
237+ * exists to make loop structures possible.
238+ *
239+ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
240+ * BRANCH structures using BACK. Simple cases (one character
241+ * per match) are implemented with STAR and PLUS for speed
242+ * and to minimize recursive plunges.
243+ *
244+ * OPEN,CLOSE ...are numbered at compile time.
245+ */
2380c486 246+
e3fad6a6
AM
247+/*
248+ * A node is one char of opcode followed by two chars of "next" pointer.
249+ * "Next" pointers are stored as two 8-bit pieces, high order first. The
250+ * value is a positive offset from the opcode of the node containing it.
251+ * An operand, if any, simply follows the node. (Note that much of the
252+ * code generation knows about this implicit relationship.)
253+ *
254+ * Using two bytes for the "next" pointer is vast overkill for most things,
255+ * but allows patterns to get big without disasters.
256+ */
257+#define OP(p) (*(p))
258+#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
259+#define OPERAND(p) ((p) + 3)
2380c486 260+
e3fad6a6
AM
261+/*
262+ * See regmagic.h for one further detail of program structure.
263+ */
2380c486 264+
2380c486 265+
e3fad6a6
AM
266+/*
267+ * Utility definitions.
268+ */
269+#ifndef CHARBITS
270+#define UCHARAT(p) ((int)*(unsigned char *)(p))
271+#else
272+#define UCHARAT(p) ((int)*(p)&CHARBITS)
273+#endif
2380c486 274+
e3fad6a6
AM
/*
 * FAIL(m)   - log message m through regerror() and return NULL from the
 *             enclosing function.  Wrapped in do { } while (0) so that
 *             "FAIL(x);" is a single statement in every context,
 *             including an unbraced if/else.
 * ISMULT(c) - true when c is one of the repetition operators * + ?
 * META      - the characters that have special meaning in a pattern.
 */
#define FAIL(m)	do { regerror(m); return(NULL); } while (0)
#define ISMULT(c)	((c) == '*' || (c) == '+' || (c) == '?')
#define META	"^$.[()|?+*\\"
2380c486 278+
e3fad6a6
AM
279+/*
280+ * Flags to be passed up and down.
281+ */
282+#define HASWIDTH 01 /* Known never to match null string. */
283+#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
284+#define SPSTART 04 /* Starts with * or +. */
285+#define WORST 0 /* Worst case. */
2380c486 286+
e3fad6a6
AM
/*
 * Working state shared by the regcomp() and regexec() helpers.  A pointer
 * to one of these is passed to every helper as its first argument.
 */
struct match_globals {
	/* Matching state. */
	char *reginput;		/* current position in the input string */
	char *regbol;		/* start of the input, for the ^ check */
	char **regstartp;	/* the startp array */
	char **regendp;		/* the endp array */

	/* Compilation state. */
	char *regparse;		/* current position in the pattern text */
	int regnpar;		/* () count */
	char regdummy;		/* regcode points here when not emitting */
	char *regcode;		/* where the next code byte is emitted */
	long regsize;		/* code size */
};
2380c486 301+
e3fad6a6
AM
302+/*
303+ * Forward declarations for regcomp()'s friends.
304+ */
305+#ifndef STATIC
306+#define STATIC static
307+#endif
308+STATIC char *reg(struct match_globals *g, int paren,int *flagp);
309+STATIC char *regbranch(struct match_globals *g, int *flagp);
310+STATIC char *regpiece(struct match_globals *g, int *flagp);
311+STATIC char *regatom(struct match_globals *g, int *flagp);
312+STATIC char *regnode(struct match_globals *g, char op);
313+STATIC char *regnext(struct match_globals *g, char *p);
314+STATIC void regc(struct match_globals *g, char b);
315+STATIC void reginsert(struct match_globals *g, char op, char *opnd);
316+STATIC void regtail(struct match_globals *g, char *p, char *val);
317+STATIC void regoptail(struct match_globals *g, char *p, char *val);
2380c486 318+
2380c486 319+
e3fad6a6 320+__kernel_size_t my_strcspn(const char *s1,const char *s2)
2380c486 321+{
e3fad6a6
AM
322+ char *scan1;
323+ char *scan2;
324+ int count;
325+
326+ count = 0;
327+ for (scan1 = (char *)s1; *scan1 != '\0'; scan1++) {
328+ for (scan2 = (char *)s2; *scan2 != '\0';) /* ++ moved down. */
329+ if (*scan1 == *scan2++)
330+ return(count);
331+ count++;
332+ }
333+ return(count);
2380c486
JR
334+}
335+
e3fad6a6
AM
336+/*
337+ - regcomp - compile a regular expression into internal code
338+ *
339+ * We can't allocate space until we know how big the compiled form will be,
340+ * but we can't compile it (and thus know how big it is) until we've got a
341+ * place to put the code. So we cheat: we compile it twice, once with code
342+ * generation turned off and size counting turned on, and once "for real".
343+ * This also means that we don't allocate space until we are sure that the
344+ * thing really will compile successfully, and we never have to move the
345+ * code and thus invalidate pointers into it. (Note that it has to be in
346+ * one piece because free() must be able to free it all.)
347+ *
348+ * Beware that the optimization-preparation code in here knows about some
349+ * of the structure of the compiled regexp.
350+ */
351+regexp *
352+regcomp(char *exp,int *patternsize)
2380c486 353+{
e3fad6a6
AM
354+ register regexp *r;
355+ register char *scan;
356+ register char *longest;
357+ register int len;
358+ int flags;
359+ struct match_globals g;
360+
361+ /* commented out by ethan
362+ extern char *malloc();
363+ */
2380c486 364+
e3fad6a6
AM
365+ if (exp == NULL)
366+ FAIL("NULL argument");
2380c486 367+
e3fad6a6
AM
368+ /* First pass: determine size, legality. */
369+ g.regparse = exp;
370+ g.regnpar = 1;
371+ g.regsize = 0L;
372+ g.regcode = &g.regdummy;
373+ regc(&g, MAGIC);
374+ if (reg(&g, 0, &flags) == NULL)
375+ return(NULL);
2380c486 376+
e3fad6a6
AM
377+ /* Small enough for pointer-storage convention? */
378+ if (g.regsize >= 32767L) /* Probably could be 65535L. */
379+ FAIL("regexp too big");
2380c486 380+
e3fad6a6
AM
381+ /* Allocate space. */
382+ *patternsize=sizeof(regexp) + (unsigned)g.regsize;
383+ r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize);
384+ if (r == NULL)
385+ FAIL("out of space");
2380c486 386+
e3fad6a6
AM
387+ /* Second pass: emit code. */
388+ g.regparse = exp;
389+ g.regnpar = 1;
390+ g.regcode = r->program;
391+ regc(&g, MAGIC);
392+ if (reg(&g, 0, &flags) == NULL)
393+ return(NULL);
2380c486 394+
e3fad6a6
AM
395+ /* Dig out information for optimizations. */
396+ r->regstart = '\0'; /* Worst-case defaults. */
397+ r->reganch = 0;
398+ r->regmust = NULL;
399+ r->regmlen = 0;
400+ scan = r->program+1; /* First BRANCH. */
401+ if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */
402+ scan = OPERAND(scan);
2380c486 403+
e3fad6a6
AM
404+ /* Starting-point info. */
405+ if (OP(scan) == EXACTLY)
406+ r->regstart = *OPERAND(scan);
407+ else if (OP(scan) == BOL)
408+ r->reganch++;
409+
410+ /*
411+ * If there's something expensive in the r.e., find the
412+ * longest literal string that must appear and make it the
413+ * regmust. Resolve ties in favor of later strings, since
414+ * the regstart check works with the beginning of the r.e.
415+ * and avoiding duplication strengthens checking. Not a
416+ * strong reason, but sufficient in the absence of others.
417+ */
418+ if (flags&SPSTART) {
419+ longest = NULL;
420+ len = 0;
421+ for (; scan != NULL; scan = regnext(&g, scan))
422+ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
423+ longest = OPERAND(scan);
424+ len = strlen(OPERAND(scan));
425+ }
426+ r->regmust = longest;
427+ r->regmlen = len;
2380c486 428+ }
2380c486 429+ }
e3fad6a6
AM
430+
431+ return(r);
2380c486
JR
432+}
433+
e3fad6a6
AM
434+/*
435+ - reg - regular expression, i.e. main body or parenthesized thing
436+ *
437+ * Caller must absorb opening parenthesis.
438+ *
439+ * Combining parenthesis handling with the base level of regular expression
440+ * is a trifle forced, but the need to tie the tails of the branches to what
441+ * follows makes it hard to avoid.
442+ */
443+static char *
444+reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ )
2380c486 445+{
e3fad6a6
AM
446+ register char *ret;
447+ register char *br;
448+ register char *ender;
449+ register int parno = 0; /* 0 makes gcc happy */
450+ int flags;
2380c486 451+
e3fad6a6 452+ *flagp = HASWIDTH; /* Tentatively. */
2380c486 453+
e3fad6a6
AM
454+ /* Make an OPEN node, if parenthesized. */
455+ if (paren) {
456+ if (g->regnpar >= NSUBEXP)
457+ FAIL("too many ()");
458+ parno = g->regnpar;
459+ g->regnpar++;
460+ ret = regnode(g, OPEN+parno);
461+ } else
462+ ret = NULL;
2380c486 463+
e3fad6a6
AM
464+ /* Pick up the branches, linking them together. */
465+ br = regbranch(g, &flags);
466+ if (br == NULL)
467+ return(NULL);
468+ if (ret != NULL)
469+ regtail(g, ret, br); /* OPEN -> first. */
470+ else
471+ ret = br;
472+ if (!(flags&HASWIDTH))
473+ *flagp &= ~HASWIDTH;
474+ *flagp |= flags&SPSTART;
475+ while (*g->regparse == '|') {
476+ g->regparse++;
477+ br = regbranch(g, &flags);
478+ if (br == NULL)
479+ return(NULL);
480+ regtail(g, ret, br); /* BRANCH -> BRANCH. */
481+ if (!(flags&HASWIDTH))
482+ *flagp &= ~HASWIDTH;
483+ *flagp |= flags&SPSTART;
484+ }
2380c486 485+
e3fad6a6
AM
486+ /* Make a closing node, and hook it on the end. */
487+ ender = regnode(g, (paren) ? CLOSE+parno : END);
488+ regtail(g, ret, ender);
2380c486 489+
e3fad6a6
AM
490+ /* Hook the tails of the branches to the closing node. */
491+ for (br = ret; br != NULL; br = regnext(g, br))
492+ regoptail(g, br, ender);
2380c486 493+
e3fad6a6
AM
494+ /* Check for proper termination. */
495+ if (paren && *g->regparse++ != ')') {
496+ FAIL("unmatched ()");
497+ } else if (!paren && *g->regparse != '\0') {
498+ if (*g->regparse == ')') {
499+ FAIL("unmatched ()");
500+ } else
501+ FAIL("junk on end"); /* "Can't happen". */
502+ /* NOTREACHED */
2380c486 503+ }
e3fad6a6
AM
504+
505+ return(ret);
2380c486
JR
506+}
507+
e3fad6a6
AM
508+/*
509+ - regbranch - one alternative of an | operator
510+ *
511+ * Implements the concatenation operator.
512+ */
513+static char *
514+regbranch(struct match_globals *g, int *flagp)
2380c486 515+{
e3fad6a6
AM
516+ register char *ret;
517+ register char *chain;
518+ register char *latest;
519+ int flags;
2380c486 520+
e3fad6a6 521+ *flagp = WORST; /* Tentatively. */
2380c486 522+
e3fad6a6
AM
523+ ret = regnode(g, BRANCH);
524+ chain = NULL;
525+ while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') {
526+ latest = regpiece(g, &flags);
527+ if (latest == NULL)
528+ return(NULL);
529+ *flagp |= flags&HASWIDTH;
530+ if (chain == NULL) /* First piece. */
531+ *flagp |= flags&SPSTART;
532+ else
533+ regtail(g, chain, latest);
534+ chain = latest;
535+ }
536+ if (chain == NULL) /* Loop ran zero times. */
537+ (void) regnode(g, NOTHING);
2380c486 538+
e3fad6a6 539+ return(ret);
2380c486
JR
540+}
541+
e3fad6a6
AM
542+/*
543+ - regpiece - something followed by possible [*+?]
544+ *
545+ * Note that the branching code sequences used for ? and the general cases
546+ * of * and + are somewhat optimized: they use the same NOTHING node as
547+ * both the endmarker for their branch list and the body of the last branch.
548+ * It might seem that this node could be dispensed with entirely, but the
549+ * endmarker role is not redundant.
550+ */
551+static char *
552+regpiece(struct match_globals *g, int *flagp)
2380c486 553+{
e3fad6a6
AM
554+ register char *ret;
555+ register char op;
556+ register char *next;
557+ int flags;
2380c486 558+
e3fad6a6
AM
559+ ret = regatom(g, &flags);
560+ if (ret == NULL)
561+ return(NULL);
2380c486 562+
e3fad6a6
AM
563+ op = *g->regparse;
564+ if (!ISMULT(op)) {
565+ *flagp = flags;
566+ return(ret);
2380c486
JR
567+ }
568+
e3fad6a6
AM
569+ if (!(flags&HASWIDTH) && op != '?')
570+ FAIL("*+ operand could be empty");
571+ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
2380c486 572+
e3fad6a6
AM
573+ if (op == '*' && (flags&SIMPLE))
574+ reginsert(g, STAR, ret);
575+ else if (op == '*') {
576+ /* Emit x* as (x&|), where & means "self". */
577+ reginsert(g, BRANCH, ret); /* Either x */
578+ regoptail(g, ret, regnode(g, BACK)); /* and loop */
579+ regoptail(g, ret, ret); /* back */
580+ regtail(g, ret, regnode(g, BRANCH)); /* or */
581+ regtail(g, ret, regnode(g, NOTHING)); /* null. */
582+ } else if (op == '+' && (flags&SIMPLE))
583+ reginsert(g, PLUS, ret);
584+ else if (op == '+') {
585+ /* Emit x+ as x(&|), where & means "self". */
586+ next = regnode(g, BRANCH); /* Either */
587+ regtail(g, ret, next);
588+ regtail(g, regnode(g, BACK), ret); /* loop back */
589+ regtail(g, next, regnode(g, BRANCH)); /* or */
590+ regtail(g, ret, regnode(g, NOTHING)); /* null. */
591+ } else if (op == '?') {
592+ /* Emit x? as (x|) */
593+ reginsert(g, BRANCH, ret); /* Either x */
594+ regtail(g, ret, regnode(g, BRANCH)); /* or */
595+ next = regnode(g, NOTHING); /* null. */
596+ regtail(g, ret, next);
597+ regoptail(g, ret, next);
2380c486 598+ }
e3fad6a6
AM
599+ g->regparse++;
600+ if (ISMULT(*g->regparse))
601+ FAIL("nested *?+");
2380c486 602+
e3fad6a6 603+ return(ret);
2380c486
JR
604+}
605+
e3fad6a6
AM
606+/*
607+ - regatom - the lowest level
608+ *
609+ * Optimization: gobbles an entire sequence of ordinary characters so that
610+ * it can turn them into a single node, which is smaller to store and
611+ * faster to run. Backslashed characters are exceptions, each becoming a
612+ * separate node; the code is simpler that way and it's not worth fixing.
613+ */
614+static char *
615+regatom(struct match_globals *g, int *flagp)
2380c486 616+{
e3fad6a6
AM
617+ register char *ret;
618+ int flags;
2380c486 619+
e3fad6a6 620+ *flagp = WORST; /* Tentatively. */
2380c486 621+
e3fad6a6
AM
622+ switch (*g->regparse++) {
623+ case '^':
624+ ret = regnode(g, BOL);
625+ break;
626+ case '$':
627+ ret = regnode(g, EOL);
628+ break;
629+ case '.':
630+ ret = regnode(g, ANY);
631+ *flagp |= HASWIDTH|SIMPLE;
632+ break;
633+ case '[': {
634+ register int class;
635+ register int classend;
2380c486 636+
e3fad6a6
AM
637+ if (*g->regparse == '^') { /* Complement of range. */
638+ ret = regnode(g, ANYBUT);
639+ g->regparse++;
640+ } else
641+ ret = regnode(g, ANYOF);
642+ if (*g->regparse == ']' || *g->regparse == '-')
643+ regc(g, *g->regparse++);
644+ while (*g->regparse != '\0' && *g->regparse != ']') {
645+ if (*g->regparse == '-') {
646+ g->regparse++;
647+ if (*g->regparse == ']' || *g->regparse == '\0')
648+ regc(g, '-');
649+ else {
650+ class = UCHARAT(g->regparse-2)+1;
651+ classend = UCHARAT(g->regparse);
652+ if (class > classend+1)
653+ FAIL("invalid [] range");
654+ for (; class <= classend; class++)
655+ regc(g, class);
656+ g->regparse++;
657+ }
658+ } else
659+ regc(g, *g->regparse++);
660+ }
661+ regc(g, '\0');
662+ if (*g->regparse != ']')
663+ FAIL("unmatched []");
664+ g->regparse++;
665+ *flagp |= HASWIDTH|SIMPLE;
666+ }
667+ break;
668+ case '(':
669+ ret = reg(g, 1, &flags);
670+ if (ret == NULL)
671+ return(NULL);
672+ *flagp |= flags&(HASWIDTH|SPSTART);
673+ break;
674+ case '\0':
675+ case '|':
676+ case ')':
677+ FAIL("internal urp"); /* Supposed to be caught earlier. */
678+ break;
679+ case '?':
680+ case '+':
681+ case '*':
682+ FAIL("?+* follows nothing");
683+ break;
684+ case '\\':
685+ if (*g->regparse == '\0')
686+ FAIL("trailing \\");
687+ ret = regnode(g, EXACTLY);
688+ regc(g, *g->regparse++);
689+ regc(g, '\0');
690+ *flagp |= HASWIDTH|SIMPLE;
691+ break;
692+ default: {
693+ register int len;
694+ register char ender;
2380c486 695+
e3fad6a6
AM
696+ g->regparse--;
697+ len = my_strcspn((const char *)g->regparse, (const char *)META);
698+ if (len <= 0)
699+ FAIL("internal disaster");
700+ ender = *(g->regparse+len);
701+ if (len > 1 && ISMULT(ender))
702+ len--; /* Back off clear of ?+* operand. */
703+ *flagp |= HASWIDTH;
704+ if (len == 1)
705+ *flagp |= SIMPLE;
706+ ret = regnode(g, EXACTLY);
707+ while (len > 0) {
708+ regc(g, *g->regparse++);
709+ len--;
710+ }
711+ regc(g, '\0');
712+ }
713+ break;
2380c486
JR
714+ }
715+
e3fad6a6
AM
716+ return(ret);
717+}
2380c486 718+
e3fad6a6
AM
719+/*
720+ - regnode - emit a node
721+ */
722+static char * /* Location. */
723+regnode(struct match_globals *g, char op)
724+{
725+ register char *ret;
726+ register char *ptr;
2380c486 727+
e3fad6a6
AM
728+ ret = g->regcode;
729+ if (ret == &g->regdummy) {
730+ g->regsize += 3;
731+ return(ret);
2380c486
JR
732+ }
733+
e3fad6a6
AM
734+ ptr = ret;
735+ *ptr++ = op;
736+ *ptr++ = '\0'; /* Null "next" pointer. */
737+ *ptr++ = '\0';
738+ g->regcode = ptr;
2380c486 739+
e3fad6a6
AM
740+ return(ret);
741+}
2380c486 742+
e3fad6a6
AM
743+/*
744+ - regc - emit (if appropriate) a byte of code
745+ */
746+static void
747+regc(struct match_globals *g, char b)
2380c486 748+{
e3fad6a6
AM
749+ if (g->regcode != &g->regdummy)
750+ *g->regcode++ = b;
751+ else
752+ g->regsize++;
2380c486
JR
753+}
754+
e3fad6a6
AM
755+/*
756+ - reginsert - insert an operator in front of already-emitted operand
757+ *
758+ * Means relocating the operand.
759+ */
760+static void
761+reginsert(struct match_globals *g, char op, char* opnd)
762+{
763+ register char *src;
764+ register char *dst;
765+ register char *place;
2380c486 766+
e3fad6a6
AM
767+ if (g->regcode == &g->regdummy) {
768+ g->regsize += 3;
769+ return;
2380c486 770+ }
2380c486 771+
e3fad6a6
AM
772+ src = g->regcode;
773+ g->regcode += 3;
774+ dst = g->regcode;
775+ while (src > opnd)
776+ *--dst = *--src;
2380c486 777+
e3fad6a6
AM
778+ place = opnd; /* Op node, where operand used to be. */
779+ *place++ = op;
780+ *place++ = '\0';
781+ *place++ = '\0';
2380c486
JR
782+}
783+
e3fad6a6
AM
784+/*
785+ - regtail - set the next-pointer at the end of a node chain
786+ */
787+static void
788+regtail(struct match_globals *g, char *p, char *val)
2380c486 789+{
e3fad6a6
AM
790+ register char *scan;
791+ register char *temp;
792+ register int offset;
2380c486 793+
e3fad6a6
AM
794+ if (p == &g->regdummy)
795+ return;
2380c486 796+
e3fad6a6
AM
797+ /* Find last node. */
798+ scan = p;
799+ for (;;) {
800+ temp = regnext(g, scan);
801+ if (temp == NULL)
802+ break;
803+ scan = temp;
2380c486 804+ }
e3fad6a6
AM
805+
806+ if (OP(scan) == BACK)
807+ offset = scan - val;
808+ else
809+ offset = val - scan;
810+ *(scan+1) = (offset>>8)&0377;
811+ *(scan+2) = offset&0377;
2380c486
JR
812+}
813+
e3fad6a6
AM
814+/*
815+ - regoptail - regtail on operand of first argument; nop if operandless
816+ */
817+static void
818+regoptail(struct match_globals *g, char *p, char *val)
2380c486 819+{
e3fad6a6
AM
820+ /* "Operandless" and "op != BRANCH" are synonymous in practice. */
821+ if (p == NULL || p == &g->regdummy || OP(p) != BRANCH)
822+ return;
823+ regtail(g, OPERAND(p), val);
2380c486
JR
824+}
825+
2380c486 826+/*
e3fad6a6 827+ * regexec and friends
2380c486
JR
828+ */
829+
2380c486 830+
e3fad6a6
AM
831+/*
832+ * Forwards.
833+ */
834+STATIC int regtry(struct match_globals *g, regexp *prog, char *string);
835+STATIC int regmatch(struct match_globals *g, char *prog);
836+STATIC int regrepeat(struct match_globals *g, char *p);
2380c486 837+
e3fad6a6
AM
838+#ifdef DEBUG
839+int regnarrate = 0;
840+void regdump();
841+STATIC char *regprop(char *op);
842+#endif
2380c486
JR
843+
844+/*
e3fad6a6 845+ - regexec - match a regexp against a string
2380c486 846+ */
e3fad6a6
AM
847+int
848+regexec(regexp *prog, char *string)
849+{
850+ register char *s;
851+ struct match_globals g;
2380c486 852+
e3fad6a6
AM
853+ /* Be paranoid... */
854+ if (prog == NULL || string == NULL) {
855+ printk("<3>Regexp: NULL parameter\n");
856+ return(0);
857+ }
2380c486 858+
e3fad6a6
AM
859+ /* Check validity of program. */
860+ if (UCHARAT(prog->program) != MAGIC) {
861+ printk("<3>Regexp: corrupted program\n");
862+ return(0);
863+ }
2380c486 864+
e3fad6a6
AM
865+ /* If there is a "must appear" string, look for it. */
866+ if (prog->regmust != NULL) {
867+ s = string;
868+ while ((s = strchr(s, prog->regmust[0])) != NULL) {
869+ if (strncmp(s, prog->regmust, prog->regmlen) == 0)
870+ break; /* Found it. */
871+ s++;
872+ }
873+ if (s == NULL) /* Not present. */
874+ return(0);
875+ }
2380c486 876+
e3fad6a6
AM
877+ /* Mark beginning of line for ^ . */
878+ g.regbol = string;
2380c486 879+
e3fad6a6
AM
880+ /* Simplest case: anchored match need be tried only once. */
881+ if (prog->reganch)
882+ return(regtry(&g, prog, string));
2380c486 883+
e3fad6a6
AM
884+ /* Messy cases: unanchored match. */
885+ s = string;
886+ if (prog->regstart != '\0')
887+ /* We know what char it must start with. */
888+ while ((s = strchr(s, prog->regstart)) != NULL) {
889+ if (regtry(&g, prog, s))
890+ return(1);
891+ s++;
892+ }
893+ else
894+ /* We don't -- general case. */
895+ do {
896+ if (regtry(&g, prog, s))
897+ return(1);
898+ } while (*s++ != '\0');
2380c486 899+
e3fad6a6
AM
900+ /* Failure. */
901+ return(0);
902+}
2380c486
JR
903+
904+/*
e3fad6a6 905+ - regtry - try match at specific point
2380c486 906+ */
e3fad6a6
AM
907+static int /* 0 failure, 1 success */
908+regtry(struct match_globals *g, regexp *prog, char *string)
2380c486 909+{
e3fad6a6
AM
910+ register int i;
911+ register char **sp;
912+ register char **ep;
2380c486 913+
e3fad6a6
AM
914+ g->reginput = string;
915+ g->regstartp = prog->startp;
916+ g->regendp = prog->endp;
917+
918+ sp = prog->startp;
919+ ep = prog->endp;
920+ for (i = NSUBEXP; i > 0; i--) {
921+ *sp++ = NULL;
922+ *ep++ = NULL;
923+ }
924+ if (regmatch(g, prog->program + 1)) {
925+ prog->startp[0] = string;
926+ prog->endp[0] = g->reginput;
927+ return(1);
928+ } else
929+ return(0);
2380c486
JR
930+}
931+
/*
 - regmatch - main matching routine
 *
 * Conceptually the strategy is simple:  check to see whether the current
 * node matches, call self recursively to see whether the rest matches,
 * and then act accordingly.  In practice we make some effort to avoid
 * recursion, in particular by going through "ordinary" nodes (that don't
 * need to know whether the rest of the match failed) by a loop instead of
 * by recursion.
 *
 * g->reginput is the current position in the input and is advanced as
 * nodes consume characters; prog is the node at which to start.
 * Returns 1 if the program matches from the current position, 0 if not.
 */
static int			/* 0 failure, 1 success */
regmatch(struct match_globals *g, char *prog)
{
	register char *scan = prog;	/* Current node. */
	char *next;			/* Next node. */

#ifdef DEBUG
	if (scan != NULL && regnarrate)
		fprintf(stderr, "%s(\n", regprop(scan));
#endif
	while (scan != NULL) {
#ifdef DEBUG
		if (regnarrate)
			fprintf(stderr, "%s...\n", regprop(scan));
#endif
		next = regnext(g, scan);

		switch (OP(scan)) {
		case BOL:
			/* Only matches at the position recorded in regbol. */
			if (g->reginput != g->regbol)
				return(0);
			break;
		case EOL:
			/* Only matches at the terminating NUL of the input. */
			if (*g->reginput != '\0')
				return(0);
			break;
		case ANY:
			/* Consume one character, but never the terminator. */
			if (*g->reginput == '\0')
				return(0);
			g->reginput++;
			break;
		case EXACTLY: {
				register int len;
				register char *opnd;

				/* The operand is a NUL-terminated literal. */
				opnd = OPERAND(scan);
				/* Inline the first character, for speed. */
				if (*opnd != *g->reginput)
					return(0);
				len = strlen(opnd);
				if (len > 1 && strncmp(opnd, g->reginput, len) != 0)
					return(0);
				g->reginput += len;
			}
			break;
		case ANYOF:
			/* The operand is the set of acceptable characters. */
			if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL)
				return(0);
			g->reginput++;
			break;
		case ANYBUT:
			/* The operand is the set of rejected characters. */
			if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL)
				return(0);
			g->reginput++;
			break;
		case NOTHING:
		case BACK:
			/* Consume nothing; just move on to the next node. */
			break;
		case OPEN+1:
		case OPEN+2:
		case OPEN+3:
		case OPEN+4:
		case OPEN+5:
		case OPEN+6:
		case OPEN+7:
		case OPEN+8:
		case OPEN+9: {
				register int no;
				register char *save;

				no = OP(scan) - OPEN;
				save = g->reginput;

				/*
				 * Recurse so the capture start is recorded only
				 * once the rest of the program has matched.
				 */
				if (regmatch(g, next)) {
					/*
					 * Don't set startp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (g->regstartp[no] == NULL)
						g->regstartp[no] = save;
					return(1);
				} else
					return(0);
			}
			break;
		case CLOSE+1:
		case CLOSE+2:
		case CLOSE+3:
		case CLOSE+4:
		case CLOSE+5:
		case CLOSE+6:
		case CLOSE+7:
		case CLOSE+8:
		case CLOSE+9:
			{
				register int no;
				register char *save;

				no = OP(scan) - CLOSE;
				save = g->reginput;

				/* Same scheme as OPEN, for the capture end. */
				if (regmatch(g, next)) {
					/*
					 * Don't set endp if some later
					 * invocation of the same parentheses
					 * already has.
					 */
					if (g->regendp[no] == NULL)
						g->regendp[no] = save;
					return(1);
				} else
					return(0);
			}
			break;
		case BRANCH: {
				register char *save;

				if (OP(next) != BRANCH)		/* No choice. */
					next = OPERAND(scan);	/* Avoid recursion. */
				else {
					/*
					 * Try each alternative in turn, rewinding
					 * the input after every failed attempt.
					 */
					do {
						save = g->reginput;
						if (regmatch(g, OPERAND(scan)))
							return(1);
						g->reginput = save;
						scan = regnext(g, scan);
					} while (scan != NULL && OP(scan) == BRANCH);
					return(0);
					/* NOTREACHED */
				}
			}
			break;
		case STAR:
		case PLUS: {
				register char nextch;
				register int no;
				register char *save;
				register int min;

				/*
				 * Lookahead to avoid useless match attempts
				 * when we know what character comes next.
				 */
				nextch = '\0';
				if (OP(next) == EXACTLY)
					nextch = *OPERAND(next);
				/* STAR accepts zero repetitions, PLUS needs one. */
				min = (OP(scan) == STAR) ? 0 : 1;
				save = g->reginput;
				/* Greedy: take as many repetitions as possible. */
				no = regrepeat(g, OPERAND(scan));
				while (no >= min) {
					/* If it could work, try it. */
					if (nextch == '\0' || *g->reginput == nextch)
						if (regmatch(g, next))
							return(1);
					/* Couldn't or didn't -- back up. */
					no--;
					/*
					 * regrepeat() counts one input character
					 * per repetition, so this is the position
					 * after `no` repetitions.
					 */
					g->reginput = save + no;
				}
				return(0);
			}
			break;
		case END:
			return(1);	/* Success! */
			break;
		default:
			printk("<3>Regexp: memory corruption\n");
			return(0);
			break;
		}

		scan = next;
	}

	/*
	 * We get here only if there's trouble -- normally "case END" is
	 * the terminating point.
	 */
	printk("<3>Regexp: corrupted pointers\n");
	return(0);
}
1123+
1124+/*
e3fad6a6 1125+ - regrepeat - repeatedly match something simple, report how many
2380c486 1126+ */
e3fad6a6
AM
1127+static int
1128+regrepeat(struct match_globals *g, char *p)
2380c486 1129+{
e3fad6a6
AM
1130+ register int count = 0;
1131+ register char *scan;
1132+ register char *opnd;
2380c486 1133+
e3fad6a6
AM
1134+ scan = g->reginput;
1135+ opnd = OPERAND(p);
1136+ switch (OP(p)) {
1137+ case ANY:
1138+ count = strlen(scan);
1139+ scan += count;
1140+ break;
1141+ case EXACTLY:
1142+ while (*opnd == *scan) {
1143+ count++;
1144+ scan++;
1145+ }
1146+ break;
1147+ case ANYOF:
1148+ while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
1149+ count++;
1150+ scan++;
1151+ }
1152+ break;
1153+ case ANYBUT:
1154+ while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
1155+ count++;
1156+ scan++;
1157+ }
1158+ break;
1159+ default: /* Oh dear. Called inappropriately. */
1160+ printk("<3>Regexp: internal foulup\n");
1161+ count = 0; /* Best compromise. */
1162+ break;
2380c486 1163+ }
e3fad6a6 1164+ g->reginput = scan;
2380c486 1165+
e3fad6a6 1166+ return(count);
2380c486
JR
1167+}
1168+
1169+/*
e3fad6a6 1170+ - regnext - dig the "next" pointer out of a node
2380c486 1171+ */
e3fad6a6
AM
1172+static char*
1173+regnext(struct match_globals *g, char *p)
2380c486 1174+{
e3fad6a6 1175+ register int offset;
2380c486 1176+
e3fad6a6
AM
1177+ if (p == &g->regdummy)
1178+ return(NULL);
2380c486 1179+
e3fad6a6
AM
1180+ offset = NEXT(p);
1181+ if (offset == 0)
1182+ return(NULL);
2380c486 1183+
e3fad6a6
AM
1184+ if (OP(p) == BACK)
1185+ return(p-offset);
1186+ else
1187+ return(p+offset);
2380c486
JR
1188+}
1189+
e3fad6a6
AM
1190+#ifdef DEBUG
1191+
1192+STATIC char *regprop();
1193+
2380c486 1194+/*
e3fad6a6 1195+ - regdump - dump a regexp onto stdout in vaguely comprehensible form
2380c486 1196+ */
e3fad6a6
AM
1197+void
1198+regdump(regexp *r)
2380c486 1199+{
e3fad6a6
AM
1200+ register char *s;
1201+ register char op = EXACTLY; /* Arbitrary non-END op. */
2380c486 1202+ register char *next;
e3fad6a6 1203+ /* extern char *strchr(); */
2380c486 1204+
2380c486 1205+
e3fad6a6
AM
1206+ s = r->program + 1;
1207+ while (op != END) { /* While that wasn't END last time... */
1208+ op = OP(s);
1209+ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
1210+ next = regnext(s);
1211+ if (next == NULL) /* Next ptr. */
1212+ printf("(0)");
1213+ else
1214+ printf("(%d)", (s-r->program)+(next-s));
1215+ s += 3;
1216+ if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
1217+ /* Literal string, where present. */
1218+ while (*s != '\0') {
1219+ putchar(*s);
1220+ s++;
1221+ }
1222+ s++;
1223+ }
1224+ putchar('\n');
2380c486 1225+ }
2380c486 1226+
e3fad6a6
AM
1227+ /* Header fields of interest. */
1228+ if (r->regstart != '\0')
1229+ printf("start `%c' ", r->regstart);
1230+ if (r->reganch)
1231+ printf("anchored ");
1232+ if (r->regmust != NULL)
1233+ printf("must have \"%s\"", r->regmust);
1234+ printf("\n");
2380c486
JR
1235+}
1236+
1237+/*
e3fad6a6 1238+ - regprop - printable representation of opcode
2380c486
JR
1239+ */
1240+static char *
e3fad6a6 1241+regprop(char *op)
2380c486 1242+{
e3fad6a6
AM
1243+#define BUFLEN 50
1244+ register char *p;
1245+ static char buf[BUFLEN];
2380c486 1246+
e3fad6a6 1247+ strcpy(buf, ":");
2380c486 1248+
e3fad6a6
AM
1249+ switch (OP(op)) {
1250+ case BOL:
1251+ p = "BOL";
2380c486 1252+ break;
e3fad6a6
AM
1253+ case EOL:
1254+ p = "EOL";
2380c486 1255+ break;
e3fad6a6
AM
1256+ case ANY:
1257+ p = "ANY";
2380c486 1258+ break;
e3fad6a6
AM
1259+ case ANYOF:
1260+ p = "ANYOF";
2380c486 1261+ break;
e3fad6a6
AM
1262+ case ANYBUT:
1263+ p = "ANYBUT";
2380c486 1264+ break;
e3fad6a6
AM
1265+ case BRANCH:
1266+ p = "BRANCH";
2380c486 1267+ break;
e3fad6a6
AM
1268+ case EXACTLY:
1269+ p = "EXACTLY";
2380c486 1270+ break;
e3fad6a6
AM
1271+ case NOTHING:
1272+ p = "NOTHING";
2380c486 1273+ break;
e3fad6a6
AM
1274+ case BACK:
1275+ p = "BACK";
1276+ break;
1277+ case END:
1278+ p = "END";
1279+ break;
1280+ case OPEN+1:
1281+ case OPEN+2:
1282+ case OPEN+3:
1283+ case OPEN+4:
1284+ case OPEN+5:
1285+ case OPEN+6:
1286+ case OPEN+7:
1287+ case OPEN+8:
1288+ case OPEN+9:
1289+ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN);
1290+ p = NULL;
1291+ break;
1292+ case CLOSE+1:
1293+ case CLOSE+2:
1294+ case CLOSE+3:
1295+ case CLOSE+4:
1296+ case CLOSE+5:
1297+ case CLOSE+6:
1298+ case CLOSE+7:
1299+ case CLOSE+8:
1300+ case CLOSE+9:
1301+ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE);
1302+ p = NULL;
1303+ break;
1304+ case STAR:
1305+ p = "STAR";
1306+ break;
1307+ case PLUS:
1308+ p = "PLUS";
1309+ break;
1310+ default:
1311+ printk("<3>Regexp: corrupted opcode\n");
2380c486
JR
1312+ break;
1313+ }
e3fad6a6
AM
1314+ if (p != NULL)
1315+ strncat(buf, p, BUFLEN-strlen(buf));
1316+ return(buf);
2380c486 1317+}
e3fad6a6 1318+#endif
2380c486 1319+
e3fad6a6
AM
1320+
1321diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regexp.h linux-5.11/net/netfilter/regexp/regexp.h
1322--- linux-5.11/net/netfilter/regexp/regexp.h 1970-01-01 01:00:00.000000000 +0100
1323+++ linux-5.11/net/netfilter/regexp/regexp.h 2021-02-24 13:21:48.335107407 +0100
1324@@ -0,0 +1,41 @@
2380c486 1325+/*
e3fad6a6
AM
1326+ * Definitions etc. for regexp(3) routines.
1327+ *
1328+ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
1329+ * not the System V one.
2380c486 1330+ */
2380c486 1331+
e3fad6a6
AM
1332+#ifndef REGEXP_H
1333+#define REGEXP_H
2380c486 1334+
2380c486 1335+
e3fad6a6
AM
1336+/*
1337+http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h ,
1338+which contains a version of this library, says:
1339+
1340+ *
1341+ * NSUBEXP must be at least 10, and no greater than 117 or the parser
1342+ * will not work properly.
1343+ *
1344+
1345+However, it looks rather like this library is limited to 10. If you think
1346+otherwise, let us know.
1347+*/
1348+
/* Number of capture slots; slot 0 holds the whole match. */
#define NSUBEXP  10
/*
 * A compiled regular expression.  The node program starts in program[]
 * and its first byte is the MAGIC marker.
 */
typedef struct regexp {
	char *startp[NSUBEXP];	/* Capture start pointers, NULL if unset. */
	char *endp[NSUBEXP];	/* Capture end pointers, NULL if unset. */
	char regstart;		/* Internal use only. */	/* Char a match must start with, or '\0'. */
	char reganch;		/* Internal use only. */	/* Nonzero: anchored, try a match only once. */
	char *regmust;		/* Internal use only. */	/* String the input must contain, or NULL. */
	int regmlen;		/* Internal use only. */	/* Length compared against regmust. */
	char program[1];	/* Unwarranted chumminess with compiler. */
} regexp;

/* NOTE(review): patternsize semantics are not visible here -- see regcomp(). */
regexp * regcomp(char *exp, int *patternsize);
/* Returns 1 if `string` matches `prog`, 0 otherwise. */
int regexec(regexp *prog, char *string);
/* Expands `source` into `dest` using the captures stored in `prog`. */
void regsub(regexp *prog, char *source, char *dest);
void regerror(char *s);
2380c486 1364+
e3fad6a6
AM
1365+#endif
1366diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regmagic.h linux-5.11/net/netfilter/regexp/regmagic.h
1367--- linux-5.11/net/netfilter/regexp/regmagic.h 1970-01-01 01:00:00.000000000 +0100
1368+++ linux-5.11/net/netfilter/regexp/regmagic.h 2021-02-24 13:21:48.335107407 +0100
1369@@ -0,0 +1,5 @@
2380c486 1370+/*
e3fad6a6
AM
1371+ * The first byte of the regexp internal "program" is actually this magic
1372+ * number; the start node begins in the second byte.
2380c486 1373+ */
e3fad6a6
AM
1374+#define MAGIC 0234
1375diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regsub.c linux-5.11/net/netfilter/regexp/regsub.c
1376--- linux-5.11/net/netfilter/regexp/regsub.c 1970-01-01 01:00:00.000000000 +0100
1377+++ linux-5.11/net/netfilter/regexp/regsub.c 2021-02-24 13:21:48.335107407 +0100
1378@@ -0,0 +1,95 @@
2380c486 1379+/*
e3fad6a6
AM
1380+ * regsub
1381+ * @(#)regsub.c 1.3 of 2 April 86
1382+ *
1383+ * Copyright (c) 1986 by University of Toronto.
1384+ * Written by Henry Spencer. Not derived from licensed software.
1385+ *
1386+ * Permission is granted to anyone to use this software for any
1387+ * purpose on any computer system, and to redistribute it freely,
1388+ * subject to the following restrictions:
1389+ *
1390+ * 1. The author is not responsible for the consequences of use of
1391+ * this software, no matter how awful, even if they arise
1392+ * from defects in it.
1393+ *
1394+ * 2. The origin of this software must not be misrepresented, either
1395+ * by explicit claim or by omission.
1396+ *
1397+ * 3. Altered versions must be plainly marked as such, and must not
1398+ * be misrepresented as being the original software.
1399+ *
1400+ *
1401+ * This code was modified by Ethan Sommer to work within the kernel
1402+ * (it now uses kmalloc etc..)
2380c486 1403+ *
2380c486 1404+ */
e3fad6a6
AM
1405+#include "regexp.h"
1406+#include "regmagic.h"
1407+#include <linux/string.h>
2380c486 1408+
2380c486 1409+
e3fad6a6
AM
1410+#ifndef CHARBITS
1411+#define UCHARAT(p) ((int)*(unsigned char *)(p))
1412+#else
1413+#define UCHARAT(p) ((int)*(p)&CHARBITS)
1414+#endif
2380c486 1415+
e3fad6a6
AM
1416+#if 0
1417+//void regerror(char * s)
1418+//{
1419+// printk("regexp(3): %s", s);
1420+// /* NOTREACHED */
1421+//}
1422+#endif
2380c486
JR
1423+
1424+/*
e3fad6a6 1425+ - regsub - perform substitutions after a regexp match
2380c486 1426+ */
e3fad6a6
AM
1427+void
1428+regsub(regexp * prog, char * source, char * dest)
2380c486 1429+{
e3fad6a6
AM
1430+ register char *src;
1431+ register char *dst;
1432+ register char c;
1433+ register int no;
1434+ register int len;
1435+
1436+ /* Not necessary and gcc doesn't like it -MLS */
1437+ /*extern char *strncpy();*/
2380c486 1438+
e3fad6a6
AM
1439+ if (prog == NULL || source == NULL || dest == NULL) {
1440+ regerror("NULL parm to regsub");
1441+ return;
1442+ }
1443+ if (UCHARAT(prog->program) != MAGIC) {
1444+ regerror("damaged regexp fed to regsub");
2380c486 1445+ return;
2380c486
JR
1446+ }
1447+
e3fad6a6
AM
1448+ src = source;
1449+ dst = dest;
1450+ while ((c = *src++) != '\0') {
1451+ if (c == '&')
1452+ no = 0;
1453+ else if (c == '\\' && '0' <= *src && *src <= '9')
1454+ no = *src++ - '0';
1455+ else
1456+ no = -1;
2380c486 1457+
e3fad6a6
AM
1458+ if (no < 0) { /* Ordinary character. */
1459+ if (c == '\\' && (*src == '\\' || *src == '&'))
1460+ c = *src++;
1461+ *dst++ = c;
1462+ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
1463+ len = prog->endp[no] - prog->startp[no];
1464+ (void) strncpy(dst, prog->startp[no], len);
1465+ dst += len;
1466+ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
1467+ regerror("damaged match string");
1468+ return;
1469+ }
1470+ }
1471+ }
1472+ *dst++ = '\0';
2380c486 1473+}
e3fad6a6
AM
1474diff -urNp -x '*.orig' linux-5.11/net/netfilter/xt_layer7.c linux-5.11/net/netfilter/xt_layer7.c
1475--- linux-5.11/net/netfilter/xt_layer7.c 1970-01-01 01:00:00.000000000 +0100
1476+++ linux-5.11/net/netfilter/xt_layer7.c 2021-02-24 13:21:48.335107407 +0100
1477@@ -0,0 +1,656 @@
2380c486 1478+/*
e3fad6a6 1479+ Kernel module to match application layer (OSI layer 7) data in connections.
2380c486 1480+
e3fad6a6 1481+ http://l7-filter.sf.net
2380c486 1482+
e3fad6a6 1483+ (C) 2003-2009 Matthew Strait and Ethan Sommer.
2380c486 1484+
e3fad6a6
AM
1485+ This program is free software; you can redistribute it and/or
1486+ modify it under the terms of the GNU General Public License
1487+ as published by the Free Software Foundation; either version
1488+ 2 of the License, or (at your option) any later version.
1489+ http://www.gnu.org/licenses/gpl.txt
1490+
1491+ Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be>,
1492+ xt_helper.c (C) 2002 Harald Welte and cls_layer7.c (C) 2003 Matthew Strait,
1493+ Ethan Sommer, Justin Levandoski.
1494+*/
1495+
1496+#include <linux/spinlock.h>
1497+#include <linux/version.h>
1498+#include <net/ip.h>
1499+#include <net/tcp.h>
1500+#include <linux/module.h>
1501+#include <linux/skbuff.h>
1502+#include <linux/netfilter.h>
1503+#include <net/netfilter/nf_conntrack.h>
1504+#include <net/netfilter/nf_conntrack_core.h>
1505+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
1506+#include <net/netfilter/nf_conntrack_extend.h>
1507+#include <net/netfilter/nf_conntrack_acct.h>
2380c486 1508+#endif
e3fad6a6
AM
1509+#include <linux/netfilter/x_tables.h>
1510+#include <linux/netfilter/xt_layer7.h>
1511+#include <linux/ctype.h>
1512+#include <linux/proc_fs.h>
2380c486 1513+
e3fad6a6 1514+#include "regexp/regexp.c"
2380c486 1515+
e3fad6a6
AM
1516+MODULE_LICENSE("GPL");
1517+MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>");
1518+MODULE_DESCRIPTION("iptables application layer match module");
1519+MODULE_ALIAS("ipt_layer7");
1520+MODULE_VERSION("2.21");
2380c486 1521+
e3fad6a6
AM
1522+static int maxdatalen = 2048; // this is the default
1523+module_param(maxdatalen, int, 0444);
1524+MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter");
1525+#ifdef CONFIG_NETFILTER_XT_MATCH_LAYER7_DEBUG
1526+ #define DPRINTK(format,args...) printk(format,##args)
1527+#else
1528+ #define DPRINTK(format,args...)
1529+#endif
2380c486 1530+
e3fad6a6
AM
1531+/* Number of packets whose data we look at.
1532+This can be modified through /proc/net/layer7_numpackets */
1533+static int num_packets = 10;
2380c486 1534+
e3fad6a6
AM
/*
 * Singly linked list of already-compiled patterns, searched linearly by
 * regex source text in compile_and_cache().
 */
static struct pattern_cache {
	char * regex_string;		/* regex source text, the lookup key */
	regexp * pattern;		/* compiled form; NULL if compilation failed */
	struct pattern_cache * next;	/* next entry, NULL at the tail */
} * first_pattern_cache = NULL;
2380c486 1540+
e3fad6a6 1541+DEFINE_SPINLOCK(l7_lock);
2380c486 1542+
e3fad6a6
AM
1543+static int total_acct_packets(struct nf_conn *ct)
1544+{
1545+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26)
1546+ BUG_ON(ct == NULL);
1547+ return (ct->counters[IP_CT_DIR_ORIGINAL].packets + ct->counters[IP_CT_DIR_REPLY].packets);
1548+#else
1549+ struct nf_conn_counter *acct;
2380c486 1550+
e3fad6a6
AM
1551+ BUG_ON(ct == NULL);
1552+ acct = nf_conn_acct_find(ct);
1553+ if (!acct)
1554+ return 0;
1555+ return (atomic64_read(&acct[IP_CT_DIR_ORIGINAL].packets) + atomic64_read(&acct[IP_CT_DIR_REPLY].packets));
1556+#endif
2380c486
JR
1557+}
1558+
e3fad6a6
AM
1559+#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
1560+/* Converts an unfriendly string into a friendly one by
1561+replacing unprintables with periods and all whitespace with " ". */
1562+static char * friendly_print(unsigned char * s)
2380c486 1563+{
e3fad6a6
AM
1564+ char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC);
1565+ int i;
2380c486 1566+
e3fad6a6
AM
1567+ if(!f) {
1568+ if (net_ratelimit())
1569+ printk(KERN_ERR "layer7: out of memory in "
1570+ "friendly_print, bailing.\n");
1571+ return NULL;
1572+ }
2380c486 1573+
e3fad6a6
AM
1574+ for(i = 0; i < strlen(s); i++){
1575+ if(isprint(s[i]) && s[i] < 128) f[i] = s[i];
1576+ else if(isspace(s[i])) f[i] = ' ';
1577+ else f[i] = '.';
2380c486 1578+ }
e3fad6a6
AM
1579+ f[i] = '\0';
1580+ return f;
2380c486
JR
1581+}
1582+
/*
 * Maps a value in 0..15 to its lower-case hexadecimal digit.  Any other
 * value is reported (rate limited) and yields '\0'.
 */
static char dec2hex(int i)
{
	if (i >= 0 && i <= 9)
		return (i + '0');

	if (i >= 10 && i <= 15)
		return (i - 10 + 'a');

	if (net_ratelimit())
		printk("layer7: Problem in dec2hex\n");
	return '\0';
}
2380c486 1598+
e3fad6a6
AM
1599+static char * hex_print(unsigned char * s)
1600+{
1601+ char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC);
1602+ int i;
2380c486 1603+
e3fad6a6
AM
1604+ if(!g) {
1605+ if (net_ratelimit())
1606+ printk(KERN_ERR "layer7: out of memory in hex_print, "
1607+ "bailing.\n");
1608+ return NULL;
1609+ }
2380c486 1610+
e3fad6a6
AM
1611+ for(i = 0; i < strlen(s); i++) {
1612+ g[i*3 ] = dec2hex(s[i]/16);
1613+ g[i*3 + 1] = dec2hex(s[i]%16);
1614+ g[i*3 + 2] = ' ';
1615+ }
1616+ g[i*3] = '\0';
2380c486 1617+
e3fad6a6
AM
1618+ return g;
1619+}
1620+#endif // DEBUG
2380c486 1621+
e3fad6a6
AM
1622+/* Use instead of regcomp. As we expect to be seeing the same regexps over and
1623+over again, it make sense to cache the results. */
1624+static regexp * compile_and_cache(const char * regex_string,
1625+ const char * protocol)
1626+{
1627+ struct pattern_cache * node = first_pattern_cache;
1628+ struct pattern_cache * last_pattern_cache = first_pattern_cache;
1629+ struct pattern_cache * tmp;
1630+ unsigned int len;
2380c486 1631+
e3fad6a6
AM
1632+ while (node != NULL) {
1633+ if (!strcmp(node->regex_string, regex_string))
1634+ return node->pattern;
2380c486 1635+
e3fad6a6
AM
1636+ last_pattern_cache = node;/* points at the last non-NULL node */
1637+ node = node->next;
1638+ }
1639+
1640+ /* If we reach the end of the list, then we have not yet cached
1641+ the pattern for this regex. Let's do that now.
1642+ Be paranoid about running out of memory to avoid list corruption. */
1643+ tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC);
1644+
1645+ if(!tmp) {
1646+ if (net_ratelimit())
1647+ printk(KERN_ERR "layer7: out of memory in "
1648+ "compile_and_cache, bailing.\n");
1649+ return NULL;
1650+ }
1651+
1652+ tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC);
1653+ tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC);
1654+ tmp->next = NULL;
2380c486 1655+
e3fad6a6
AM
1656+ if(!tmp->regex_string || !tmp->pattern) {
1657+ if (net_ratelimit())
1658+ printk(KERN_ERR "layer7: out of memory in "
1659+ "compile_and_cache, bailing.\n");
1660+ kfree(tmp->regex_string);
1661+ kfree(tmp->pattern);
1662+ kfree(tmp);
1663+ return NULL;
2380c486
JR
1664+ }
1665+
e3fad6a6
AM
1666+ /* Ok. The new node is all ready now. */
1667+ node = tmp;
1668+
1669+ if(first_pattern_cache == NULL) /* list is empty */
1670+ first_pattern_cache = node; /* make node the beginning */
1671+ else
1672+ last_pattern_cache->next = node; /* attach node to the end */
1673+
1674+ /* copy the string and compile the regex */
1675+ len = strlen(regex_string);
1676+ DPRINTK("About to compile this: \"%s\"\n", regex_string);
1677+ node->pattern = regcomp((char *)regex_string, &len);
1678+ if ( !node->pattern ) {
1679+ if (net_ratelimit())
1680+ printk(KERN_ERR "layer7: Error compiling regexp "
1681+ "\"%s\" (%s)\n",
1682+ regex_string, protocol);
1683+ /* pattern is now cached as NULL, so we won't try again. */
1684+ }
1685+
1686+ strcpy(node->regex_string, regex_string);
1687+ return node->pattern;
2380c486
JR
1688+}
1689+
e3fad6a6 1690+static int can_handle(const struct sk_buff *skb)
2380c486 1691+{
e3fad6a6
AM
1692+ if(!ip_hdr(skb)) /* not IP */
1693+ return 0;
1694+ if(ip_hdr(skb)->protocol != IPPROTO_TCP &&
1695+ ip_hdr(skb)->protocol != IPPROTO_UDP &&
1696+ ip_hdr(skb)->protocol != IPPROTO_ICMP)
1697+ return 0;
1698+ return 1;
1699+}
2380c486 1700+
e3fad6a6
AM
1701+/* Returns offset the into the skb->data that the application data starts */
1702+static int app_data_offset(const struct sk_buff *skb)
1703+{
1704+ /* In case we are ported somewhere (ebtables?) where ip_hdr(skb)
1705+ isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */
1706+ int ip_hl = 4*ip_hdr(skb)->ihl;
1707+
1708+ if( ip_hdr(skb)->protocol == IPPROTO_TCP ) {
1709+ /* 12 == offset into TCP header for the header length field.
1710+ Can't get this with skb->h.th->doff because the tcphdr
1711+ struct doesn't get set when routing (this is confirmed to be
1712+ true in Netfilter as well as QoS.) */
1713+ int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4);
1714+
1715+ return ip_hl + tcp_hl;
1716+ } else if( ip_hdr(skb)->protocol == IPPROTO_UDP ) {
1717+ return ip_hl + 8; /* UDP header is always 8 bytes */
1718+ } else if( ip_hdr(skb)->protocol == IPPROTO_ICMP ) {
1719+ return ip_hl + 8; /* ICMP header is 8 bytes */
1720+ } else {
1721+ if (net_ratelimit())
1722+ printk(KERN_ERR "layer7: tried to handle unknown "
1723+ "protocol!\n");
1724+ return ip_hl + 8; /* something reasonable */
1725+ }
1726+}
1727+
1728+/* handles whether there's a match when we aren't appending data anymore */
1729+static int match_no_append(struct nf_conn * conntrack,
1730+ struct nf_conn * master_conntrack,
1731+ enum ip_conntrack_info ctinfo,
1732+ enum ip_conntrack_info master_ctinfo,
1733+ const struct xt_layer7_info * info)
1734+{
1735+ /* If we're in here, throw the app data away */
1736+ if(master_conntrack->layer7.app_data != NULL) {
1737+
1738+ #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
1739+ if(!master_conntrack->layer7.app_proto) {
1740+ char * f =
1741+ friendly_print(master_conntrack->layer7.app_data);
1742+ char * g =
1743+ hex_print(master_conntrack->layer7.app_data);
1744+ DPRINTK("\nl7-filter gave up after %d bytes "
1745+ "(%d packets):\n%s\n",
1746+ strlen(f), total_acct_packets(master_conntrack), f);
1747+ kfree(f);
1748+ DPRINTK("In hex: %s\n", g);
1749+ kfree(g);
2380c486 1750+ }
e3fad6a6
AM
1751+ #endif
1752+
1753+ kfree(master_conntrack->layer7.app_data);
1754+ master_conntrack->layer7.app_data = NULL; /* don't free again */
1755+ }
1756+
1757+ if(master_conntrack->layer7.app_proto){
1758+ /* Here child connections set their .app_proto (for /proc) */
1759+ if(!conntrack->layer7.app_proto) {
1760+ conntrack->layer7.app_proto =
1761+ kmalloc(strlen(master_conntrack->layer7.app_proto)+1,
1762+ GFP_ATOMIC);
1763+ if(!conntrack->layer7.app_proto){
1764+ if (net_ratelimit())
1765+ printk(KERN_ERR "layer7: out of memory "
1766+ "in match_no_append, "
1767+ "bailing.\n");
1768+ return 1;
1769+ }
1770+ strcpy(conntrack->layer7.app_proto,
1771+ master_conntrack->layer7.app_proto);
2380c486 1772+ }
e3fad6a6
AM
1773+
1774+ return (!strcmp(master_conntrack->layer7.app_proto,
1775+ info->protocol));
1776+ }
1777+ else {
1778+ /* If not classified, set to "unknown" to distinguish from
1779+ connections that are still being tested. */
1780+ master_conntrack->layer7.app_proto =
1781+ kmalloc(strlen("unknown")+1, GFP_ATOMIC);
1782+ if(!master_conntrack->layer7.app_proto){
1783+ if (net_ratelimit())
1784+ printk(KERN_ERR "layer7: out of memory in "
1785+ "match_no_append, bailing.\n");
1786+ return 1;
2380c486 1787+ }
e3fad6a6
AM
1788+ strcpy(master_conntrack->layer7.app_proto, "unknown");
1789+ return 0;
2380c486 1790+ }
2380c486
JR
1791+}
1792+
e3fad6a6
AM
1793+/* add the new app data to the conntrack. Return number of bytes added. */
1794+static int add_data(struct nf_conn * master_conntrack,
1795+ char * app_data, int appdatalen)
2380c486 1796+{
e3fad6a6
AM
1797+ int length = 0, i;
1798+ int oldlength = master_conntrack->layer7.app_data_len;
2380c486 1799+
e3fad6a6
AM
1800+ /* This is a fix for a race condition by Deti Fliegl. However, I'm not
1801+ clear on whether the race condition exists or whether this really
1802+ fixes it. I might just be being dense... Anyway, if it's not really
1803+ a fix, all it does is waste a very small amount of time. */
1804+ if(!master_conntrack->layer7.app_data) return 0;
2380c486 1805+
e3fad6a6
AM
1806+ /* Strip nulls. Make everything lower case (our regex lib doesn't
1807+ do case insensitivity). Add it to the end of the current data. */
1808+ for(i = 0; i < maxdatalen-oldlength-1 &&
1809+ i < appdatalen; i++) {
1810+ if(app_data[i] != '\0') {
1811+ /* the kernel version of tolower mungs 'upper ascii' */
1812+ master_conntrack->layer7.app_data[length+oldlength] =
1813+ isascii(app_data[i])?
1814+ tolower(app_data[i]) : app_data[i];
1815+ length++;
1816+ }
1817+ }
2380c486 1818+
e3fad6a6
AM
1819+ master_conntrack->layer7.app_data[length+oldlength] = '\0';
1820+ master_conntrack->layer7.app_data_len = length + oldlength;
2380c486 1821+
e3fad6a6
AM
1822+ return length;
1823+}
2380c486 1824+
e3fad6a6
AM
/*
 * Parses the run of decimal digits at the start of `s` and returns its
 * value; parsing stops at the first non-digit.  Returns 0 when `s` does
 * not start with a digit.  No sign handling and no overflow detection.
 * (taken from drivers/video/modedb.c)
 */
static int my_atoi(const char *s)
{
	int val = 0;

	while (*s >= '0' && *s <= '9') {
		val = 10*val + (*s - '0');
		s++;
	}

	return val;
}
1840+
1841+/* write out num_packets to userland. */
1842+static int layer7_read_proc(char* page, char ** start, off_t off, int count,
1843+ int* eof, void * data)
1844+{
1845+ if(num_packets > 99 && net_ratelimit())
1846+ printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n");
2380c486 1847+
e3fad6a6
AM
1848+ page[0] = num_packets/10 + '0';
1849+ page[1] = num_packets%10 + '0';
1850+ page[2] = '\n';
1851+ page[3] = '\0';
1852+
1853+ *eof=1;
1854+
1855+ return 3;
2380c486
JR
1856+}
1857+
e3fad6a6
AM
1858+/* Read in num_packets from userland */
1859+static int layer7_write_proc(struct file* file, const char* buffer,
1860+ unsigned long count, void *data)
2380c486 1861+{
e3fad6a6 1862+ char * foo = kmalloc(count, GFP_ATOMIC);
2380c486 1863+
e3fad6a6
AM
1864+ if(!foo){
1865+ if (net_ratelimit())
1866+ printk(KERN_ERR "layer7: out of memory, bailing. "
1867+ "num_packets unchanged.\n");
1868+ return count;
1869+ }
2380c486 1870+
e3fad6a6
AM
1871+ if(copy_from_user(foo, buffer, count)) {
1872+ return -EFAULT;
2380c486 1873+ }
e3fad6a6
AM
1874+
1875+
1876+ num_packets = my_atoi(foo);
1877+ kfree (foo);
1878+
1879+ /* This has an arbitrary limit to make the math easier. I'm lazy.
1880+ But anyway, 99 is a LOT! If you want more, you're doing it wrong! */
1881+ if(num_packets > 99) {
1882+ printk(KERN_WARNING "layer7: num_packets can't be > 99.\n");
1883+ num_packets = 99;
1884+ } else if(num_packets < 1) {
1885+ printk(KERN_WARNING "layer7: num_packets can't be < 1.\n");
1886+ num_packets = 1;
1887+ }
1888+
1889+ return count;
2380c486 1890+}
e3fad6a6
AM
1891+
1892+static bool
1893+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
1894+match(const struct sk_buff *skbin, struct xt_action_param *par)
1895+#else
1896+match(const struct sk_buff *skbin,
1897+ const struct net_device *in,
1898+ const struct net_device *out,
1899+ const struct xt_match *match,
1900+ const void *matchinfo,
1901+ int offset,
1902+ unsigned int protoff,
1903+ bool *hotdrop)
2380c486 1904+#endif
e3fad6a6
AM
1905+{
1906+ /* sidestep const without getting a compiler warning... */
1907+ struct sk_buff * skb = (struct sk_buff *)skbin;
2380c486 1908+
e3fad6a6
AM
1909+ const struct xt_layer7_info * info =
1910+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
1911+ par->matchinfo;
1912+ #else
1913+ matchinfo;
1914+ #endif
2380c486 1915+
e3fad6a6
AM
1916+ enum ip_conntrack_info master_ctinfo, ctinfo;
1917+ struct nf_conn *master_conntrack, *conntrack;
1918+ unsigned char * app_data;
1919+ unsigned int pattern_result, appdatalen;
1920+ regexp * comppattern;
2380c486 1921+
e3fad6a6
AM
1922+ /* Be paranoid/incompetent - lock the entire match function. */
1923+ spin_lock_bh(&l7_lock);
2380c486 1924+
e3fad6a6
AM
1925+ if(!can_handle(skb)){
1926+ DPRINTK("layer7: This is some protocol I can't handle.\n");
1927+ spin_unlock_bh(&l7_lock);
1928+ return info->invert;
1929+ }
2380c486 1930+
e3fad6a6
AM
1931+ /* Treat parent & all its children together as one connection, except
1932+ for the purpose of setting conntrack->layer7.app_proto in the actual
1933+ connection. This makes /proc/net/ip_conntrack more satisfying. */
1934+ if(!(conntrack = nf_ct_get(skb, &ctinfo)) ||
1935+ !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){
1936+ DPRINTK("layer7: couldn't get conntrack.\n");
1937+ spin_unlock_bh(&l7_lock);
1938+ return info->invert;
1939+ }
2380c486 1940+
e3fad6a6
AM
1941+ /* Try to get a master conntrack (and its master etc) for FTP, etc. */
1942+ while (master_ct(master_conntrack) != NULL)
1943+ master_conntrack = master_ct(master_conntrack);
2380c486 1944+
e3fad6a6
AM
1945+ /* if we've classified it or seen too many packets */
1946+ if(total_acct_packets(master_conntrack) > num_packets ||
1947+ master_conntrack->layer7.app_proto) {
2380c486 1948+
e3fad6a6
AM
1949+ pattern_result = match_no_append(conntrack, master_conntrack,
1950+ ctinfo, master_ctinfo, info);
2380c486 1951+
e3fad6a6
AM
1952+ /* skb->cb[0] == seen. Don't do things twice if there are
1953+ multiple l7 rules. I'm not sure that using cb for this purpose
1954+ is correct, even though it says "put your private variables
1955+ there". But it doesn't look like it is being used for anything
1956+ else in the skbs that make it here. */
1957+ skb->cb[0] = 1; /* marking it seen here's probably irrelevant */
2380c486 1958+
e3fad6a6
AM
1959+ spin_unlock_bh(&l7_lock);
1960+ return (pattern_result ^ info->invert);
1961+ }
2380c486 1962+
e3fad6a6
AM
1963+ if(skb_is_nonlinear(skb)){
1964+ if(skb_linearize(skb) != 0){
1965+ if (net_ratelimit())
1966+ printk(KERN_ERR "layer7: failed to linearize "
1967+ "packet, bailing.\n");
1968+ spin_unlock_bh(&l7_lock);
1969+ return info->invert;
1970+ }
1971+ }
2380c486 1972+
e3fad6a6
AM
1973+ /* now that the skb is linearized, it's safe to set these. */
1974+ app_data = skb->data + app_data_offset(skb);
1975+ appdatalen = skb_tail_pointer(skb) - app_data;
2380c486 1976+
e3fad6a6
AM
1977+ /* the return value gets checked later, when we're ready to use it */
1978+ comppattern = compile_and_cache(info->pattern, info->protocol);
2380c486 1979+
e3fad6a6
AM
1980+ /* On the first packet of a connection, allocate space for app data */
1981+ if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] &&
1982+ !master_conntrack->layer7.app_data){
1983+ master_conntrack->layer7.app_data =
1984+ kmalloc(maxdatalen, GFP_ATOMIC);
1985+ if(!master_conntrack->layer7.app_data){
1986+ if (net_ratelimit())
1987+ printk(KERN_ERR "layer7: out of memory in "
1988+ "match, bailing.\n");
1989+ spin_unlock_bh(&l7_lock);
1990+ return info->invert;
1991+ }
2380c486 1992+
e3fad6a6 1993+ master_conntrack->layer7.app_data[0] = '\0';
2380c486 1994+ }
e3fad6a6
AM
1995+
1996+ /* Can be here, but unallocated, if numpackets is increased near
1997+ the beginning of a connection */
1998+ if(master_conntrack->layer7.app_data == NULL){
1999+ spin_unlock_bh(&l7_lock);
2000+ return info->invert; /* unmatched */
2380c486
JR
2001+ }
2002+
e3fad6a6
AM
2003+ if(!skb->cb[0]){
2004+ int newbytes;
2005+ newbytes = add_data(master_conntrack, app_data, appdatalen);
2380c486 2006+
e3fad6a6
AM
2007+ if(newbytes == 0) { /* didn't add any data */
2008+ skb->cb[0] = 1;
2009+ /* Didn't match before, not going to match now */
2010+ spin_unlock_bh(&l7_lock);
2011+ return info->invert;
2380c486
JR
2012+ }
2013+ }
e3fad6a6
AM
2014+
2015+ /* If looking for "unknown", then never match. "Unknown" means that
2016+ we've given up; we're still trying with these packets. */
2017+ if(!strcmp(info->protocol, "unknown")) {
2018+ pattern_result = 0;
2019+ /* If looking for "unset", then always match. "Unset" means that we
2020+ haven't yet classified the connection. */
2021+ } else if(!strcmp(info->protocol, "unset")) {
2022+ pattern_result = 2;
2023+ DPRINTK("layer7: matched unset: not yet classified "
2024+ "(%d/%d packets)\n",
2025+ total_acct_packets(master_conntrack), num_packets);
2026+ /* If the regexp failed to compile, don't bother running it */
2027+ } else if(comppattern &&
2028+ regexec(comppattern, master_conntrack->layer7.app_data)){
2029+ DPRINTK("layer7: matched %s\n", info->protocol);
2030+ pattern_result = 1;
2031+ } else pattern_result = 0;
2032+
2033+ if(pattern_result == 1) {
2034+ master_conntrack->layer7.app_proto =
2035+ kmalloc(strlen(info->protocol)+1, GFP_ATOMIC);
2036+ if(!master_conntrack->layer7.app_proto){
2037+ if (net_ratelimit())
2038+ printk(KERN_ERR "layer7: out of memory in "
2039+ "match, bailing.\n");
2040+ spin_unlock_bh(&l7_lock);
2041+ return (pattern_result ^ info->invert);
2042+ }
2043+ strcpy(master_conntrack->layer7.app_proto, info->protocol);
2044+ } else if(pattern_result > 1) { /* cleanup from "unset" */
2045+ pattern_result = 1;
2046+ }
2047+
2048+ /* mark the packet seen */
2049+ skb->cb[0] = 1;
2050+
2051+ spin_unlock_bh(&l7_lock);
2052+ return (pattern_result ^ info->invert);
2380c486 2053+}
2380c486 2054+
e3fad6a6
AM
2055+// load nf_conntrack_ipv4
2056+static int check(const struct xt_mtchk_param *par)
2057+{
2058+ if (nf_ct_l3proto_try_module_get(par->family) < 0) {
2059+ pr_info("can't load conntrack support for "
2060+ "proto=%d\n", par->family);
2061+ return -EINVAL;
2062+ }
2063+ return 0;
2064+}
2380c486 2065+
2380c486 2066+
e3fad6a6
AM
2067+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
2068+ static void destroy(const struct xt_mtdtor_param *par)
2069+ {
2070+ nf_ct_l3proto_module_put(par->family);
2071+ }
2072+#else
2073+ static void destroy(const struct xt_match *match, void *matchinfo)
2074+ {
2075+ nf_ct_l3proto_module_put(match->family);
2076+ }
2380c486
JR
2077+#endif
2078+
e3fad6a6
AM
2079+static struct xt_match xt_layer7_match[] __read_mostly = {
2080+{
2081+ .name = "layer7",
2082+ .family = AF_INET,
2083+ .checkentry = check,
2084+ .match = match,
2085+ .destroy = destroy,
2086+ .matchsize = sizeof(struct xt_layer7_info),
2087+ .me = THIS_MODULE
2088+}
2089+};
2380c486 2090+
e3fad6a6
AM
2091+static void layer7_cleanup_proc(void)
2092+{
2093+ remove_proc_entry("layer7_numpackets", init_net.proc_net);
2094+}
2380c486 2095+
e3fad6a6
AM
2096+/* register the proc file */
2097+static void layer7_init_proc(void)
2098+{
2099+ struct proc_dir_entry* entry;
2100+ entry = create_proc_entry("layer7_numpackets", 0644, init_net.proc_net);
2101+ entry->read_proc = layer7_read_proc;
2102+ entry->write_proc = layer7_write_proc;
2103+}
2380c486 2104+
e3fad6a6
AM
2105+static int __init xt_layer7_init(void)
2106+{
2107+ need_conntrack();
2108+
2109+ layer7_init_proc();
2110+ if(maxdatalen < 1) {
2111+ printk(KERN_WARNING "layer7: maxdatalen can't be < 1, "
2112+ "using 1\n");
2113+ maxdatalen = 1;
2114+ }
2115+ /* This is not a hard limit. It's just here to prevent people from
2116+ bringing their slow machines to a grinding halt. */
2117+ else if(maxdatalen > 65536) {
2118+ printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, "
2119+ "using 65536\n");
2120+ maxdatalen = 65536;
2121+ }
2122+ return xt_register_matches(xt_layer7_match,
2123+ ARRAY_SIZE(xt_layer7_match));
2124+}
2125+
2126+static void __exit xt_layer7_fini(void)
2127+{
2128+ layer7_cleanup_proc();
2129+ xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match));
2130+}
2131+
2132+module_init(xt_layer7_init);
2133+module_exit(xt_layer7_fini);
This page took 0.494667 seconds and 4 git commands to generate.