]> git.pld-linux.org Git - packages/kernel.git/blame - kernel-layer7.patch
up to 5.19.4
[packages/kernel.git] / kernel-layer7.patch
CommitLineData
e3fad6a6
AM
1diff -urNp -x '*.orig' linux-5.11/include/linux/netfilter/xt_layer7.h linux-5.11/include/linux/netfilter/xt_layer7.h
2--- linux-5.11/include/linux/netfilter/xt_layer7.h 1970-01-01 01:00:00.000000000 +0100
3+++ linux-5.11/include/linux/netfilter/xt_layer7.h 2021-02-24 13:21:48.338440845 +0100
4@@ -0,0 +1,13 @@
#ifndef _XT_LAYER7_H
#define _XT_LAYER7_H

/* Capacities, in bytes, of the fixed-size string fields below. */
#define MAX_PATTERN_LEN 8192
#define MAX_PROTOCOL_LEN 256

/*
 * Per-rule data of the "layer7" match.
 *
 * NOTE(review): field meanings are inferred from their names -- protocol
 * presumably holds the protocol label (e.g. "http"), pattern the regular
 * expression text, and invert a negation flag.  Confirm against xt_layer7.c.
 */
struct xt_layer7_info {
	char protocol[MAX_PROTOCOL_LEN];
	char pattern[MAX_PATTERN_LEN];
	u_int8_t invert;
};

#endif /* _XT_LAYER7_H */
18diff -urNp -x '*.orig' linux-5.11/include/net/netfilter/nf_conntrack.h linux-5.11/include/net/netfilter/nf_conntrack.h
19--- linux-5.11/include/net/netfilter/nf_conntrack.h 2021-02-14 23:32:24.000000000 +0100
20+++ linux-5.11/include/net/netfilter/nf_conntrack.h 2021-02-24 13:21:48.338440845 +0100
21@@ -103,6 +103,22 @@ struct nf_conn {
22 /* Extensions */
23 struct nf_ct_ext *ext;
24
25+#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || \
26+ defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
27+ struct {
28+ /*
29+ * e.g. "http". NULL before decision. "unknown" after decision
30+ * if no match.
31+ */
32+ char *app_proto;
33+ /*
34+ * application layer data so far. NULL after match decision.
35+ */
36+ char *app_data;
37+ unsigned int app_data_len;
38+ } layer7;
39+#endif
40+
41 /* Storage reserved for other modules, must be the last member */
42 union nf_conntrack_proto proto;
43 };
44diff -urNp -x '*.orig' linux-5.11/net/netfilter/Kconfig linux-5.11/net/netfilter/Kconfig
45--- linux-5.11/net/netfilter/Kconfig 2021-02-14 23:32:24.000000000 +0100
46+++ linux-5.11/net/netfilter/Kconfig 2021-02-24 13:21:48.335107407 +0100
47@@ -1562,6 +1562,27 @@ config NETFILTER_XT_MATCH_STATE
2380c486
JR
48
49 To compile it as a module, choose M here. If unsure, say N.
50
51+config NETFILTER_XT_MATCH_LAYER7
52+ tristate '"layer7" match support'
53+ depends on NETFILTER_XTABLES
54+ depends on EXPERIMENTAL && (IP_NF_CONNTRACK || NF_CONNTRACK)
2380c486
JR
55+ help
56+ Say Y if you want to be able to classify connections (and their
57+ packets) based on regular expression matching of their application
58+ layer data. This is one way to classify applications such as
59+ peer-to-peer filesharing systems that do not always use the same
60+ port.
61+
62+ To compile it as a module, choose M here. If unsure, say N.
63+
425dfcfc 64+
2380c486
JR
65+config NETFILTER_XT_MATCH_LAYER7_DEBUG
66+ bool 'Layer 7 debugging output'
67+ depends on NETFILTER_XT_MATCH_LAYER7
68+ help
69+ Say Y to get lots of debugging output.
70+
71+
72 config NETFILTER_XT_MATCH_STATISTIC
73 tristate '"statistic" match support'
74 depends on NETFILTER_ADVANCED
e3fad6a6
AM
75diff -urNp -x '*.orig' linux-5.11/net/netfilter/Makefile linux-5.11/net/netfilter/Makefile
76--- linux-5.11/net/netfilter/Makefile 2021-02-14 23:32:24.000000000 +0100
77+++ linux-5.11/net/netfilter/Makefile 2021-02-24 13:21:48.335107407 +0100
78@@ -204,6 +204,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_RECENT)
2380c486
JR
79 obj-$(CONFIG_NETFILTER_XT_MATCH_SCTP) += xt_sctp.o
80 obj-$(CONFIG_NETFILTER_XT_MATCH_SOCKET) += xt_socket.o
81 obj-$(CONFIG_NETFILTER_XT_MATCH_STATE) += xt_state.o
82+obj-$(CONFIG_NETFILTER_XT_MATCH_LAYER7) += xt_layer7.o
83 obj-$(CONFIG_NETFILTER_XT_MATCH_STATISTIC) += xt_statistic.o
84 obj-$(CONFIG_NETFILTER_XT_MATCH_STRING) += xt_string.o
85 obj-$(CONFIG_NETFILTER_XT_MATCH_TCPMSS) += xt_tcpmss.o
e3fad6a6
AM
86diff -urNp -x '*.orig' linux-5.11/net/netfilter/nf_conntrack_standalone.c linux-5.11/net/netfilter/nf_conntrack_standalone.c
87--- linux-5.11/net/netfilter/nf_conntrack_standalone.c 2021-02-14 23:32:24.000000000 +0100
88+++ linux-5.11/net/netfilter/nf_conntrack_standalone.c 2021-02-24 13:21:48.338440845 +0100
89@@ -366,6 +366,12 @@ static int ct_seq_show(struct seq_file *
90 ct_show_zone(s, ct, NF_CT_DEFAULT_ZONE_DIR);
91 ct_show_delta_time(s, ct);
92
93+#if defined(CONFIG_NETFILTER_XT_MATCH_LAYER7) || defined(CONFIG_NETFILTER_XT_MATCH_LAYER7_MODULE)
94+ if(ct->layer7.app_proto &&
95+ seq_printf(s, "l7proto=%s ", ct->layer7.app_proto))
96+ return -ENOSPC;
2380c486 97+#endif
2380c486 98+
f4d37d76 99 seq_printf(s, "use=%u\n", refcount_read(&ct->ct_general.use));
e3fad6a6
AM
100
101 if (seq_has_overflowed(s))
102diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regexp.c linux-5.11/net/netfilter/regexp/regexp.c
103--- linux-5.11/net/netfilter/regexp/regexp.c 1970-01-01 01:00:00.000000000 +0100
104+++ linux-5.11/net/netfilter/regexp/regexp.c 2021-02-24 13:21:48.335107407 +0100
105@@ -0,0 +1,1197 @@
106+/*
107+ * regcomp and regexec -- regsub and regerror are elsewhere
108+ * @(#)regexp.c 1.3 of 18 April 87
109+ *
110+ * Copyright (c) 1986 by University of Toronto.
111+ * Written by Henry Spencer. Not derived from licensed software.
112+ *
113+ * Permission is granted to anyone to use this software for any
114+ * purpose on any computer system, and to redistribute it freely,
115+ * subject to the following restrictions:
116+ *
117+ * 1. The author is not responsible for the consequences of use of
118+ * this software, no matter how awful, even if they arise
119+ * from defects in it.
120+ *
121+ * 2. The origin of this software must not be misrepresented, either
122+ * by explicit claim or by omission.
123+ *
124+ * 3. Altered versions must be plainly marked as such, and must not
125+ * be misrepresented as being the original software.
126+ *
127+ * Beware that some of this code is subtly aware of the way operator
128+ * precedence is structured in regular expressions. Serious changes in
129+ * regular-expression syntax might require a total rethink.
130+ *
131+ * This code was modified by Ethan Sommer to work within the kernel
132+ * (it now uses kmalloc etc..)
133+ *
134+ * Modified slightly by Matthew Strait to use more modern C.
135+ */
2380c486 136+
e3fad6a6
AM
137+#include "regexp.h"
138+#include "regmagic.h"
2380c486 139+
e3fad6a6
AM
140+/* added by ethan and matt. Lets it work in both kernel and user space.
141+(So iptables can use it, for instance.) Yea, it goes both ways... */
142+#if __KERNEL__
143+ #define malloc(foo) kmalloc(foo,GFP_ATOMIC)
2380c486 144+#else
e3fad6a6 145+ #define printk(format,args...) printf(format,##args)
2380c486
JR
146+#endif
147+
/*
 * regerror - report a regexp problem
 *
 * Logs the message through printk (mapped to printf by this file when it is
 * not built with __KERNEL__ set) and then returns normally to the caller.
 */
void regerror(char * s)
{
	printk("<3>Regexp: %s\n", s);
}
153+
e3fad6a6
AM
154+/*
155+ * The "internal use only" fields in regexp.h are present to pass info from
156+ * compile to execute that permits the execute phase to run lots faster on
157+ * simple cases. They are:
158+ *
159+ * regstart char that must begin a match; '\0' if none obvious
160+ * reganch is the match anchored (at beginning-of-line only)?
161+ * regmust string (pointer into program) that match must include, or NULL
162+ * regmlen length of regmust string
163+ *
164+ * Regstart and reganch permit very fast decisions on suitable starting points
165+ * for a match, cutting down the work a lot. Regmust permits fast rejection
166+ * of lines that cannot possibly match. The regmust tests are costly enough
167+ * that regcomp() supplies a regmust only if the r.e. contains something
168+ * potentially expensive (at present, the only such thing detected is * or +
169+ * at the start of the r.e., which can involve a lot of backup). Regmlen is
170+ * supplied because the test in regexec() needs it and regcomp() is computing
171+ * it anyway.
172+ */
2380c486 173+
e3fad6a6
AM
174+/*
175+ * Structure for regexp "program". This is essentially a linear encoding
176+ * of a nondeterministic finite-state machine (aka syntax charts or
177+ * "railroad normal form" in parsing technology). Each node is an opcode
178+ * plus a "next" pointer, possibly plus an operand. "Next" pointers of
179+ * all nodes except BRANCH implement concatenation; a "next" pointer with
180+ * a BRANCH on both ends of it is connecting two alternatives. (Here we
181+ * have one of the subtle syntax dependencies: an individual BRANCH (as
182+ * opposed to a collection of them) is never concatenated with anything
183+ * because of operator precedence.) The operand of some types of node is
184+ * a literal string; for others, it is a node leading into a sub-FSM. In
185+ * particular, the operand of a BRANCH node is the first node of the branch.
186+ * (NB this is *not* a tree structure: the tail of the branch connects
187+ * to the thing following the set of BRANCHes.) The opcodes are:
188+ */
2380c486 189+
e3fad6a6
AM
190+/* definition number opnd? meaning */
191+#define END 0 /* no End of program. */
192+#define BOL 1 /* no Match "" at beginning of line. */
193+#define EOL 2 /* no Match "" at end of line. */
194+#define ANY 3 /* no Match any one character. */
195+#define ANYOF 4 /* str Match any character in this string. */
196+#define ANYBUT 5 /* str Match any character not in this string. */
197+#define BRANCH 6 /* node Match this alternative, or the next... */
198+#define BACK 7 /* no Match "", "next" ptr points backward. */
199+#define EXACTLY 8 /* str Match this string. */
200+#define NOTHING 9 /* no Match empty string. */
201+#define STAR 10 /* node Match this (simple) thing 0 or more times. */
202+#define PLUS 11 /* node Match this (simple) thing 1 or more times. */
203+#define OPEN 20 /* no Mark this point in input as start of #n. */
204+ /* OPEN+1 is number 1, etc. */
205+#define CLOSE 30 /* no Analogous to OPEN. */
2380c486 206+
e3fad6a6
AM
207+/*
208+ * Opcode notes:
209+ *
210+ * BRANCH The set of branches constituting a single choice are hooked
211+ * together with their "next" pointers, since precedence prevents
212+ * anything being concatenated to any individual branch. The
213+ * "next" pointer of the last BRANCH in a choice points to the
214+ * thing following the whole choice. This is also where the
215+ * final "next" pointer of each individual branch points; each
216+ * branch starts with the operand node of a BRANCH node.
217+ *
218+ * BACK Normal "next" pointers all implicitly point forward; BACK
219+ * exists to make loop structures possible.
220+ *
221+ * STAR,PLUS '?', and complex '*' and '+', are implemented as circular
222+ * BRANCH structures using BACK. Simple cases (one character
223+ * per match) are implemented with STAR and PLUS for speed
224+ * and to minimize recursive plunges.
225+ *
226+ * OPEN,CLOSE ...are numbered at compile time.
227+ */
2380c486 228+
e3fad6a6
AM
229+/*
230+ * A node is one char of opcode followed by two chars of "next" pointer.
231+ * "Next" pointers are stored as two 8-bit pieces, high order first. The
232+ * value is a positive offset from the opcode of the node containing it.
233+ * An operand, if any, simply follows the node. (Note that much of the
234+ * code generation knows about this implicit relationship.)
235+ *
236+ * Using two bytes for the "next" pointer is vast overkill for most things,
237+ * but allows patterns to get big without disasters.
238+ */
239+#define OP(p) (*(p))
240+#define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377))
241+#define OPERAND(p) ((p) + 3)
2380c486 242+
e3fad6a6
AM
243+/*
244+ * See regmagic.h for one further detail of program structure.
245+ */
2380c486 246+
2380c486 247+
e3fad6a6
AM
248+/*
249+ * Utility definitions.
250+ */
251+#ifndef CHARBITS
252+#define UCHARAT(p) ((int)*(unsigned char *)(p))
253+#else
254+#define UCHARAT(p) ((int)*(p)&CHARBITS)
255+#endif
2380c486 256+
e3fad6a6
AM
257+#define FAIL(m) { regerror(m); return(NULL); }
258+#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
259+#define META "^$.[()|?+*\\"
2380c486 260+
e3fad6a6
AM
261+/*
262+ * Flags to be passed up and down.
263+ */
264+#define HASWIDTH 01 /* Known never to match null string. */
265+#define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */
266+#define SPSTART 04 /* Starts with * or +. */
267+#define WORST 0 /* Worst case. */
2380c486 268+
e3fad6a6
AM
/*
 * Work variables shared by the routines in this file.  They are bundled in
 * a struct and passed by pointer rather than kept as file-scope globals.
 */
struct match_globals {
	char *reginput;		/* String-input pointer. */
	char *regbol;		/* Beginning of input, for ^ check. */
	char **regstartp;	/* Pointer to startp array. */
	char **regendp;		/* Ditto for endp. */
	char *regparse;		/* Input-scan pointer. */
	int regnpar;		/* () count. */
	char regdummy;		/* Placeholder target for the sizing pass. */
	char *regcode;		/* Code-emit pointer; &regdummy = don't. */
	long regsize;		/* Code size. */
};
2380c486 283+
e3fad6a6
AM
284+/*
285+ * Forward declarations for regcomp()'s friends.
286+ */
287+#ifndef STATIC
288+#define STATIC static
289+#endif
290+STATIC char *reg(struct match_globals *g, int paren,int *flagp);
291+STATIC char *regbranch(struct match_globals *g, int *flagp);
292+STATIC char *regpiece(struct match_globals *g, int *flagp);
293+STATIC char *regatom(struct match_globals *g, int *flagp);
294+STATIC char *regnode(struct match_globals *g, char op);
295+STATIC char *regnext(struct match_globals *g, char *p);
296+STATIC void regc(struct match_globals *g, char b);
297+STATIC void reginsert(struct match_globals *g, char op, char *opnd);
298+STATIC void regtail(struct match_globals *g, char *p, char *val);
299+STATIC void regoptail(struct match_globals *g, char *p, char *val);
2380c486 300+
2380c486 301+
/*
 * my_strcspn - length of the initial part of s1 that contains no character
 * from s2.
 *
 * Returns the index of the first character of s1 that also occurs in s2,
 * or the length of s1 when there is no such character.
 */
__kernel_size_t my_strcspn(const char *s1,const char *s2)
{
	const char *cur;
	const char *reject;
	int span = 0;

	for (cur = s1; *cur != '\0'; cur++, span++) {
		for (reject = s2; *reject != '\0'; reject++) {
			if (*cur == *reject)
				return(span);
		}
	}
	return(span);
}
317+
e3fad6a6
AM
318+/*
319+ - regcomp - compile a regular expression into internal code
320+ *
321+ * We can't allocate space until we know how big the compiled form will be,
322+ * but we can't compile it (and thus know how big it is) until we've got a
323+ * place to put the code. So we cheat: we compile it twice, once with code
324+ * generation turned off and size counting turned on, and once "for real".
325+ * This also means that we don't allocate space until we are sure that the
326+ * thing really will compile successfully, and we never have to move the
327+ * code and thus invalidate pointers into it. (Note that it has to be in
328+ * one piece because free() must be able to free it all.)
329+ *
330+ * Beware that the optimization-preparation code in here knows about some
331+ * of the structure of the compiled regexp.
332+ */
333+regexp *
334+regcomp(char *exp,int *patternsize)
2380c486 335+{
e3fad6a6
AM
336+ register regexp *r;
337+ register char *scan;
338+ register char *longest;
339+ register int len;
340+ int flags;
341+ struct match_globals g;
342+
343+ /* commented out by ethan
344+ extern char *malloc();
345+ */
2380c486 346+
e3fad6a6
AM
347+ if (exp == NULL)
348+ FAIL("NULL argument");
2380c486 349+
e3fad6a6
AM
350+ /* First pass: determine size, legality. */
351+ g.regparse = exp;
352+ g.regnpar = 1;
353+ g.regsize = 0L;
354+ g.regcode = &g.regdummy;
355+ regc(&g, MAGIC);
356+ if (reg(&g, 0, &flags) == NULL)
357+ return(NULL);
2380c486 358+
e3fad6a6
AM
359+ /* Small enough for pointer-storage convention? */
360+ if (g.regsize >= 32767L) /* Probably could be 65535L. */
361+ FAIL("regexp too big");
2380c486 362+
e3fad6a6
AM
363+ /* Allocate space. */
364+ *patternsize=sizeof(regexp) + (unsigned)g.regsize;
365+ r = (regexp *)malloc(sizeof(regexp) + (unsigned)g.regsize);
366+ if (r == NULL)
367+ FAIL("out of space");
2380c486 368+
e3fad6a6
AM
369+ /* Second pass: emit code. */
370+ g.regparse = exp;
371+ g.regnpar = 1;
372+ g.regcode = r->program;
373+ regc(&g, MAGIC);
374+ if (reg(&g, 0, &flags) == NULL)
375+ return(NULL);
2380c486 376+
e3fad6a6
AM
377+ /* Dig out information for optimizations. */
378+ r->regstart = '\0'; /* Worst-case defaults. */
379+ r->reganch = 0;
380+ r->regmust = NULL;
381+ r->regmlen = 0;
382+ scan = r->program+1; /* First BRANCH. */
383+ if (OP(regnext(&g, scan)) == END) { /* Only one top-level choice. */
384+ scan = OPERAND(scan);
2380c486 385+
e3fad6a6
AM
386+ /* Starting-point info. */
387+ if (OP(scan) == EXACTLY)
388+ r->regstart = *OPERAND(scan);
389+ else if (OP(scan) == BOL)
390+ r->reganch++;
391+
392+ /*
393+ * If there's something expensive in the r.e., find the
394+ * longest literal string that must appear and make it the
395+ * regmust. Resolve ties in favor of later strings, since
396+ * the regstart check works with the beginning of the r.e.
397+ * and avoiding duplication strengthens checking. Not a
398+ * strong reason, but sufficient in the absence of others.
399+ */
400+ if (flags&SPSTART) {
401+ longest = NULL;
402+ len = 0;
403+ for (; scan != NULL; scan = regnext(&g, scan))
404+ if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
405+ longest = OPERAND(scan);
406+ len = strlen(OPERAND(scan));
407+ }
408+ r->regmust = longest;
409+ r->regmlen = len;
2380c486 410+ }
2380c486 411+ }
e3fad6a6
AM
412+
413+ return(r);
2380c486
JR
414+}
415+
e3fad6a6
AM
416+/*
417+ - reg - regular expression, i.e. main body or parenthesized thing
418+ *
419+ * Caller must absorb opening parenthesis.
420+ *
421+ * Combining parenthesis handling with the base level of regular expression
422+ * is a trifle forced, but the need to tie the tails of the branches to what
423+ * follows makes it hard to avoid.
424+ */
425+static char *
426+reg(struct match_globals *g, int paren, int *flagp /* Parenthesized? */ )
2380c486 427+{
e3fad6a6
AM
428+ register char *ret;
429+ register char *br;
430+ register char *ender;
431+ register int parno = 0; /* 0 makes gcc happy */
432+ int flags;
2380c486 433+
e3fad6a6 434+ *flagp = HASWIDTH; /* Tentatively. */
2380c486 435+
e3fad6a6
AM
436+ /* Make an OPEN node, if parenthesized. */
437+ if (paren) {
438+ if (g->regnpar >= NSUBEXP)
439+ FAIL("too many ()");
440+ parno = g->regnpar;
441+ g->regnpar++;
442+ ret = regnode(g, OPEN+parno);
443+ } else
444+ ret = NULL;
2380c486 445+
e3fad6a6
AM
446+ /* Pick up the branches, linking them together. */
447+ br = regbranch(g, &flags);
448+ if (br == NULL)
449+ return(NULL);
450+ if (ret != NULL)
451+ regtail(g, ret, br); /* OPEN -> first. */
452+ else
453+ ret = br;
454+ if (!(flags&HASWIDTH))
455+ *flagp &= ~HASWIDTH;
456+ *flagp |= flags&SPSTART;
457+ while (*g->regparse == '|') {
458+ g->regparse++;
459+ br = regbranch(g, &flags);
460+ if (br == NULL)
461+ return(NULL);
462+ regtail(g, ret, br); /* BRANCH -> BRANCH. */
463+ if (!(flags&HASWIDTH))
464+ *flagp &= ~HASWIDTH;
465+ *flagp |= flags&SPSTART;
466+ }
2380c486 467+
e3fad6a6
AM
468+ /* Make a closing node, and hook it on the end. */
469+ ender = regnode(g, (paren) ? CLOSE+parno : END);
470+ regtail(g, ret, ender);
2380c486 471+
e3fad6a6
AM
472+ /* Hook the tails of the branches to the closing node. */
473+ for (br = ret; br != NULL; br = regnext(g, br))
474+ regoptail(g, br, ender);
2380c486 475+
e3fad6a6
AM
476+ /* Check for proper termination. */
477+ if (paren && *g->regparse++ != ')') {
478+ FAIL("unmatched ()");
479+ } else if (!paren && *g->regparse != '\0') {
480+ if (*g->regparse == ')') {
481+ FAIL("unmatched ()");
482+ } else
483+ FAIL("junk on end"); /* "Can't happen". */
484+ /* NOTREACHED */
2380c486 485+ }
e3fad6a6
AM
486+
487+ return(ret);
2380c486
JR
488+}
489+
e3fad6a6
AM
490+/*
491+ - regbranch - one alternative of an | operator
492+ *
493+ * Implements the concatenation operator.
494+ */
495+static char *
496+regbranch(struct match_globals *g, int *flagp)
2380c486 497+{
e3fad6a6
AM
498+ register char *ret;
499+ register char *chain;
500+ register char *latest;
501+ int flags;
2380c486 502+
e3fad6a6 503+ *flagp = WORST; /* Tentatively. */
2380c486 504+
e3fad6a6
AM
505+ ret = regnode(g, BRANCH);
506+ chain = NULL;
507+ while (*g->regparse != '\0' && *g->regparse != '|' && *g->regparse != ')') {
508+ latest = regpiece(g, &flags);
509+ if (latest == NULL)
510+ return(NULL);
511+ *flagp |= flags&HASWIDTH;
512+ if (chain == NULL) /* First piece. */
513+ *flagp |= flags&SPSTART;
514+ else
515+ regtail(g, chain, latest);
516+ chain = latest;
517+ }
518+ if (chain == NULL) /* Loop ran zero times. */
519+ (void) regnode(g, NOTHING);
2380c486 520+
e3fad6a6 521+ return(ret);
2380c486
JR
522+}
523+
e3fad6a6
AM
524+/*
525+ - regpiece - something followed by possible [*+?]
526+ *
527+ * Note that the branching code sequences used for ? and the general cases
528+ * of * and + are somewhat optimized: they use the same NOTHING node as
529+ * both the endmarker for their branch list and the body of the last branch.
530+ * It might seem that this node could be dispensed with entirely, but the
531+ * endmarker role is not redundant.
532+ */
533+static char *
534+regpiece(struct match_globals *g, int *flagp)
2380c486 535+{
e3fad6a6
AM
536+ register char *ret;
537+ register char op;
538+ register char *next;
539+ int flags;
2380c486 540+
e3fad6a6
AM
541+ ret = regatom(g, &flags);
542+ if (ret == NULL)
543+ return(NULL);
2380c486 544+
e3fad6a6
AM
545+ op = *g->regparse;
546+ if (!ISMULT(op)) {
547+ *flagp = flags;
548+ return(ret);
2380c486
JR
549+ }
550+
e3fad6a6
AM
551+ if (!(flags&HASWIDTH) && op != '?')
552+ FAIL("*+ operand could be empty");
553+ *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH);
2380c486 554+
e3fad6a6
AM
555+ if (op == '*' && (flags&SIMPLE))
556+ reginsert(g, STAR, ret);
557+ else if (op == '*') {
558+ /* Emit x* as (x&|), where & means "self". */
559+ reginsert(g, BRANCH, ret); /* Either x */
560+ regoptail(g, ret, regnode(g, BACK)); /* and loop */
561+ regoptail(g, ret, ret); /* back */
562+ regtail(g, ret, regnode(g, BRANCH)); /* or */
563+ regtail(g, ret, regnode(g, NOTHING)); /* null. */
564+ } else if (op == '+' && (flags&SIMPLE))
565+ reginsert(g, PLUS, ret);
566+ else if (op == '+') {
567+ /* Emit x+ as x(&|), where & means "self". */
568+ next = regnode(g, BRANCH); /* Either */
569+ regtail(g, ret, next);
570+ regtail(g, regnode(g, BACK), ret); /* loop back */
571+ regtail(g, next, regnode(g, BRANCH)); /* or */
572+ regtail(g, ret, regnode(g, NOTHING)); /* null. */
573+ } else if (op == '?') {
574+ /* Emit x? as (x|) */
575+ reginsert(g, BRANCH, ret); /* Either x */
576+ regtail(g, ret, regnode(g, BRANCH)); /* or */
577+ next = regnode(g, NOTHING); /* null. */
578+ regtail(g, ret, next);
579+ regoptail(g, ret, next);
2380c486 580+ }
e3fad6a6
AM
581+ g->regparse++;
582+ if (ISMULT(*g->regparse))
583+ FAIL("nested *?+");
2380c486 584+
e3fad6a6 585+ return(ret);
2380c486
JR
586+}
587+
e3fad6a6
AM
588+/*
589+ - regatom - the lowest level
590+ *
591+ * Optimization: gobbles an entire sequence of ordinary characters so that
592+ * it can turn them into a single node, which is smaller to store and
593+ * faster to run. Backslashed characters are exceptions, each becoming a
594+ * separate node; the code is simpler that way and it's not worth fixing.
595+ */
596+static char *
597+regatom(struct match_globals *g, int *flagp)
2380c486 598+{
e3fad6a6
AM
599+ register char *ret;
600+ int flags;
2380c486 601+
e3fad6a6 602+ *flagp = WORST; /* Tentatively. */
2380c486 603+
e3fad6a6
AM
604+ switch (*g->regparse++) {
605+ case '^':
606+ ret = regnode(g, BOL);
607+ break;
608+ case '$':
609+ ret = regnode(g, EOL);
610+ break;
611+ case '.':
612+ ret = regnode(g, ANY);
613+ *flagp |= HASWIDTH|SIMPLE;
614+ break;
615+ case '[': {
616+ register int class;
617+ register int classend;
2380c486 618+
e3fad6a6
AM
619+ if (*g->regparse == '^') { /* Complement of range. */
620+ ret = regnode(g, ANYBUT);
621+ g->regparse++;
622+ } else
623+ ret = regnode(g, ANYOF);
624+ if (*g->regparse == ']' || *g->regparse == '-')
625+ regc(g, *g->regparse++);
626+ while (*g->regparse != '\0' && *g->regparse != ']') {
627+ if (*g->regparse == '-') {
628+ g->regparse++;
629+ if (*g->regparse == ']' || *g->regparse == '\0')
630+ regc(g, '-');
631+ else {
632+ class = UCHARAT(g->regparse-2)+1;
633+ classend = UCHARAT(g->regparse);
634+ if (class > classend+1)
635+ FAIL("invalid [] range");
636+ for (; class <= classend; class++)
637+ regc(g, class);
638+ g->regparse++;
639+ }
640+ } else
641+ regc(g, *g->regparse++);
642+ }
643+ regc(g, '\0');
644+ if (*g->regparse != ']')
645+ FAIL("unmatched []");
646+ g->regparse++;
647+ *flagp |= HASWIDTH|SIMPLE;
648+ }
649+ break;
650+ case '(':
651+ ret = reg(g, 1, &flags);
652+ if (ret == NULL)
653+ return(NULL);
654+ *flagp |= flags&(HASWIDTH|SPSTART);
655+ break;
656+ case '\0':
657+ case '|':
658+ case ')':
659+ FAIL("internal urp"); /* Supposed to be caught earlier. */
660+ break;
661+ case '?':
662+ case '+':
663+ case '*':
664+ FAIL("?+* follows nothing");
665+ break;
666+ case '\\':
667+ if (*g->regparse == '\0')
668+ FAIL("trailing \\");
669+ ret = regnode(g, EXACTLY);
670+ regc(g, *g->regparse++);
671+ regc(g, '\0');
672+ *flagp |= HASWIDTH|SIMPLE;
673+ break;
674+ default: {
675+ register int len;
676+ register char ender;
2380c486 677+
e3fad6a6
AM
678+ g->regparse--;
679+ len = my_strcspn((const char *)g->regparse, (const char *)META);
680+ if (len <= 0)
681+ FAIL("internal disaster");
682+ ender = *(g->regparse+len);
683+ if (len > 1 && ISMULT(ender))
684+ len--; /* Back off clear of ?+* operand. */
685+ *flagp |= HASWIDTH;
686+ if (len == 1)
687+ *flagp |= SIMPLE;
688+ ret = regnode(g, EXACTLY);
689+ while (len > 0) {
690+ regc(g, *g->regparse++);
691+ len--;
692+ }
693+ regc(g, '\0');
694+ }
695+ break;
2380c486
JR
696+ }
697+
e3fad6a6
AM
698+ return(ret);
699+}
2380c486 700+
e3fad6a6
AM
701+/*
702+ - regnode - emit a node
703+ */
704+static char * /* Location. */
705+regnode(struct match_globals *g, char op)
706+{
707+ register char *ret;
708+ register char *ptr;
2380c486 709+
e3fad6a6
AM
710+ ret = g->regcode;
711+ if (ret == &g->regdummy) {
712+ g->regsize += 3;
713+ return(ret);
2380c486
JR
714+ }
715+
e3fad6a6
AM
716+ ptr = ret;
717+ *ptr++ = op;
718+ *ptr++ = '\0'; /* Null "next" pointer. */
719+ *ptr++ = '\0';
720+ g->regcode = ptr;
2380c486 721+
e3fad6a6
AM
722+ return(ret);
723+}
2380c486 724+
e3fad6a6
AM
725+/*
726+ - regc - emit (if appropriate) a byte of code
727+ */
728+static void
729+regc(struct match_globals *g, char b)
2380c486 730+{
e3fad6a6
AM
731+ if (g->regcode != &g->regdummy)
732+ *g->regcode++ = b;
733+ else
734+ g->regsize++;
2380c486
JR
735+}
736+
e3fad6a6
AM
737+/*
738+ - reginsert - insert an operator in front of already-emitted operand
739+ *
740+ * Means relocating the operand.
741+ */
742+static void
743+reginsert(struct match_globals *g, char op, char* opnd)
744+{
745+ register char *src;
746+ register char *dst;
747+ register char *place;
2380c486 748+
e3fad6a6
AM
749+ if (g->regcode == &g->regdummy) {
750+ g->regsize += 3;
751+ return;
2380c486 752+ }
2380c486 753+
e3fad6a6
AM
754+ src = g->regcode;
755+ g->regcode += 3;
756+ dst = g->regcode;
757+ while (src > opnd)
758+ *--dst = *--src;
2380c486 759+
e3fad6a6
AM
760+ place = opnd; /* Op node, where operand used to be. */
761+ *place++ = op;
762+ *place++ = '\0';
763+ *place++ = '\0';
2380c486
JR
764+}
765+
e3fad6a6
AM
766+/*
767+ - regtail - set the next-pointer at the end of a node chain
768+ */
769+static void
770+regtail(struct match_globals *g, char *p, char *val)
2380c486 771+{
e3fad6a6
AM
772+ register char *scan;
773+ register char *temp;
774+ register int offset;
2380c486 775+
e3fad6a6
AM
776+ if (p == &g->regdummy)
777+ return;
2380c486 778+
e3fad6a6
AM
779+ /* Find last node. */
780+ scan = p;
781+ for (;;) {
782+ temp = regnext(g, scan);
783+ if (temp == NULL)
784+ break;
785+ scan = temp;
2380c486 786+ }
e3fad6a6
AM
787+
788+ if (OP(scan) == BACK)
789+ offset = scan - val;
790+ else
791+ offset = val - scan;
792+ *(scan+1) = (offset>>8)&0377;
793+ *(scan+2) = offset&0377;
2380c486
JR
794+}
795+
e3fad6a6
AM
796+/*
797+ - regoptail - regtail on operand of first argument; nop if operandless
798+ */
799+static void
800+regoptail(struct match_globals *g, char *p, char *val)
2380c486 801+{
e3fad6a6
AM
802+ /* "Operandless" and "op != BRANCH" are synonymous in practice. */
803+ if (p == NULL || p == &g->regdummy || OP(p) != BRANCH)
804+ return;
805+ regtail(g, OPERAND(p), val);
2380c486
JR
806+}
807+
2380c486 808+/*
e3fad6a6 809+ * regexec and friends
2380c486
JR
810+ */
811+
2380c486 812+
e3fad6a6
AM
813+/*
814+ * Forwards.
815+ */
816+STATIC int regtry(struct match_globals *g, regexp *prog, char *string);
817+STATIC int regmatch(struct match_globals *g, char *prog);
818+STATIC int regrepeat(struct match_globals *g, char *p);
2380c486 819+
e3fad6a6
AM
820+#ifdef DEBUG
821+int regnarrate = 0;
822+void regdump();
823+STATIC char *regprop(char *op);
824+#endif
2380c486
JR
825+
826+/*
e3fad6a6 827+ - regexec - match a regexp against a string
2380c486 828+ */
e3fad6a6
AM
829+int
830+regexec(regexp *prog, char *string)
831+{
832+ register char *s;
833+ struct match_globals g;
2380c486 834+
e3fad6a6
AM
835+ /* Be paranoid... */
836+ if (prog == NULL || string == NULL) {
837+ printk("<3>Regexp: NULL parameter\n");
838+ return(0);
839+ }
2380c486 840+
e3fad6a6
AM
841+ /* Check validity of program. */
842+ if (UCHARAT(prog->program) != MAGIC) {
843+ printk("<3>Regexp: corrupted program\n");
844+ return(0);
845+ }
2380c486 846+
e3fad6a6
AM
847+ /* If there is a "must appear" string, look for it. */
848+ if (prog->regmust != NULL) {
849+ s = string;
850+ while ((s = strchr(s, prog->regmust[0])) != NULL) {
851+ if (strncmp(s, prog->regmust, prog->regmlen) == 0)
852+ break; /* Found it. */
853+ s++;
854+ }
855+ if (s == NULL) /* Not present. */
856+ return(0);
857+ }
2380c486 858+
e3fad6a6
AM
859+ /* Mark beginning of line for ^ . */
860+ g.regbol = string;
2380c486 861+
e3fad6a6
AM
862+ /* Simplest case: anchored match need be tried only once. */
863+ if (prog->reganch)
864+ return(regtry(&g, prog, string));
2380c486 865+
e3fad6a6
AM
866+ /* Messy cases: unanchored match. */
867+ s = string;
868+ if (prog->regstart != '\0')
869+ /* We know what char it must start with. */
870+ while ((s = strchr(s, prog->regstart)) != NULL) {
871+ if (regtry(&g, prog, s))
872+ return(1);
873+ s++;
874+ }
875+ else
876+ /* We don't -- general case. */
877+ do {
878+ if (regtry(&g, prog, s))
879+ return(1);
880+ } while (*s++ != '\0');
2380c486 881+
e3fad6a6
AM
882+ /* Failure. */
883+ return(0);
884+}
2380c486
JR
885+
886+/*
e3fad6a6 887+ - regtry - try match at specific point
2380c486 888+ */
e3fad6a6
AM
889+static int /* 0 failure, 1 success */
890+regtry(struct match_globals *g, regexp *prog, char *string)
2380c486 891+{
e3fad6a6
AM
892+ register int i;
893+ register char **sp;
894+ register char **ep;
2380c486 895+
e3fad6a6
AM
896+ g->reginput = string;
897+ g->regstartp = prog->startp;
898+ g->regendp = prog->endp;
899+
900+ sp = prog->startp;
901+ ep = prog->endp;
902+ for (i = NSUBEXP; i > 0; i--) {
903+ *sp++ = NULL;
904+ *ep++ = NULL;
905+ }
906+ if (regmatch(g, prog->program + 1)) {
907+ prog->startp[0] = string;
908+ prog->endp[0] = g->reginput;
909+ return(1);
910+ } else
911+ return(0);
2380c486
JR
912+}
913+
914+/*
e3fad6a6 915+ - regmatch - main matching routine
2380c486 916+ *
e3fad6a6
AM
917+ * Conceptually the strategy is simple: check to see whether the current
918+ * node matches, call self recursively to see whether the rest matches,
919+ * and then act accordingly. In practice we make some effort to avoid
920+ * recursion, in particular by going through "ordinary" nodes (that don't
921+ * need to know whether the rest of the match failed) by a loop instead of
922+ * by recursion.
2380c486 923+ */
e3fad6a6
AM
924+static int /* 0 failure, 1 success */
925+regmatch(struct match_globals *g, char *prog)
2380c486 926+{
e3fad6a6
AM
927+ register char *scan = prog; /* Current node. */
928+ char *next; /* Next node. */
2380c486 929+
e3fad6a6
AM
930+#ifdef DEBUG
931+ if (scan != NULL && regnarrate)
932+ fprintf(stderr, "%s(\n", regprop(scan));
933+#endif
934+ while (scan != NULL) {
935+#ifdef DEBUG
936+ if (regnarrate)
937+ fprintf(stderr, "%s...\n", regprop(scan));
938+#endif
939+ next = regnext(g, scan);
2380c486 940+
e3fad6a6
AM
941+ switch (OP(scan)) {
942+ case BOL:
943+ if (g->reginput != g->regbol)
944+ return(0);
945+ break;
946+ case EOL:
947+ if (*g->reginput != '\0')
948+ return(0);
949+ break;
950+ case ANY:
951+ if (*g->reginput == '\0')
952+ return(0);
953+ g->reginput++;
954+ break;
955+ case EXACTLY: {
956+ register int len;
957+ register char *opnd;
2380c486 958+
e3fad6a6
AM
959+ opnd = OPERAND(scan);
960+ /* Inline the first character, for speed. */
961+ if (*opnd != *g->reginput)
962+ return(0);
963+ len = strlen(opnd);
964+ if (len > 1 && strncmp(opnd, g->reginput, len) != 0)
965+ return(0);
966+ g->reginput += len;
967+ }
968+ break;
969+ case ANYOF:
970+ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) == NULL)
971+ return(0);
972+ g->reginput++;
973+ break;
974+ case ANYBUT:
975+ if (*g->reginput == '\0' || strchr(OPERAND(scan), *g->reginput) != NULL)
976+ return(0);
977+ g->reginput++;
978+ break;
979+ case NOTHING:
980+ case BACK:
981+ break;
982+ case OPEN+1:
983+ case OPEN+2:
984+ case OPEN+3:
985+ case OPEN+4:
986+ case OPEN+5:
987+ case OPEN+6:
988+ case OPEN+7:
989+ case OPEN+8:
990+ case OPEN+9: {
991+ register int no;
992+ register char *save;
2380c486 993+
e3fad6a6
AM
994+ no = OP(scan) - OPEN;
995+ save = g->reginput;
2380c486 996+
e3fad6a6
AM
997+ if (regmatch(g, next)) {
998+ /*
999+ * Don't set startp if some later
1000+ * invocation of the same parentheses
1001+ * already has.
1002+ */
1003+ if (g->regstartp[no] == NULL)
1004+ g->regstartp[no] = save;
1005+ return(1);
1006+ } else
1007+ return(0);
1008+ }
1009+ break;
1010+ case CLOSE+1:
1011+ case CLOSE+2:
1012+ case CLOSE+3:
1013+ case CLOSE+4:
1014+ case CLOSE+5:
1015+ case CLOSE+6:
1016+ case CLOSE+7:
1017+ case CLOSE+8:
1018+ case CLOSE+9:
1019+ {
1020+ register int no;
1021+ register char *save;
2380c486 1022+
e3fad6a6
AM
1023+ no = OP(scan) - CLOSE;
1024+ save = g->reginput;
2380c486 1025+
e3fad6a6
AM
1026+ if (regmatch(g, next)) {
1027+ /*
1028+ * Don't set endp if some later
1029+ * invocation of the same parentheses
1030+ * already has.
1031+ */
1032+ if (g->regendp[no] == NULL)
1033+ g->regendp[no] = save;
1034+ return(1);
1035+ } else
1036+ return(0);
1037+ }
1038+ break;
1039+ case BRANCH: {
1040+ register char *save;
2380c486 1041+
e3fad6a6
AM
1042+ if (OP(next) != BRANCH) /* No choice. */
1043+ next = OPERAND(scan); /* Avoid recursion. */
1044+ else {
1045+ do {
1046+ save = g->reginput;
1047+ if (regmatch(g, OPERAND(scan)))
1048+ return(1);
1049+ g->reginput = save;
1050+ scan = regnext(g, scan);
1051+ } while (scan != NULL && OP(scan) == BRANCH);
1052+ return(0);
1053+ /* NOTREACHED */
2380c486 1054+ }
e3fad6a6
AM
1055+ }
1056+ break;
1057+ case STAR:
1058+ case PLUS: {
1059+ register char nextch;
1060+ register int no;
1061+ register char *save;
1062+ register int min;
1063+
1064+ /*
1065+ * Lookahead to avoid useless match attempts
1066+ * when we know what character comes next.
1067+ */
1068+ nextch = '\0';
1069+ if (OP(next) == EXACTLY)
1070+ nextch = *OPERAND(next);
1071+ min = (OP(scan) == STAR) ? 0 : 1;
1072+ save = g->reginput;
1073+ no = regrepeat(g, OPERAND(scan));
1074+ while (no >= min) {
1075+ /* If it could work, try it. */
1076+ if (nextch == '\0' || *g->reginput == nextch)
1077+ if (regmatch(g, next))
1078+ return(1);
1079+ /* Couldn't or didn't -- back up. */
1080+ no--;
1081+ g->reginput = save + no;
1082+ }
1083+ return(0);
1084+ }
1085+ break;
1086+ case END:
1087+ return(1); /* Success! */
1088+ break;
1089+ default:
1090+ printk("<3>Regexp: memory corruption\n");
1091+ return(0);
1092+ break;
2380c486 1093+ }
e3fad6a6
AM
1094+
1095+ scan = next;
2380c486
JR
1096+ }
1097+
e3fad6a6
AM
1098+ /*
1099+ * We get here only if there's trouble -- normally "case END" is
1100+ * the terminating point.
1101+ */
1102+ printk("<3>Regexp: corrupted pointers\n");
1103+ return(0);
2380c486
JR
1104+}
1105+
1106+/*
e3fad6a6 1107+ - regrepeat - repeatedly match something simple, report how many
2380c486 1108+ */
e3fad6a6
AM
1109+static int
1110+regrepeat(struct match_globals *g, char *p)
2380c486 1111+{
e3fad6a6
AM
1112+ register int count = 0;
1113+ register char *scan;
1114+ register char *opnd;
2380c486 1115+
e3fad6a6
AM
1116+ scan = g->reginput;
1117+ opnd = OPERAND(p);
1118+ switch (OP(p)) {
1119+ case ANY:
1120+ count = strlen(scan);
1121+ scan += count;
1122+ break;
1123+ case EXACTLY:
1124+ while (*opnd == *scan) {
1125+ count++;
1126+ scan++;
1127+ }
1128+ break;
1129+ case ANYOF:
1130+ while (*scan != '\0' && strchr(opnd, *scan) != NULL) {
1131+ count++;
1132+ scan++;
1133+ }
1134+ break;
1135+ case ANYBUT:
1136+ while (*scan != '\0' && strchr(opnd, *scan) == NULL) {
1137+ count++;
1138+ scan++;
1139+ }
1140+ break;
1141+ default: /* Oh dear. Called inappropriately. */
1142+ printk("<3>Regexp: internal foulup\n");
1143+ count = 0; /* Best compromise. */
1144+ break;
2380c486 1145+ }
e3fad6a6 1146+ g->reginput = scan;
2380c486 1147+
e3fad6a6 1148+ return(count);
2380c486
JR
1149+}
1150+
1151+/*
e3fad6a6 1152+ - regnext - dig the "next" pointer out of a node
2380c486 1153+ */
e3fad6a6
AM
1154+static char*
1155+regnext(struct match_globals *g, char *p)
2380c486 1156+{
e3fad6a6 1157+ register int offset;
2380c486 1158+
e3fad6a6
AM
1159+ if (p == &g->regdummy)
1160+ return(NULL);
2380c486 1161+
e3fad6a6
AM
1162+ offset = NEXT(p);
1163+ if (offset == 0)
1164+ return(NULL);
2380c486 1165+
e3fad6a6
AM
1166+ if (OP(p) == BACK)
1167+ return(p-offset);
1168+ else
1169+ return(p+offset);
2380c486
JR
1170+}
1171+
e3fad6a6
AM
1172+#ifdef DEBUG
1173+
1174+STATIC char *regprop();
1175+
2380c486 1176+/*
e3fad6a6 1177+ - regdump - dump a regexp onto stdout in vaguely comprehensible form
2380c486 1178+ */
e3fad6a6
AM
/*
 * Debug helper (only built when DEBUG is defined): walks the compiled
 * program of @r node by node, starting at r->program + 1 and stopping after
 * an END node has been printed.  For each node it prints the node's offset,
 * its regprop() name and the offset of its successor; ANYOF, ANYBUT and
 * EXACTLY nodes are followed by their literal operand string.  Finally the
 * regstart, reganch and regmust header fields are printed when set.
 *
 * NOTE(review): this routine uses printf()/putchar(), which look like
 * leftovers from the userspace version of the library -- confirm whether
 * DEBUG builds are expected to work in this tree.
 */
1179+void
1180+regdump(regexp *r)
2380c486 1181+{
e3fad6a6
AM
1182+ register char *s;
1183+ register char op = EXACTLY; /* Arbitrary non-END op. */
2380c486 1184+ register char *next;
e3fad6a6 1185+ /* extern char *strchr(); */
2380c486 1186+
2380c486 1187+
e3fad6a6
AM
1188+ s = r->program + 1;
1189+ while (op != END) { /* While that wasn't END last time... */
1190+ op = OP(s);
1191+ printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */
/* NOTE(review): regnext() is defined in this file with two parameters (g, p) but is called here with one -- confirm. */
1192+ next = regnext(s);
1193+ if (next == NULL) /* Next ptr. */
1194+ printf("(0)");
1195+ else
1196+ printf("(%d)", (s-r->program)+(next-s));
/* Step over the node header; presumably 3 bytes (opcode plus next offset) -- TODO confirm against the OP/NEXT/OPERAND macros. */
1197+ s += 3;
1198+ if (op == ANYOF || op == ANYBUT || op == EXACTLY) {
1199+ /* Literal string, where present. */
1200+ while (*s != '\0') {
1201+ putchar(*s);
1202+ s++;
1203+ }
/* Skip the operand's terminating NUL. */
1204+ s++;
1205+ }
1206+ putchar('\n');
2380c486 1207+ }
2380c486 1208+
e3fad6a6
AM
1209+ /* Header fields of interest. */
1210+ if (r->regstart != '\0')
1211+ printf("start `%c' ", r->regstart);
1212+ if (r->reganch)
1213+ printf("anchored ");
1214+ if (r->regmust != NULL)
1215+ printf("must have \"%s\"", r->regmust);
1216+ printf("\n");
2380c486
JR
1217+}
1218+
1219+/*
e3fad6a6 1220+ - regprop - printable representation of opcode
2380c486
JR
1221+ */
1222+static char *
e3fad6a6 1223+regprop(char *op)
2380c486 1224+{
e3fad6a6
AM
1225+#define BUFLEN 50
1226+ register char *p;
1227+ static char buf[BUFLEN];
2380c486 1228+
e3fad6a6 1229+ strcpy(buf, ":");
2380c486 1230+
e3fad6a6
AM
1231+ switch (OP(op)) {
1232+ case BOL:
1233+ p = "BOL";
2380c486 1234+ break;
e3fad6a6
AM
1235+ case EOL:
1236+ p = "EOL";
2380c486 1237+ break;
e3fad6a6
AM
1238+ case ANY:
1239+ p = "ANY";
2380c486 1240+ break;
e3fad6a6
AM
1241+ case ANYOF:
1242+ p = "ANYOF";
2380c486 1243+ break;
e3fad6a6
AM
1244+ case ANYBUT:
1245+ p = "ANYBUT";
2380c486 1246+ break;
e3fad6a6
AM
1247+ case BRANCH:
1248+ p = "BRANCH";
2380c486 1249+ break;
e3fad6a6
AM
1250+ case EXACTLY:
1251+ p = "EXACTLY";
2380c486 1252+ break;
e3fad6a6
AM
1253+ case NOTHING:
1254+ p = "NOTHING";
2380c486 1255+ break;
e3fad6a6
AM
1256+ case BACK:
1257+ p = "BACK";
1258+ break;
1259+ case END:
1260+ p = "END";
1261+ break;
1262+ case OPEN+1:
1263+ case OPEN+2:
1264+ case OPEN+3:
1265+ case OPEN+4:
1266+ case OPEN+5:
1267+ case OPEN+6:
1268+ case OPEN+7:
1269+ case OPEN+8:
1270+ case OPEN+9:
1271+ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "OPEN%d", OP(op)-OPEN);
1272+ p = NULL;
1273+ break;
1274+ case CLOSE+1:
1275+ case CLOSE+2:
1276+ case CLOSE+3:
1277+ case CLOSE+4:
1278+ case CLOSE+5:
1279+ case CLOSE+6:
1280+ case CLOSE+7:
1281+ case CLOSE+8:
1282+ case CLOSE+9:
1283+ snprintf(buf+strlen(buf),BUFLEN-strlen(buf), "CLOSE%d", OP(op)-CLOSE);
1284+ p = NULL;
1285+ break;
1286+ case STAR:
1287+ p = "STAR";
1288+ break;
1289+ case PLUS:
1290+ p = "PLUS";
1291+ break;
1292+ default:
1293+ printk("<3>Regexp: corrupted opcode\n");
2380c486
JR
1294+ break;
1295+ }
e3fad6a6
AM
1296+ if (p != NULL)
1297+ strncat(buf, p, BUFLEN-strlen(buf));
1298+ return(buf);
2380c486 1299+}
e3fad6a6 1300+#endif
2380c486 1301+
e3fad6a6
AM
1302+
1303diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regexp.h linux-5.11/net/netfilter/regexp/regexp.h
1304--- linux-5.11/net/netfilter/regexp/regexp.h 1970-01-01 01:00:00.000000000 +0100
1305+++ linux-5.11/net/netfilter/regexp/regexp.h 2021-02-24 13:21:48.335107407 +0100
1306@@ -0,0 +1,41 @@
2380c486 1307+/*
e3fad6a6
AM
1308+ * Definitions etc. for regexp(3) routines.
1309+ *
1310+ * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof],
1311+ * not the System V one.
2380c486 1312+ */
2380c486 1313+
e3fad6a6
AM
1314+#ifndef REGEXP_H
1315+#define REGEXP_H
2380c486 1316+
2380c486 1317+
e3fad6a6
AM
1318+/*
1319+http://www.opensource.apple.com/darwinsource/10.3/expect-1/expect/expect.h ,
1320+which contains a version of this library, says:
1321+
1322+ *
1323+ * NSUBEXP must be at least 10, and no greater than 117 or the parser
1324+ * will not work properly.
1325+ *
1326+
1327+However, it looks rather like this library is limited to 10. If you think
1328+otherwise, let us know.
1329+*/
1330+
1331+#define NSUBEXP 10
/*
 * A compiled regular expression together with the sub-match spans recorded
 * by the most recent match attempt.
 */
1332+typedef struct regexp {
/*
 * Start and end of each sub-match.  regtry() resets all NSUBEXP slots to
 * NULL before matching and, on success, stores the span of the whole match
 * in slot 0; the OPEN+n / CLOSE+n cases of regmatch() fill slot n.
 */
1333+ char *startp[NSUBEXP];
1334+ char *endp[NSUBEXP];
/* If not '\0', the matcher only tries positions that begin with this character. */
1335+ char regstart; /* Internal use only. */
/* Printed as "anchored" by regdump() when non-zero. */
1336+ char reganch; /* Internal use only. */
/* Printed as the "must have" string by regdump() when non-NULL. */
1337+ char *regmust; /* Internal use only. */
1338+ int regmlen; /* Internal use only. */
/*
 * The compiled program.  Its first byte is the MAGIC number (see
 * regmagic.h); the start node begins in the second byte.
 * NOTE(review): declared with one element; presumably the allocation made
 * by regcomp() extends past the end of the struct -- confirm.
 */
1339+ char program[1]; /* Unwarranted chumminess with compiler. */
1340+} regexp;
1341+
1342+regexp * regcomp(char *exp, int *patternsize);
1343+int regexec(regexp *prog, char *string);
1344+void regsub(regexp *prog, char *source, char *dest);
1345+void regerror(char *s);
2380c486 1346+
e3fad6a6
AM
1347+#endif
1348diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regmagic.h linux-5.11/net/netfilter/regexp/regmagic.h
1349--- linux-5.11/net/netfilter/regexp/regmagic.h 1970-01-01 01:00:00.000000000 +0100
1350+++ linux-5.11/net/netfilter/regexp/regmagic.h 2021-02-24 13:21:48.335107407 +0100
1351@@ -0,0 +1,5 @@
2380c486 1352+/*
e3fad6a6
AM
1353+ * The first byte of the regexp internal "program" is actually this magic
1354+ * number; the start node begins in the second byte.
2380c486 1355+ */
e3fad6a6
AM
1356+#define MAGIC 0234
1357diff -urNp -x '*.orig' linux-5.11/net/netfilter/regexp/regsub.c linux-5.11/net/netfilter/regexp/regsub.c
1358--- linux-5.11/net/netfilter/regexp/regsub.c 1970-01-01 01:00:00.000000000 +0100
1359+++ linux-5.11/net/netfilter/regexp/regsub.c 2021-02-24 13:21:48.335107407 +0100
1360@@ -0,0 +1,95 @@
2380c486 1361+/*
e3fad6a6
AM
1362+ * regsub
1363+ * @(#)regsub.c 1.3 of 2 April 86
1364+ *
1365+ * Copyright (c) 1986 by University of Toronto.
1366+ * Written by Henry Spencer. Not derived from licensed software.
1367+ *
1368+ * Permission is granted to anyone to use this software for any
1369+ * purpose on any computer system, and to redistribute it freely,
1370+ * subject to the following restrictions:
1371+ *
1372+ * 1. The author is not responsible for the consequences of use of
1373+ * this software, no matter how awful, even if they arise
1374+ * from defects in it.
1375+ *
1376+ * 2. The origin of this software must not be misrepresented, either
1377+ * by explicit claim or by omission.
1378+ *
1379+ * 3. Altered versions must be plainly marked as such, and must not
1380+ * be misrepresented as being the original software.
1381+ *
1382+ *
1383+ * This code was modified by Ethan Sommer to work within the kernel
1384+ * (it now uses kmalloc etc..)
2380c486 1385+ *
2380c486 1386+ */
e3fad6a6
AM
1387+#include "regexp.h"
1388+#include "regmagic.h"
1389+#include <linux/string.h>
2380c486 1390+
2380c486 1391+
e3fad6a6
AM
1392+#ifndef CHARBITS
1393+#define UCHARAT(p) ((int)*(unsigned char *)(p))
1394+#else
1395+#define UCHARAT(p) ((int)*(p)&CHARBITS)
1396+#endif
2380c486 1397+
e3fad6a6
AM
1398+#if 0
1399+//void regerror(char * s)
1400+//{
1401+// printk("regexp(3): %s", s);
1402+// /* NOTREACHED */
1403+//}
1404+#endif
2380c486
JR
1405+
1406+/*
e3fad6a6 1407+ - regsub - perform substitutions after a regexp match
2380c486 1408+ */
e3fad6a6
AM
1409+void
1410+regsub(regexp * prog, char * source, char * dest)
2380c486 1411+{
e3fad6a6
AM
1412+ register char *src;
1413+ register char *dst;
1414+ register char c;
1415+ register int no;
1416+ register int len;
1417+
1418+ /* Not necessary and gcc doesn't like it -MLS */
1419+ /*extern char *strncpy();*/
2380c486 1420+
e3fad6a6
AM
1421+ if (prog == NULL || source == NULL || dest == NULL) {
1422+ regerror("NULL parm to regsub");
1423+ return;
1424+ }
1425+ if (UCHARAT(prog->program) != MAGIC) {
1426+ regerror("damaged regexp fed to regsub");
2380c486 1427+ return;
2380c486
JR
1428+ }
1429+
e3fad6a6
AM
1430+ src = source;
1431+ dst = dest;
1432+ while ((c = *src++) != '\0') {
1433+ if (c == '&')
1434+ no = 0;
1435+ else if (c == '\\' && '0' <= *src && *src <= '9')
1436+ no = *src++ - '0';
1437+ else
1438+ no = -1;
2380c486 1439+
e3fad6a6
AM
1440+ if (no < 0) { /* Ordinary character. */
1441+ if (c == '\\' && (*src == '\\' || *src == '&'))
1442+ c = *src++;
1443+ *dst++ = c;
1444+ } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) {
1445+ len = prog->endp[no] - prog->startp[no];
1446+ (void) strncpy(dst, prog->startp[no], len);
1447+ dst += len;
1448+ if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */
1449+ regerror("damaged match string");
1450+ return;
1451+ }
1452+ }
1453+ }
1454+ *dst++ = '\0';
2380c486 1455+}
e3fad6a6
AM
1456diff -urNp -x '*.orig' linux-5.11/net/netfilter/xt_layer7.c linux-5.11/net/netfilter/xt_layer7.c
1457--- linux-5.11/net/netfilter/xt_layer7.c 1970-01-01 01:00:00.000000000 +0100
1458+++ linux-5.11/net/netfilter/xt_layer7.c 2021-02-24 13:21:48.335107407 +0100
1459@@ -0,0 +1,656 @@
2380c486 1460+/*
e3fad6a6 1461+ Kernel module to match application layer (OSI layer 7) data in connections.
2380c486 1462+
e3fad6a6 1463+ http://l7-filter.sf.net
2380c486 1464+
e3fad6a6 1465+ (C) 2003-2009 Matthew Strait and Ethan Sommer.
2380c486 1466+
e3fad6a6
AM
1467+ This program is free software; you can redistribute it and/or
1468+ modify it under the terms of the GNU General Public License
1469+ as published by the Free Software Foundation; either version
1470+ 2 of the License, or (at your option) any later version.
1471+ http://www.gnu.org/licenses/gpl.txt
1472+
1473+ Based on ipt_string.c (C) 2000 Emmanuel Roger <winfield@freegates.be>,
1474+ xt_helper.c (C) 2002 Harald Welte and cls_layer7.c (C) 2003 Matthew Strait,
1475+ Ethan Sommer, Justin Levandoski.
1476+*/
1477+
1478+#include <linux/spinlock.h>
1479+#include <linux/version.h>
1480+#include <net/ip.h>
1481+#include <net/tcp.h>
1482+#include <linux/module.h>
1483+#include <linux/skbuff.h>
1484+#include <linux/netfilter.h>
1485+#include <net/netfilter/nf_conntrack.h>
1486+#include <net/netfilter/nf_conntrack_core.h>
1487+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 27)
1488+#include <net/netfilter/nf_conntrack_extend.h>
1489+#include <net/netfilter/nf_conntrack_acct.h>
2380c486 1490+#endif
e3fad6a6
AM
1491+#include <linux/netfilter/x_tables.h>
1492+#include <linux/netfilter/xt_layer7.h>
1493+#include <linux/ctype.h>
1494+#include <linux/proc_fs.h>
2380c486 1495+
e3fad6a6 1496+#include "regexp/regexp.c"
2380c486 1497+
e3fad6a6
AM
1498+MODULE_LICENSE("GPL");
1499+MODULE_AUTHOR("Matthew Strait <quadong@users.sf.net>, Ethan Sommer <sommere@users.sf.net>");
1500+MODULE_DESCRIPTION("iptables application layer match module");
1501+MODULE_ALIAS("ipt_layer7");
1502+MODULE_VERSION("2.21");
2380c486 1503+
e3fad6a6
AM
1504+static int maxdatalen = 2048; // this is the default
1505+module_param(maxdatalen, int, 0444);
1506+MODULE_PARM_DESC(maxdatalen, "maximum bytes of data looked at by l7-filter");
1507+#ifdef CONFIG_NETFILTER_XT_MATCH_LAYER7_DEBUG
1508+ #define DPRINTK(format,args...) printk(format,##args)
1509+#else
1510+ #define DPRINTK(format,args...)
1511+#endif
2380c486 1512+
e3fad6a6
AM
1513+/* Number of packets whose data we look at.
1514+This can be modified through /proc/net/layer7_numpackets */
1515+static int num_packets = 10;
2380c486 1516+
e3fad6a6
AM
1517+static struct pattern_cache {
1518+ char * regex_string;
1519+ regexp * pattern;
1520+ struct pattern_cache * next;
1521+} * first_pattern_cache = NULL;
2380c486 1522+
e3fad6a6 1523+DEFINE_SPINLOCK(l7_lock);
2380c486 1524+
e3fad6a6
AM
1525+static int total_acct_packets(struct nf_conn *ct)
1526+{
1527+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 26)
1528+ BUG_ON(ct == NULL);
1529+ return (ct->counters[IP_CT_DIR_ORIGINAL].packets + ct->counters[IP_CT_DIR_REPLY].packets);
1530+#else
1531+ struct nf_conn_counter *acct;
2380c486 1532+
e3fad6a6
AM
1533+ BUG_ON(ct == NULL);
1534+ acct = nf_conn_acct_find(ct);
1535+ if (!acct)
1536+ return 0;
1537+ return (atomic64_read(&acct[IP_CT_DIR_ORIGINAL].packets) + atomic64_read(&acct[IP_CT_DIR_REPLY].packets));
1538+#endif
2380c486
JR
1539+}
1540+
e3fad6a6
AM
1541+#ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
1542+/* Converts an unfriendly string into a friendly one by
1543+replacing unprintables with periods and all whitespace with " ". */
1544+static char * friendly_print(unsigned char * s)
2380c486 1545+{
e3fad6a6
AM
1546+ char * f = kmalloc(strlen(s) + 1, GFP_ATOMIC);
1547+ int i;
2380c486 1548+
e3fad6a6
AM
1549+ if(!f) {
1550+ if (net_ratelimit())
1551+ printk(KERN_ERR "layer7: out of memory in "
1552+ "friendly_print, bailing.\n");
1553+ return NULL;
1554+ }
2380c486 1555+
e3fad6a6
AM
1556+ for(i = 0; i < strlen(s); i++){
1557+ if(isprint(s[i]) && s[i] < 128) f[i] = s[i];
1558+ else if(isspace(s[i])) f[i] = ' ';
1559+ else f[i] = '.';
2380c486 1560+ }
e3fad6a6
AM
1561+ f[i] = '\0';
1562+ return f;
2380c486
JR
1563+}
1564+
/*
 * Convert a value in the range 0..15 to its lower-case hexadecimal digit.
 * Any other value is reported (rate-limited) and yields '\0'.
 */
static char dec2hex(int i)
{
	static const char digits[] = "0123456789abcdef";

	if (i >= 0 && i <= 15)
		return digits[i];

	if (net_ratelimit())
		printk("layer7: Problem in dec2hex\n");
	return '\0';
}
2380c486 1580+
e3fad6a6
AM
1581+static char * hex_print(unsigned char * s)
1582+{
1583+ char * g = kmalloc(strlen(s)*3 + 1, GFP_ATOMIC);
1584+ int i;
2380c486 1585+
e3fad6a6
AM
1586+ if(!g) {
1587+ if (net_ratelimit())
1588+ printk(KERN_ERR "layer7: out of memory in hex_print, "
1589+ "bailing.\n");
1590+ return NULL;
1591+ }
2380c486 1592+
e3fad6a6
AM
1593+ for(i = 0; i < strlen(s); i++) {
1594+ g[i*3 ] = dec2hex(s[i]/16);
1595+ g[i*3 + 1] = dec2hex(s[i]%16);
1596+ g[i*3 + 2] = ' ';
1597+ }
1598+ g[i*3] = '\0';
2380c486 1599+
e3fad6a6
AM
1600+ return g;
1601+}
1602+#endif // DEBUG
2380c486 1603+
e3fad6a6
AM
1604+/* Use instead of regcomp. As we expect to be seeing the same regexps over and
1605+over again, it makes sense to cache the results. */
1606+static regexp * compile_and_cache(const char * regex_string,
1607+ const char * protocol)
1608+{
1609+ struct pattern_cache * node = first_pattern_cache;
1610+ struct pattern_cache * last_pattern_cache = first_pattern_cache;
1611+ struct pattern_cache * tmp;
1612+ unsigned int len;
2380c486 1613+
e3fad6a6
AM
1614+ while (node != NULL) {
1615+ if (!strcmp(node->regex_string, regex_string))
1616+ return node->pattern;
2380c486 1617+
e3fad6a6
AM
1618+ last_pattern_cache = node;/* points at the last non-NULL node */
1619+ node = node->next;
1620+ }
1621+
1622+ /* If we reach the end of the list, then we have not yet cached
1623+ the pattern for this regex. Let's do that now.
1624+ Be paranoid about running out of memory to avoid list corruption. */
1625+ tmp = kmalloc(sizeof(struct pattern_cache), GFP_ATOMIC);
1626+
1627+ if(!tmp) {
1628+ if (net_ratelimit())
1629+ printk(KERN_ERR "layer7: out of memory in "
1630+ "compile_and_cache, bailing.\n");
1631+ return NULL;
1632+ }
1633+
1634+ tmp->regex_string = kmalloc(strlen(regex_string) + 1, GFP_ATOMIC);
1635+ tmp->pattern = kmalloc(sizeof(struct regexp), GFP_ATOMIC);
1636+ tmp->next = NULL;
2380c486 1637+
e3fad6a6
AM
1638+ if(!tmp->regex_string || !tmp->pattern) {
1639+ if (net_ratelimit())
1640+ printk(KERN_ERR "layer7: out of memory in "
1641+ "compile_and_cache, bailing.\n");
1642+ kfree(tmp->regex_string);
1643+ kfree(tmp->pattern);
1644+ kfree(tmp);
1645+ return NULL;
2380c486
JR
1646+ }
1647+
e3fad6a6
AM
1648+ /* Ok. The new node is all ready now. */
1649+ node = tmp;
1650+
1651+ if(first_pattern_cache == NULL) /* list is empty */
1652+ first_pattern_cache = node; /* make node the beginning */
1653+ else
1654+ last_pattern_cache->next = node; /* attach node to the end */
1655+
1656+ /* copy the string and compile the regex */
1657+ len = strlen(regex_string);
1658+ DPRINTK("About to compile this: \"%s\"\n", regex_string);
1659+ node->pattern = regcomp((char *)regex_string, &len);
1660+ if ( !node->pattern ) {
1661+ if (net_ratelimit())
1662+ printk(KERN_ERR "layer7: Error compiling regexp "
1663+ "\"%s\" (%s)\n",
1664+ regex_string, protocol);
1665+ /* pattern is now cached as NULL, so we won't try again. */
1666+ }
1667+
1668+ strcpy(node->regex_string, regex_string);
1669+ return node->pattern;
2380c486
JR
1670+}
1671+
e3fad6a6 1672+static int can_handle(const struct sk_buff *skb)
2380c486 1673+{
e3fad6a6
AM
1674+ if(!ip_hdr(skb)) /* not IP */
1675+ return 0;
1676+ if(ip_hdr(skb)->protocol != IPPROTO_TCP &&
1677+ ip_hdr(skb)->protocol != IPPROTO_UDP &&
1678+ ip_hdr(skb)->protocol != IPPROTO_ICMP)
1679+ return 0;
1680+ return 1;
1681+}
2380c486 1682+
e3fad6a6
AM
1683+/* Returns the offset into skb->data at which the application data starts */
1684+static int app_data_offset(const struct sk_buff *skb)
1685+{
1686+ /* In case we are ported somewhere (ebtables?) where ip_hdr(skb)
1687+ isn't set, this can be gotten from 4*(skb->data[0] & 0x0f) as well. */
1688+ int ip_hl = 4*ip_hdr(skb)->ihl;
1689+
1690+ if( ip_hdr(skb)->protocol == IPPROTO_TCP ) {
1691+ /* 12 == offset into TCP header for the header length field.
1692+ Can't get this with skb->h.th->doff because the tcphdr
1693+ struct doesn't get set when routing (this is confirmed to be
1694+ true in Netfilter as well as QoS.) */
1695+ int tcp_hl = 4*(skb->data[ip_hl + 12] >> 4);
1696+
1697+ return ip_hl + tcp_hl;
1698+ } else if( ip_hdr(skb)->protocol == IPPROTO_UDP ) {
1699+ return ip_hl + 8; /* UDP header is always 8 bytes */
1700+ } else if( ip_hdr(skb)->protocol == IPPROTO_ICMP ) {
1701+ return ip_hl + 8; /* ICMP header is 8 bytes */
1702+ } else {
1703+ if (net_ratelimit())
1704+ printk(KERN_ERR "layer7: tried to handle unknown "
1705+ "protocol!\n");
1706+ return ip_hl + 8; /* something reasonable */
1707+ }
1708+}
1709+
1710+/* handles whether there's a match when we aren't appending data anymore */
1711+static int match_no_append(struct nf_conn * conntrack,
1712+ struct nf_conn * master_conntrack,
1713+ enum ip_conntrack_info ctinfo,
1714+ enum ip_conntrack_info master_ctinfo,
1715+ const struct xt_layer7_info * info)
1716+{
1717+ /* If we're in here, throw the app data away */
1718+ if(master_conntrack->layer7.app_data != NULL) {
1719+
1720+ #ifdef CONFIG_IP_NF_MATCH_LAYER7_DEBUG
1721+ if(!master_conntrack->layer7.app_proto) {
1722+ char * f =
1723+ friendly_print(master_conntrack->layer7.app_data);
1724+ char * g =
1725+ hex_print(master_conntrack->layer7.app_data);
1726+ DPRINTK("\nl7-filter gave up after %d bytes "
1727+ "(%d packets):\n%s\n",
1728+ strlen(f), total_acct_packets(master_conntrack), f);
1729+ kfree(f);
1730+ DPRINTK("In hex: %s\n", g);
1731+ kfree(g);
2380c486 1732+ }
e3fad6a6
AM
1733+ #endif
1734+
1735+ kfree(master_conntrack->layer7.app_data);
1736+ master_conntrack->layer7.app_data = NULL; /* don't free again */
1737+ }
1738+
1739+ if(master_conntrack->layer7.app_proto){
1740+ /* Here child connections set their .app_proto (for /proc) */
1741+ if(!conntrack->layer7.app_proto) {
1742+ conntrack->layer7.app_proto =
1743+ kmalloc(strlen(master_conntrack->layer7.app_proto)+1,
1744+ GFP_ATOMIC);
1745+ if(!conntrack->layer7.app_proto){
1746+ if (net_ratelimit())
1747+ printk(KERN_ERR "layer7: out of memory "
1748+ "in match_no_append, "
1749+ "bailing.\n");
1750+ return 1;
1751+ }
1752+ strcpy(conntrack->layer7.app_proto,
1753+ master_conntrack->layer7.app_proto);
2380c486 1754+ }
e3fad6a6
AM
1755+
1756+ return (!strcmp(master_conntrack->layer7.app_proto,
1757+ info->protocol));
1758+ }
1759+ else {
1760+ /* If not classified, set to "unknown" to distinguish from
1761+ connections that are still being tested. */
1762+ master_conntrack->layer7.app_proto =
1763+ kmalloc(strlen("unknown")+1, GFP_ATOMIC);
1764+ if(!master_conntrack->layer7.app_proto){
1765+ if (net_ratelimit())
1766+ printk(KERN_ERR "layer7: out of memory in "
1767+ "match_no_append, bailing.\n");
1768+ return 1;
2380c486 1769+ }
e3fad6a6
AM
1770+ strcpy(master_conntrack->layer7.app_proto, "unknown");
1771+ return 0;
2380c486 1772+ }
2380c486
JR
1773+}
1774+
e3fad6a6
AM
1775+/* add the new app data to the conntrack. Return number of bytes added. */
1776+static int add_data(struct nf_conn * master_conntrack,
1777+ char * app_data, int appdatalen)
2380c486 1778+{
e3fad6a6
AM
1779+ int length = 0, i;
1780+ int oldlength = master_conntrack->layer7.app_data_len;
2380c486 1781+
e3fad6a6
AM
1782+ /* This is a fix for a race condition by Deti Fliegl. However, I'm not
1783+ clear on whether the race condition exists or whether this really
1784+ fixes it. I might just be being dense... Anyway, if it's not really
1785+ a fix, all it does is waste a very small amount of time. */
1786+ if(!master_conntrack->layer7.app_data) return 0;
2380c486 1787+
e3fad6a6
AM
1788+ /* Strip nulls. Make everything lower case (our regex lib doesn't
1789+ do case insensitivity). Add it to the end of the current data. */
1790+ for(i = 0; i < maxdatalen-oldlength-1 &&
1791+ i < appdatalen; i++) {
1792+ if(app_data[i] != '\0') {
1793+ /* the kernel version of tolower mungs 'upper ascii' */
1794+ master_conntrack->layer7.app_data[length+oldlength] =
1795+ isascii(app_data[i])?
1796+ tolower(app_data[i]) : app_data[i];
1797+ length++;
1798+ }
1799+ }
2380c486 1800+
e3fad6a6
AM
1801+ master_conntrack->layer7.app_data[length+oldlength] = '\0';
1802+ master_conntrack->layer7.app_data_len = length + oldlength;
2380c486 1803+
e3fad6a6
AM
1804+ return length;
1805+}
2380c486 1806+
e3fad6a6
AM
1807+/* taken from drivers/video/modedb.c */
/*
 * Parse the leading run of decimal digits of @s as a non-negative integer.
 *
 * Parsing stops at the first non-digit character (including the
 * terminating NUL).  There is no sign or whitespace handling, so a string
 * that does not start with a digit yields 0.  Values too large for an int
 * saturate at the largest int instead of overflowing.
 */
static int my_atoi(const char *s)
{
	const int max_int = (int)(~0U >> 1);
	int val = 0;

	for (; *s >= '0' && *s <= '9'; s++) {
		int digit = *s - '0';

		/* Would 10 * val + digit exceed max_int?  Then saturate. */
		if (val > (max_int - digit) / 10)
			return max_int;
		val = 10 * val + digit;
	}

	return val;
}
1822+
1823+/* write out num_packets to userland. */
1824+static int layer7_read_proc(char* page, char ** start, off_t off, int count,
1825+ int* eof, void * data)
1826+{
1827+ if(num_packets > 99 && net_ratelimit())
1828+ printk(KERN_ERR "layer7: NOT REACHED. num_packets too big\n");
2380c486 1829+
e3fad6a6
AM
1830+ page[0] = num_packets/10 + '0';
1831+ page[1] = num_packets%10 + '0';
1832+ page[2] = '\n';
1833+ page[3] = '\0';
1834+
1835+ *eof=1;
1836+
1837+ return 3;
2380c486
JR
1838+}
1839+
e3fad6a6
AM
1840+/* Read in num_packets from userland */
1841+static int layer7_write_proc(struct file* file, const char* buffer,
1842+ unsigned long count, void *data)
2380c486 1843+{
e3fad6a6 1844+ char * foo = kmalloc(count, GFP_ATOMIC);
2380c486 1845+
e3fad6a6
AM
1846+ if(!foo){
1847+ if (net_ratelimit())
1848+ printk(KERN_ERR "layer7: out of memory, bailing. "
1849+ "num_packets unchanged.\n");
1850+ return count;
1851+ }
2380c486 1852+
e3fad6a6
AM
1853+ if(copy_from_user(foo, buffer, count)) {
1854+ return -EFAULT;
2380c486 1855+ }
e3fad6a6
AM
1856+
1857+
1858+ num_packets = my_atoi(foo);
1859+ kfree (foo);
1860+
1861+ /* This has an arbitrary limit to make the math easier. I'm lazy.
1862+ But anyway, 99 is a LOT! If you want more, you're doing it wrong! */
1863+ if(num_packets > 99) {
1864+ printk(KERN_WARNING "layer7: num_packets can't be > 99.\n");
1865+ num_packets = 99;
1866+ } else if(num_packets < 1) {
1867+ printk(KERN_WARNING "layer7: num_packets can't be < 1.\n");
1868+ num_packets = 1;
1869+ }
1870+
1871+ return count;
2380c486 1872+}
e3fad6a6
AM
1873+
1874+static bool
1875+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
1876+match(const struct sk_buff *skbin, struct xt_action_param *par)
1877+#else
1878+match(const struct sk_buff *skbin,
1879+ const struct net_device *in,
1880+ const struct net_device *out,
1881+ const struct xt_match *match,
1882+ const void *matchinfo,
1883+ int offset,
1884+ unsigned int protoff,
1885+ bool *hotdrop)
2380c486 1886+#endif
e3fad6a6
AM
1887+{
1888+ /* sidestep const without getting a compiler warning... */
1889+ struct sk_buff * skb = (struct sk_buff *)skbin;
2380c486 1890+
e3fad6a6
AM
1891+ const struct xt_layer7_info * info =
1892+ #if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
1893+ par->matchinfo;
1894+ #else
1895+ matchinfo;
1896+ #endif
2380c486 1897+
e3fad6a6
AM
1898+ enum ip_conntrack_info master_ctinfo, ctinfo;
1899+ struct nf_conn *master_conntrack, *conntrack;
1900+ unsigned char * app_data;
1901+ unsigned int pattern_result, appdatalen;
1902+ regexp * comppattern;
2380c486 1903+
e3fad6a6
AM
1904+ /* Be paranoid/incompetent - lock the entire match function. */
1905+ spin_lock_bh(&l7_lock);
2380c486 1906+
e3fad6a6
AM
1907+ if(!can_handle(skb)){
1908+ DPRINTK("layer7: This is some protocol I can't handle.\n");
1909+ spin_unlock_bh(&l7_lock);
1910+ return info->invert;
1911+ }
2380c486 1912+
e3fad6a6
AM
1913+ /* Treat parent & all its children together as one connection, except
1914+ for the purpose of setting conntrack->layer7.app_proto in the actual
1915+ connection. This makes /proc/net/ip_conntrack more satisfying. */
1916+ if(!(conntrack = nf_ct_get(skb, &ctinfo)) ||
1917+ !(master_conntrack=nf_ct_get(skb,&master_ctinfo))){
1918+ DPRINTK("layer7: couldn't get conntrack.\n");
1919+ spin_unlock_bh(&l7_lock);
1920+ return info->invert;
1921+ }
2380c486 1922+
e3fad6a6
AM
1923+ /* Try to get a master conntrack (and its master etc) for FTP, etc. */
1924+ while (master_ct(master_conntrack) != NULL)
1925+ master_conntrack = master_ct(master_conntrack);
2380c486 1926+
e3fad6a6
AM
1927+ /* if we've classified it or seen too many packets */
1928+ if(total_acct_packets(master_conntrack) > num_packets ||
1929+ master_conntrack->layer7.app_proto) {
2380c486 1930+
e3fad6a6
AM
1931+ pattern_result = match_no_append(conntrack, master_conntrack,
1932+ ctinfo, master_ctinfo, info);
2380c486 1933+
e3fad6a6
AM
1934+ /* skb->cb[0] == seen. Don't do things twice if there are
1935+ multiple l7 rules. I'm not sure that using cb for this purpose
1936+ is correct, even though it says "put your private variables
1937+ there". But it doesn't look like it is being used for anything
1938+ else in the skbs that make it here. */
1939+ skb->cb[0] = 1; /* marking it seen here's probably irrelevant */
2380c486 1940+
e3fad6a6
AM
1941+ spin_unlock_bh(&l7_lock);
1942+ return (pattern_result ^ info->invert);
1943+ }
2380c486 1944+
e3fad6a6
AM
1945+ if(skb_is_nonlinear(skb)){
1946+ if(skb_linearize(skb) != 0){
1947+ if (net_ratelimit())
1948+ printk(KERN_ERR "layer7: failed to linearize "
1949+ "packet, bailing.\n");
1950+ spin_unlock_bh(&l7_lock);
1951+ return info->invert;
1952+ }
1953+ }
2380c486 1954+
e3fad6a6
AM
1955+ /* now that the skb is linearized, it's safe to set these. */
1956+ app_data = skb->data + app_data_offset(skb);
1957+ appdatalen = skb_tail_pointer(skb) - app_data;
2380c486 1958+
e3fad6a6
AM
1959+ /* the return value gets checked later, when we're ready to use it */
1960+ comppattern = compile_and_cache(info->pattern, info->protocol);
2380c486 1961+
e3fad6a6
AM
1962+ /* On the first packet of a connection, allocate space for app data */
1963+ if(total_acct_packets(master_conntrack) == 1 && !skb->cb[0] &&
1964+ !master_conntrack->layer7.app_data){
1965+ master_conntrack->layer7.app_data =
1966+ kmalloc(maxdatalen, GFP_ATOMIC);
1967+ if(!master_conntrack->layer7.app_data){
1968+ if (net_ratelimit())
1969+ printk(KERN_ERR "layer7: out of memory in "
1970+ "match, bailing.\n");
1971+ spin_unlock_bh(&l7_lock);
1972+ return info->invert;
1973+ }
2380c486 1974+
e3fad6a6 1975+ master_conntrack->layer7.app_data[0] = '\0';
2380c486 1976+ }
e3fad6a6
AM
1977+
1978+ /* Can be here, but unallocated, if numpackets is increased near
1979+ the beginning of a connection */
1980+ if(master_conntrack->layer7.app_data == NULL){
1981+ spin_unlock_bh(&l7_lock);
1982+ return info->invert; /* unmatched */
2380c486
JR
1983+ }
1984+
e3fad6a6
AM
1985+ if(!skb->cb[0]){
1986+ int newbytes;
1987+ newbytes = add_data(master_conntrack, app_data, appdatalen);
2380c486 1988+
e3fad6a6
AM
1989+ if(newbytes == 0) { /* didn't add any data */
1990+ skb->cb[0] = 1;
1991+ /* Didn't match before, not going to match now */
1992+ spin_unlock_bh(&l7_lock);
1993+ return info->invert;
2380c486
JR
1994+ }
1995+ }
e3fad6a6
AM
1996+
1997+ /* If looking for "unknown", then never match. "Unknown" means that
1998+ we've given up; we're still trying with these packets. */
1999+ if(!strcmp(info->protocol, "unknown")) {
2000+ pattern_result = 0;
2001+ /* If looking for "unset", then always match. "Unset" means that we
2002+ haven't yet classified the connection. */
2003+ } else if(!strcmp(info->protocol, "unset")) {
2004+ pattern_result = 2;
2005+ DPRINTK("layer7: matched unset: not yet classified "
2006+ "(%d/%d packets)\n",
2007+ total_acct_packets(master_conntrack), num_packets);
2008+ /* If the regexp failed to compile, don't bother running it */
2009+ } else if(comppattern &&
2010+ regexec(comppattern, master_conntrack->layer7.app_data)){
2011+ DPRINTK("layer7: matched %s\n", info->protocol);
2012+ pattern_result = 1;
2013+ } else pattern_result = 0;
2014+
2015+ if(pattern_result == 1) {
2016+ master_conntrack->layer7.app_proto =
2017+ kmalloc(strlen(info->protocol)+1, GFP_ATOMIC);
2018+ if(!master_conntrack->layer7.app_proto){
2019+ if (net_ratelimit())
2020+ printk(KERN_ERR "layer7: out of memory in "
2021+ "match, bailing.\n");
2022+ spin_unlock_bh(&l7_lock);
2023+ return (pattern_result ^ info->invert);
2024+ }
2025+ strcpy(master_conntrack->layer7.app_proto, info->protocol);
2026+ } else if(pattern_result > 1) { /* cleanup from "unset" */
2027+ pattern_result = 1;
2028+ }
2029+
2030+ /* mark the packet seen */
2031+ skb->cb[0] = 1;
2032+
2033+ spin_unlock_bh(&l7_lock);
2034+ return (pattern_result ^ info->invert);
2380c486 2035+}
2380c486 2036+
e3fad6a6
AM
2037+// load nf_conntrack_ipv4
2038+static int check(const struct xt_mtchk_param *par)
2039+{
2040+ if (nf_ct_l3proto_try_module_get(par->family) < 0) {
2041+ pr_info("can't load conntrack support for "
2042+ "proto=%d\n", par->family);
2043+ return -EINVAL;
2044+ }
2045+ return 0;
2046+}
2380c486 2047+
2380c486 2048+
e3fad6a6
AM
2049+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2, 6, 28)
2050+ static void destroy(const struct xt_mtdtor_param *par)
2051+ {
2052+ nf_ct_l3proto_module_put(par->family);
2053+ }
2054+#else
2055+ static void destroy(const struct xt_match *match, void *matchinfo)
2056+ {
2057+ nf_ct_l3proto_module_put(match->family);
2058+ }
2380c486
JR
2059+#endif
2060+
e3fad6a6
AM
2061+static struct xt_match xt_layer7_match[] __read_mostly = {
2062+{
2063+ .name = "layer7",
2064+ .family = AF_INET,
2065+ .checkentry = check,
2066+ .match = match,
2067+ .destroy = destroy,
2068+ .matchsize = sizeof(struct xt_layer7_info),
2069+ .me = THIS_MODULE
2070+}
2071+};
2380c486 2072+
e3fad6a6
AM
2073+static void layer7_cleanup_proc(void)
2074+{
2075+ remove_proc_entry("layer7_numpackets", init_net.proc_net);
2076+}
2380c486 2077+
e3fad6a6
AM
2078+/* register the proc file */
2079+static void layer7_init_proc(void)
2080+{
2081+ struct proc_dir_entry* entry;
2082+ entry = create_proc_entry("layer7_numpackets", 0644, init_net.proc_net);
2083+ entry->read_proc = layer7_read_proc;
2084+ entry->write_proc = layer7_write_proc;
2085+}
2380c486 2086+
e3fad6a6
AM
2087+static int __init xt_layer7_init(void)
2088+{
2089+ need_conntrack();
2090+
2091+ layer7_init_proc();
2092+ if(maxdatalen < 1) {
2093+ printk(KERN_WARNING "layer7: maxdatalen can't be < 1, "
2094+ "using 1\n");
2095+ maxdatalen = 1;
2096+ }
2097+ /* This is not a hard limit. It's just here to prevent people from
2098+ bringing their slow machines to a grinding halt. */
2099+ else if(maxdatalen > 65536) {
2100+ printk(KERN_WARNING "layer7: maxdatalen can't be > 65536, "
2101+ "using 65536\n");
2102+ maxdatalen = 65536;
2103+ }
2104+ return xt_register_matches(xt_layer7_match,
2105+ ARRAY_SIZE(xt_layer7_match));
2106+}
2107+
2108+static void __exit xt_layer7_fini(void)
2109+{
2110+ layer7_cleanup_proc();
2111+ xt_unregister_matches(xt_layer7_match, ARRAY_SIZE(xt_layer7_match));
2112+}
2113+
2114+module_init(xt_layer7_init);
2115+module_exit(xt_layer7_fini);
This page took 0.627862 seconds and 4 git commands to generate.