]> git.pld-linux.org Git - packages/cleanfeed.git/blame - cleanfeed.8
- drop obsolete and outdated manual inclusion of rpm macros
[packages/cleanfeed.git] / cleanfeed.8
CommitLineData
74370a5d
PG
1.rn '' }`
2''' $RCSfile$$Revision$$Date$
3'''
4''' $Log$
5'''
6.de Sh
7.br
8.if t .Sp
9.ne 5
10.PP
11\fB\\$1\fR
12.PP
13..
14.de Sp
15.if t .sp .5v
16.if n .sp
17..
18.de Ip
19.br
20.ie \\n(.$>=3 .ne \\$3
21.el .ne 3
22.IP "\\$1" \\$2
23..
24.de Vb
25.ft CW
26.nf
27.ne \\$1
28..
29.de Ve
30.ft R
31
32.fi
33..
34'''
35'''
36''' Set up \*(-- to give an unbreakable dash;
37''' string Tr holds user defined translation string.
38''' Bell System Logo is used as a dummy character.
39'''
40.tr \(*W-|\(bv\*(Tr
41.ie n \{\
42.ds -- \(*W-
43.ds PI pi
44.if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
45.if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch
46.ds L" ""
47.ds R" ""
48''' \*(M", \*(S", \*(N" and \*(T" are the equivalent of
49''' \*(L" and \*(R", except that they are used on ".xx" lines,
50''' such as .IP and .SH, which do another additional levels of
51''' double-quote interpretation
52.ds M" """
53.ds S" """
54.ds N" """""
55.ds T" """""
56.ds L' '
57.ds R' '
58.ds M' '
59.ds S' '
60.ds N' '
61.ds T' '
62'br\}
63.el\{\
64.ds -- \(em\|
65.tr \*(Tr
66.ds L" ``
67.ds R" ''
68.ds M" ``
69.ds S" ''
70.ds N" ``
71.ds T" ''
72.ds L' `
73.ds R' '
74.ds M' `
75.ds S' '
76.ds N' `
77.ds T' '
78.ds PI \(*p
79'br\}
80.\" If the F register is turned on, we'll generate
81.\" index entries out stderr for the following things:
82.\" TH Title
83.\" SH Header
84.\" Sh Subsection
85.\" Ip Item
86.\" X<> Xref (embedded
87.\" Of course, you have to process the output yourself
88.\" in some meaningful fashion.
89.if \nF \{
90.de IX
91.tm Index:\\$1\t\\n%\t"\\$2"
92..
93.nr % 0
94.rr F
95.\}
96.TH cleanfeed 8 "Version 0.95.7b" "26/Aug/98" "Cleanfeed - Because spam sucks"
97.UC
98.if n .hy 0
99.if n .na
100.ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
101.de CQ \" put $1 in typewriter font
102.ft CW
103'if n "\c
104'if t \\&\\$1\c
105'if n \\&\\$1\c
106'if n \&"
107\\&\\$2 \\$3 \\$4 \\$5 \\$6 \\$7
108'.ft R
109..
110.\" @(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2
111. \" AM - accent mark definitions
112.bd B 3
113. \" fudge factors for nroff and troff
114.if n \{\
115. ds #H 0
116. ds #V .8m
117. ds #F .3m
118. ds #[ \f1
119. ds #] \fP
120.\}
121.if t \{\
122. ds #H ((1u-(\\\\n(.fu%2u))*.13m)
123. ds #V .6m
124. ds #F 0
125. ds #[ \&
126. ds #] \&
127.\}
128. \" simple accents for nroff and troff
129.if n \{\
130. ds ' \&
131. ds ` \&
132. ds ^ \&
133. ds , \&
134. ds ~ ~
135. ds ? ?
136. ds ! !
137. ds /
138. ds q
139.\}
140.if t \{\
141. ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u"
142. ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u'
143. ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u'
144. ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u'
145. ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u'
146. ds ? \s-2c\h'-\w'c'u*7/10'\u\h'\*(#H'\zi\d\s+2\h'\w'c'u*8/10'
147. ds ! \s-2\(or\s+2\h'-\w'\(or'u'\v'-.8m'.\v'.8m'
148. ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u'
149. ds q o\h'-\w'o'u*8/10'\s-4\v'.4m'\z\(*i\v'-.4m'\s+4\h'\w'o'u*8/10'
150.\}
151. \" troff and (daisy-wheel) nroff accents
152.ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V'
153.ds 8 \h'\*(#H'\(*b\h'-\*(#H'
154.ds v \\k:\h'-(\\n(.wu*9/10-\*(#H)'\v'-\*(#V'\*(#[\s-4v\s0\v'\*(#V'\h'|\\n:u'\*(#]
155.ds _ \\k:\h'-(\\n(.wu*9/10-\*(#H+(\*(#F*2/3))'\v'-.4m'\z\(hy\v'.4m'\h'|\\n:u'
156.ds . \\k:\h'-(\\n(.wu*8/10)'\v'\*(#V*4/10'\z.\v'-\*(#V*4/10'\h'|\\n:u'
157.ds 3 \*(#[\v'.2m'\s-2\&3\s0\v'-.2m'\*(#]
158.ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#]
159.ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H'
160.ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u'
161.ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#]
162.ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#]
163.ds ae a\h'-(\w'a'u*4/10)'e
164.ds Ae A\h'-(\w'A'u*4/10)'E
165.ds oe o\h'-(\w'o'u*4/10)'e
166.ds Oe O\h'-(\w'O'u*4/10)'E
167. \" corrections for vroff
168.if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u'
169.if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u'
170. \" for low resolution devices (crt and lpr)
171.if \n(.H>23 .if \n(.V>19 \
172\{\
173. ds : e
174. ds 8 ss
175. ds v \h'-1'\o'\(aa\(ga'
176. ds _ \h'-1'^
177. ds . \h'-1'.
178. ds 3 3
179. ds o a
180. ds d- d\h'-1'\(ga
181. ds D- D\h'-1'\(hy
182. ds th \o'bp'
183. ds Th \o'LP'
184. ds ae ae
185. ds Ae AE
186. ds oe oe
187. ds Oe OE
188.\}
189.rm #[ #] #H #V #F C
190.SH "NAME"
191Cleanfeed \- spam filter for Usenet news servers
192.SH "SYNOPSIS"
193\fBINN:\fR Installed as \fBfilter_innd.pl\fR, location is configured into
194INN at compile time.
195.PP
196\fBHighwind servers:\fR <command line> \-program cleanfeed \-body
197.PP
198\fBNNTPRelay\fR: ExternalFilter=c:/perl/bin/perl.exe c:/news/cleanfeed.pl
199.SH "DESCRIPTION"
200A spam filter for Usenet servers. \fBCleanfeed\fR blocks spam on the way
201into your server, before it is written to disk or propagated to outbound
202feeds. It can also block binaries in non-binary newsgroups and includes
203several other features to keep your newsfeed clean.
204.PP
205Cleanfeed currently works with INN, Cyclone, Typhoon, Breeze, and
206NNTPRelay servers. See my webpage (listed at the end of this document)
207for pointers to information about using Cleanfeed with CNews, Diablo,
208Collabra, or INN versions earlier than 1.5.1.
209.SH "USAGE"
210For all versions, place the \fIcleanfeed.conf\fR configuration file
211somewhere, then edit the Cleanfeed source file and change the
212\fB$config_dir\fR option at the top to point to the directory where
213the config file lives.
214.Ip "\fB\s-1INN\s0\fR" 4
215Install the filter file (called cleanfeed) as \fIfilter_innd.pl\fR, and
216cleanfeed.conf, in the location you specified in \fIconfig.data\fR (\s-1INN\s0
2171.7.2 and earlier) or when configuring \s-1INN\s0 2.x (usually the bin/filter
218directory under the installation root). Make sure both files are readable
219by the news user. Once in place, the filter is loaded with the command
220\fBctlinnd reload filter.perl meow\fR. Filtering can be turned on with
221\fBctlinnd perl y\fR and turned off with \fBctlinnd perl n\fR.
222.Ip "\fBCyclone/Typhoon/Breeze\fR" 4
223Add the \fB\-program\fR <file> and \fB\-body\fR options to the \fIbin/start\fR
224script, where <file> is the location and name of the Cleanfeed
225program. Restart the server. Cleanfeed will run as an external process
226(standalone mode). \s-1IMPORTANT\s0: make sure both cleanfeed and cleanfeed.conf
227are readable by the news user! Double-check the permissions as this is
228a fairly common mistake!
229.Ip "\fBNNTPRelay\fR" 4
230Find the ExternalFilter directive in \fIconfig.txt\fR and make it look like:
231.Sp
232ExternalFilter=c:/perl/bin/perl.exe c:/news/cleanfeed.pl
233.Sp
234Cleanfeed will run as an external process (standalone mode).
235.PP
236More detailed installation instructions are provided later in this
237document.
238.SH "CONFIGURATION OPTIONS"
239Configuration is accomplished by setting the various options in the
240\fIcleanfeed.conf\fR configuration file. This file is evaluated as Perl
241code, so comments can be included in the usual Perl # syntax. A
242sample default file is included with the distribution.
243.PP
244If you would rather not use \fIcleanfeed.conf\fR, you can set its
245location to \*(L"undef\*(R" in the source and edit the configuration
246variables directly in the source file.
247.PP
248\fIcleanfeed.conf\fR has two sections (which define perl hashes):
249\fB%config_local\fR and \fB%config_append\fR. Entries in \fB%config_local\fR
250will override the default settings of the same name in the Cleanfeed
251source. Entries in \fB%config_append\fR can be used to add to most of
252the default regular expressions, for items such as \fBbadguys\fR,
253\fBbin_allowed\fR, \fBpoison_groups\fR, etc. Settings in \fB%config_append\fR
254for these items will be appended to the default regexps, separated by
255\*(L"|\*(R" (or).
256.PP
257If you want to completely override the default regexps for these options,
258rather than just add to the defaults, you can add an entry for them into
259the \fB%config_local\fR section of \fIcleanfeed.conf\fR.
260.PP
261All of this is done quite blindly, so if you do anything odd, be careful.
262(Cleanfeed will remove the common mistake of including two \*(L"|\*(R" (or) signs
263in a row.) All config options are exposed to \fB%config_local\fR, including
264any that may not be present in the sample file. Only the defined list of
265options are exposed to \fB%config_append\fR.
266.PP
267Options that are on/off or yes/no should be set to 1 for on/yes, or 0
268for off/no.
269.PP
270First, you need to tell Cleanfeed which news server software you are
271using. At the top of the file, set the appropriate variable to 1. For
272INN, set \fB$inn\fR; for Cyclone, Typhoon, or Breeze, set \fB$highwind\fR; and
273for NNTPRelay, set \fB$nntprelay\fR. Ensure the other two (the ones you're
274not using) are set to 0.
275.Sh "\fBGeneral Settings\fR"
276.Ip "\fBaggressive\fR" 4
277Set this to 0 to disable all content-based filters. Helpful to please
278paranoid lawyers, or paranoid customers.
279.Ip "\fBactive_file\fR" 8
280Set this to the full path to an active file, to allow Cleanfeed to know
281what groups are moderated. This is normally your server's active file,
282but it doesn't have to be; it is possible, for example, to run Cyclone
283with no active file, but give one to Cleanfeed anyway.
284.Sh "\fB\s-1MD5\s0 Body Filter Settings\fR"
285.Ip "\fBdo_md5\fR" 8
286When turned on, the \s-1MD5\s0 \s-1EMP\s0 checks will be done. This should be left
287on unless you have a really good reason to turn it off. If you're
288running Hippo along with Cleanfeed, you might feel Cleanfeed's \s-1MD5\s0
289checks are redundant and want to turn them off, for example. It
290would probably be better to leave it on with the history turned
291down, instead.
292.Ip "\fBmd5maxmultiposts\fR" 8
293Start rejecting articles after we have seen this many copies, according
294to the \s-1MD5\s0 checksum filter.
295.Ip "\fBMD5History\fR" 8
296How many articles to remember for \s-1MD5-\s0based \s-1EMP\s0 comparison. Since the \s-1MD5\s0
297filter is not prone to false positives, setting this higher is a good idea
298to catch more spam, if you have the \s-1RAM\s0 to spare.
299.Ip "\fBMD5maxlife\fR" 8
300When a spam is identified by the \s-1MD5\s0 \s-1EMP\s0 filter, it is saved for continual
301rejection. \fBMD5maxlife\fR specifies how long, in hours, to keep a saved
302\s-1MD5\s0 id which is no longer getting any hits. (A spam id which is still
303getting matches will be saved regardless of age.) 24 hours works well.
304.Ip "\fBfuzzy_md5\fR" 8
305When turned on, the message bodies will be munged up a bit before \s-1MD5\s0
306checksums are generated. Whitespace and other non-alphanumeric
307characters are stripped and letters are forced to lowercase, as well
308as a couple other bits of treachery to try to defeat the \*(L"hashbuster\*(R"
309spam-bots. This adds a bit of \*(L"fuzziness\*(R" to the \s-1MD5\s0 filter, and
310results in a performance hit as well.
311.Sp
312Since the smarter spammers have discovered hashbusting, I recommend
313that this be turned on.
314.Ip "\fBfuzzy_max_length\fR" 8
315Sets the maximum number of lines for an article body to be subject to
316the \fBfuzzy_md5\fR munging (above). This keeps extremely large articles
317out of those nasty regular expressions.
318.Ip "\fBmd5_skips_followups\fR" 8
319Determines whether the \s-1MD5\s0 filter checks articles with References
320headers. The default is to skip them. Setting this option to 0
321will result in all articles passing through the \s-1MD5\s0 filter, which
322can result in a major performance hit, but does close another hole
323in the filter. If you turn this off, you should increase \fBMD5History\fR
324as well to avoid shortening your \*(L"window\*(R".
325.Ip "\fBMD5HistSize\fR" 8
326The maximum allowed size of the \s-1EMP\s0 memory for the \s-1MD5-\s0checksum \s-1EMP\s0 filter.
327Use this as a \*(L"sanity check\*(R" to prevent a sudden burst of spam from eating
328up all of your memory. It should be set high enough so that you normally
329never hit this number; use the \fBMD5maxlife\fR to expire the hash instead.
330.Sh "\fBHeader-Based \s-1EMP\s0 Filter Settings\fR"
331.Ip "\fBdo_phl\fR" 8
332Turns on the \s-1NNTP\s0\-Posting-Host/Lines \s-1EMP\s0 filter. This filter identifies
333spam by identical posting-host headers and article sizes in a short period
334of time. You really don't want to turn this off.
335.Ip "\fBdo_fsl\fR" 8
336Turns on the From/Subject/Lines \s-1EMP\s0 filter. This filter identifies spam
337by identical From and Subject headers and article sizes in a short period
338of time. This is the one that gets the least number of hits these days,
339so you won't lose much by shutting it off.
340.Ip "\fBmaxmultiposts\fR" 8
341Start rejecting articles after we have seen this many copies, according
342to the header-based \s-1EMP\s0 filter. Since false positives are somewhat more
343likely with this filter than with \s-1MD5\s0, this should be set appropriately
344higher to reduce the odds.
345.Ip "\fBArticleHistory\fR" 8
346How many ids to remember for header-based \s-1EMP\s0 comparison. Setting this
347higher will catch more spam because there will be a larger \*(L"window\*(R" to
348look at. Larger settings will also consume more memory and have a (small)
349impact on performance, as well as slightly increase the chance of a false
350positive (since the sample size will be larger). Most articles will
351actually take up two entries in this history because there are two
352different header-based filters.
353.Ip "\fBEMPmaxlife\fR" 8
354Same as \fBMD5maxlife\fR but for the header-based \s-1EMP\s0 filter.
355.Ip "\fBEMPHistSize\fR" 8
356Same as \fBMD5HistSize\fR but for the header-based \s-1EMP\s0 filter. If you are
357running the header-based filter but not the \s-1MD5\s0 filter for whatever
358reason, set this high.
359.Sh "\fBExcessive Crosspost Settings\fR"
360.Ip "\fBmaxgroups\fR" 8
361Reject articles crossposted so that followups will be to more than
362this many newsgroups.
363.Ip "\fBlow_xpost_maxgroups\fR" 8
364Specify a special, lower crosspost limit for certain groups, specified
365by regular expression in \fBlow_xpost_groups\fR (below). Useful for being
366more strict in groups plagued by crossposting, such as sex, binaries,
367and jobs groups. (Replaces the old \fBtfjmaxgroups\fR option.)
368.Sh "\fBMisplaced Binaries Filter\fR"
369.Ip "\fBblock_binaries\fR" 8
370Enables blocking of binary posts in non-binary newsgroups. Which newsgroups
371allow binaries is configured with \fBbin_allowed\fR (below).
372.Ip "\fBmax_encoded_lines\fR" 8
373Sets the number of uuencoded or base64-encoded lines to allow before
374considering a post to be a binary. This should be set high enough to pass
375regular \s-1PGP\s0 signatures. (Those satanic Netscape crypto-sigs can die along
376with the other binaries.) Default is 15 lines, which may be a little low if
377you are lenient, which you're not.
378.Ip "\fBbinaries_in_mod_groups\fR" 8
379If set, binaries are allowed in spite of \fBblock_binaries\fR if they are
380posted only to moderated groups (requires \fBactive_file\fR).
381.Sh "\fB\s-1HTML\s0\fR"
382.Ip "\fBblock_mime_html\fR" 8
383Enables blocking of \s-1MIME\s0\-encapsulated \s-1HTML\s0 posts. This does \s-1NOT\s0 affect
384straight text/html or multipart/alternative posts of the type created by
385misconfigured Netscape and Microsoft \*(L"newsreaders\*(R", but \s-1ONLY\s0 posts which
386are \s-1MIME\s0\-encapsulated \s-1HTML\s0, a favorite format of sex spammers which
387often sneaks in under the \s-1EMP\s0 radar.
388.Ip "\fBblock_html\fR" 8
389Enables blocking of \s-1HTML\s0 and multipart/alternative posts. You can specify
390group patterns where \s-1HTML\s0 is allowed by setting html_allowed (below).
391.Sh "\fBCancel Message Filtering\fR"
392.Ip "\fBblock_late_cancels\fR" 8
393If turned on, cancels for recently rejected articles will be rejected.
394Set the window with \fBMIDmaxlife\fR (below). This will result in a
395\fIhuge\fR number of rejections if you have multiple full feeds and you
396aren't backlogging. If you are concerned about your downstream sites
397receiving the cancels, leave this off. If you need a performance boost,
398turn it on.
399.Ip "\fBMIDmaxlife\fR" 8
400How long to remember rejected message-ids so cancels for these posts can
401later be rejected. Specified in hours. This only has an effect if
402\fBblock_late_cancels\fR is enabled (above).
403.Sh "\fBDisabling Other Filters\fR"
404.Ip "\fBdo_scoring_filter\fR" 8
405Enables the (new) \*(L"scoring\*(R" filter. You probably want to leave this on,
406even if you need to turn off \fBaggressive\fR mode (turning off \fBaggressive\fR
407mode will disable the content-based parts of the scoring filter).
408.Ip "\fBdo_mid_filter\fR (\s-1INN\s0 only)" 8
409Enables the message-id filter. This requires an additional patch to
410\s-1INN\s0 1.7.2, which is included with Cleanfeed (but optional). The patch
411adds a new Perl hook to check message-id's during the \s-1NNTP\s0 \s-1CHECK\s0
412transaction, and decide whether to refuse the article. There is a
413patch for this for \s-1INN\s0 2.0 which may get incorporated into the \s-1INN\s0
414distribution at some point. The default is off.
415.Ip "\fBdo_bot_checks\fR" 8
416Enables the filters that check for spam bot signatures. The only reason
417you would ever want to turn this off is if you've written your own
418version, or something. Otherwise, leave it on.
419.Ip "\fBdo_supersedes_filter\fR" 8
420Enables the Excessive Supersedes filter, to catch rogue Supersedes
421attacks. This filter begins dropping articles with Supersedes headers
422if too many appear from the same posting-host in a short time. Moderated
423groups are given a higher limit (if \fBactive_file\fR is set), as is
424news.answers. Default is on.
425.Ip "\fBcheck_supersedes_path\fR" 8
426If set, \fBbad_cancel_paths\fR will also be applied to Supersedes articles.
427Articles with Supersedes headers, where a path element matches the regexp
428in \fBbad_cancel_paths\fR, will be dropped. Default is on.
429.Ip "\fBdrop_useless_controls\fR" 8
430If set, all control messages of types sendsys, senduuname, and version
431will be dropped. These are no longer useful and are a hole for
432denial-of-service attacks due to the way \s-1INN\s0 and some other servers
433handle them. On by default.
434.Ip "\fBdrop_ihave_sendme\fR" 8
435If set, control messages of types ihave and sendme will be dropped.
436See \fBdrop_useless_controls\fR. If you use these types of control messages,
437turn this off. If you're not sure, then you're not using them.
438.Ip "\fBdrop_control_with_supersedes\fR" 8
439Drops any and all control messages which contain a Supersedes header.
440Since control messages are not passed through the same filters as regular
441messages, a rogue Supersedes attack can use control messages to avoid
442filtering; this option closes this hole. Legitimate control messages
443don't have Supersedes headers. On by default.
444.Sh "\fBHash-Trimming\fR"
445.Ip "\fBtrimcycles\fR" 8
446The \s-1EMP\s0 memories are trimmed every \fBtrimcycles\fR times through the filter.
447.Ip "\fBEMPstarttrimming\fR" 8
448Tells the filter not to waste time trimming the \s-1EMP\s0 memories until they
449have this many entries. Just a minor performance enhancement during
450the first hours the filter is running or when you first start \fBinnd\fR.
451.Sh "\fBLogging\fR"
452.Ip "\fBverbose\fR" 8
453When turned on, verbose logging to news.notice will happen; spam domains
454will be listed, etc. When off, only general messages will be logged,
455making the news.daily summaries less interesting but much shorter and
456more to the point. (There is, alas, no way to shut off news.notice
457logging entirely.) (news.notice only applies to \s-1INN\s0.) Note that this
458will not reduce the number of log entries, but only their verbosity.
459.Ip "\fBlogfile\fR (Standalone Mode)" 8
460If set to the path to a file, this will enable logging of message-ids
461of all articles processed by the filter. Rejections will be logged
462with the reason for rejection. Note that this will create a very large
463logfile which you will need to rotate or delete (see \fBmax_log_size\fR,
464below).
465.Ip "\fBreportfile\fR (Standalone Mode)" 8
466If set to the path to a file, this will enable generation of a simple
467report of articles accepted and rejected. The report file will contain
468one entry per line with the start time, end time, number of articles
469accepted, and number of articles rejected, tab-separated.
470.Ip "\fBlog_accepts\fR (Standalone Mode)" 8
471When using the above logfiles, this setting determines whether articles
472accepted should be logged. When disabled, only rejections will be logged.
473.Ip "\fBmax_log_size\fR (Standalone Mode)" 8
474The size at which to rotate the \fBlogfile\fR. This will be replaced by
475time-based rotation at some point.
476.Ip "\fBstatfile\fR" 8
477If this is set to the full path of a file, a crude stats file will be
478written each time the filter is reloaded with \fBctlinnd reload
479filter.perl meow\fR (for \s-1INN\s0) or whenever the Cleanfeed process receives a
480\s-1SIGUSR1\s0 (for standalone mode). The file shows how many entries are
481present in each of the \s-1EMP\s0 histories, \s-1MID\s0 history and excessive
482supersedes history; timer information if enabled (see \fBtimer_info\fR);
483and the contents of all configuration settings. Posting-hosts for
484each supersedes entry will be listed, along with their counts; these
485are not being rejected unless they are over the threshold. The
486default for this is undef, which disables creation of the stat file.
487.Sp
488More comprehensive stats are planned for the future.
489.Sh "\fBTiming Info\fR"
490.Ip "\fBtimer_info\fR" 8
491When enabled, Cleanfeed will generate timing statistics telling you
492how many articles per second are being examined by the filter and
493being accepted by the filter. This information will appear in the
494statfile if this is enabled, and in the output of \s-1INN\s0's \fBctlinnd mode\fR
495if the \fImode.patch\fR is applied to \s-1INN\s0. Note that the accepted/second
496rate is not necessarily the rate at which your server is accepting
497articles; articles can be rejected by the server after Cleanfeed
498passes them, for example if they are posted to groups not in your
499active file.
500.Ip "\fBtimer_interval\fR" 8
501The period over which to average timing information, in seconds. The
502default is 600 seconds, or 10 minutes.
503.Sh "\fBDebugging\fR"
504.Ip "\fBdebug_batch_directory\fR" 8
505Specifies a directory where debugging \*(L"batchfiles\*(R" can be written.
506See the Hacker's Guide in this document for more information.
507.Ip "\fBdebug_batch_size\fR" 8
508The maximum size of a debugging batchfile before it gets rotated.
509Rotation is done by renaming the file to file.1, file.2, etc.,
510using the lowest number that doesn't already exist.
511.Sh "\fBRegular Expressions\fR"
512You can add to most of these regular expressions in the \fB%config_append\fR
513section of \fIcleanfeed.conf\fR; settings you add there will be added to
514the defaults, rather than overriding them. If you want to completely
515override the default settings you can add entries for these to the
516\fB%config_local\fR section instead.
517.Ip "\fBbin_allowed\fR" 8
518This is a regular expression telling the anti-binary filter in which
519newsgroups binaries are allowed. If all groups in the Newsgroups header
520match this pattern, binaries are allowed through the filter. (This
521obviously has no effect when the binary filter is disabled.) If the
522binary filter is enabled and this is set to a null string (by overriding
523the default in the local config) the result will be blocking all binaries
524regardless of where they are posted.
525.Ip "\fBpoison_groups\fR" 8
526If any groups in the Newsgroups header match this regexp, the article
527will be rejected. Thus you can reject crossposts to certain groups even
528if they are also posted to groups you carry.
529.Ip "\fBhtml_allowed\fR" 8
530This is a regular expression telling the anti-\s-1HTML\s0 filter in which
531newsgroups \s-1HTML\s0 and multipart/alternative posts are allowed. This
532only has an effect if \fBblock_html\fR is turned on (above). The default
533(to allow \s-1HTML\s0 in microsoft.* groups) can be added to in \fIcleanfeed.conf\fR.
534.Sp
535If you don't want to allow \s-1HTML\s0 anywhere, not even the microsoft.*
536groups, override this setting in the local configuration and set it
537to a null string or undef.
538.Ip "\fBmd5exclude\fR" 8
539If an article is posted only to groups matching this regexp, the \s-1MD5\s0 \s-1EMP\s0
540filter will not be applied. Useful for \*(L"test\*(R" groups where it's okay
541for lots of the posts to be the same.
542.Ip "\fBallexclude\fR" 8
543If an article is posted only to groups matching this regexp, \s-1NO\s0 checks
544are applied at all.
545.Ip "\fBlow_xpost_groups\fR" 8
546If a group matches this regular expression, it gets a special crosspost
547limit, set in \fBlow_xpost_maxgroups\fR, rather than the general crosspost
548limit set in \fBmaxgroups\fR. This is useful for groups plagued by excessive
549crossposting, such as sex, binaries, and jobs groups. The default is
550to limit crossposts to 6 groups in test, forsale, and jobs groups.
551Setting this to a null string, or undef, will disable this feature.
552.Ip "\fBbadguys\fR" 8
553This is a monster regular expression containing domains of known spammers.
554Only the \*(L"middle\*(R" part of the domains are listed; these are checked as
555email addresses in From headers by appending a list of top-level domains
556to the end, and as URLs by prepending http:// and an optional \*(L"www.\*(R". If
557you modify this list, be \fIvery\fR careful not to end up with \*(L"||\*(R" in there
558(two \*(L"or\*(R" signs in a row); this will match every single post that comes
559through, which is Bad.
560.Ip "\fBbaddomainpat\fR" 8
561If a post contains a \s-1URL\s0 for a site whose domain name matches this
562pattern (in .com, .net, and .nu TLDs only) the post will be rejected.
563For example, there are hundreds of spamming porn sites whose domain names
564begin or end with \*(L"xxx\*(R". This prevents us from having to keep up with
565their nonsense. Yes, it's a little aggressive, but it works.
566.Ip "\fBexempt\fR" 8
567Regular expression of \s-1NNTP\s0\-Posting-Hosts that are exempt from the
568posting-host-based \s-1EMP\s0 filter. This is for high-output systems where
569all posts contain the same \s-1NNTP\s0\-Posting-Host header, such as \s-1AOL\s0, which
570if not exempted would end up hitting the posting-host \s-1EMP\s0 filter with
571all of their posts. There aren't many of these out there; a \*(L"regular\*(R"
572multi-user system does not present a problem because the filter doesn't
573kick in until it sees a large number of posts from the same posting-host
574and also of the same length, in a short period of time.
575.Ip "\fBsupersedes_exempt\fR" 8
576Regular expression of \s-1NNTP\s0\-Posting-Hosts that are exempt from the
577excessive supersedes filter. Generally this will be systems which
578post a lot of FAQs.
579.Ip "\fBbad_cancel_paths\fR" 8
580Cancel messages will be rejected if the Path header contains elements
581matching this regular expression. Also applied to the \s-1NNTP\s0\-Posting-Host.
582If \fBcheck_supersedes_path\fR is set, this will also be checked against
583the Path header of articles with Supersedes headers. This list contains
584sites which are or have recently been the source of rogue cancel attacks.
585.Ip "\fBrefuse_messageids\fR (\s-1INN\s0 only)" 8
586If you have \fBdo_mid_filter\fR (above) enabled, and you have the optional
587message-id patch applied to \s-1INN\s0 (or otherwise have obtained the hook
588for filter_messageid in \s-1INN\s0 2.0), this regular expression will be applied
589to message-ids as they are offered to your server, and they will be
590refused if it matches.
591.Ip "\fBnet_abuse_groups\fR" 8
592.Ip "\fBspam_report_groups\fR" 8
593These regular expressions are used to exempt certain groups from certain
594filters; for example, groups expected to contain spam reports, example
595spams, NoCeM notices, etc. These are not in \fIcleanfeed.conf\fR; if you
596need to add to them please let me know.
597.PP
598After modifying the filter file, always check for mistakes by typing:
599.PP
600.Vb 1
601\& perl -cw filter_innd.pl (or cleanfeed or whatever you called it)
602.Ve
603There should be no errors and no warnings.
604.PP
605You can check \fIcleanfeed.conf\fR with:
606.PP
607.Vb 1
608\& perl -cw cleanfeed.conf
609.Ve
610You will get several warnings about variables being used only once;
611these can be ignored.
612.PP
613If you are running \s-1INN\s0, you can modify the file and reload it with
614\fBctlinnd reload filter.perl meow\fR while the server is running. The
615configuration in \fIcleanfeed.conf\fR will be reloaded at this time as
616well.
617.PP
618With the Highwind servers, modifying the program will require a server
619restart (use the \fIbin/restart\fR script). Note that this will result in
620all connections (including newsreader clients) being dropped. This
621is not my fault. :)
622.PP
623When in standalone mode, configuration from \fIcleanfeed.conf\fR can be
624reloaded by sending Cleanfeed a \s-1SIGHUP\s0.
625.PP
626I have no idea what NNTPRelay does, but I'm guessing it needs a restart
627as well.
628.PP
629\s-1IMPORTANT\s0 \s-1NOTE\s0: A common mistake is not setting file permissions on
630cleanfeed/filter_innd.pl, cleanfeed.conf, and cleanfeed.local so that
631they are readable by the news user. Please double-check your permissions!
632If Cleanfeed is running, and fails to successfully load cleanfeed.conf,
633it will use the default settings instead of those you specified in the
634config file.
635.SH "INSTALLATION \- INN"
636These instructions assume you have the Perl hooks compiled into INN.
637If you don't, you will need to add them and rebuild the INN distribution
638before proceeding.
639.PP
640With INN, Perl is embedded into the innd program. The filter file
641defines subroutines that are called by innd at the appropriate times.
642.Sh "\s-1SYSTEM\s0 \s-1REQUIREMENTS\s0"
643In order to run Cleanfeed with \s-1INN\s0, you will need:
644.Ip "\(bu" 4
645\s-1INN\s0 1.5.1 or later (1.7.2+insync1.1d or 2.1 recommended)
646.Ip "\(bu" 4
647Perl 5.004 or later
648.Ip "\(bu" 4
649Perl hooks compiled into \s-1INN\s0
650.Ip "\(bu" 4
651The \s-1MD5\s0 Perl module
652.PP
653\s-1INN\s0 is available from:
654 http://www.isc.org/inn.html
655.PP
656The Insync distribution of \s-1INN\s0 (highly recommended if you aren't running
657\s-1INN\s0 2.1) is available from:
658 http://www.insync.net/~aos/inn.html
659.PP
660The \s-1MD5\s0 Perl module is available from:
661 http://www.perl.com/\s-1CPAN\s0\-local/modules/by-module/\s-1MD5\s0/
662.PP
663Perl itself is available from the Perl home page:
664 http://www.perl.com/
665.Sh "\s-1PATCHES\s0 \s-1AND\s0 \s-1STUFF\s0"
666\s-1INN\s0 2.0 includes everything you need to run Cleanfeed, except the \s-1MD5\s0
667Perl module.
668.PP
669With earlier versions, Cleanfeed requires some patches to \s-1INN\s0 in order
670to function properly.
671.PP
672If you are running \s-1INN\s0 1.7.2+insync1.1d, you already have the original
673\fIfilter.patch\fR and the \fIdynamic-load.patch\fR; you need only apply the
674\fIupgrade.patch\fR.
675.PP
676None of these patches are against \s-1INN\s0 2.1; the \*(L"extra feature\*(R" ones
677like \fImode.patch\fR may not apply to 2.1. Ports are always welcome.
678.Ip "\fBfilter.patch\fR" 4
679This patch provides the basic functionality for Cleanfeed by making some
680extra headers available to the Perl filter, as well as message bodies.
681This patch was changed in version 0.95.3. It is against \s-1INN\s0 1.7.2 and
682should be applied in the innd directory. This patch is included in the
683insync \*(L"megapatch\*(R" for \s-1INN\s0 as of version 1.1c, so if you are running this
684version of \s-1INN\s0 you need not apply this patch. Not necessary for \s-1INN\s0 2.x.
685.Ip "\fBdynamic-load.patch\fR" 4
686This patch enables \s-1INN\s0's Perl interpreter to load dynamic modules. It is
687necessary for \s-1MD5\s0 support. The patch is against \s-1INN\s0 1.7+insync and should
688be applied in the lib directory (\s-1NOT\s0 the innd directory). It applies cleanly
689to other versions of \s-1INN\s0 including 1.5.1 and 1.7.2. This patch is included
690in the insync \*(L"megapatch\*(R" for \s-1INN\s0 as of version 1.1d, so if you are running
691this version of \s-1INN\s0 you need not apply this patch. Not necessary for \s-1INN\s0 2.x.
692.Sp
693If you are still using \s-1INN\s0 1.5.1, you can use \fIdynamic-1.5.1.patch\fR instead.
694.Sp
695In order to compile \s-1INN\s0 with the new patch, you need to edit the \s-1PERL_LIB\s0
696entry in \fIconfig.data\fR. Type this command at the shell, and paste its output
697into \fIconfig.data\fR as \s-1PERL_LIB\s0:
698.Sp
699.Vb 1
700\& perl -MExtUtils::Embed -e ldopts
701.Ve
702Most systems also allow you to simply enter that line in backquotes as \s-1PERL_LIB\s0.
703.Sp
704\fBThis patch requires Perl 5.004 or later! \s-1INN\s0 will not compile linked with
705Perl 5.003 after following these instructions!\fR
706.Sp
707\fB\s-1AIX\s0:\fR There is a problem with Perl dynamic loading from \s-1INN\s0 under the
708\s-1AIX\s0 operating system. In simple terms, it doesn't work. This seems to
709be a problem with the gcc compiler. Success has been reported by
710rebuilding both Perl and \s-1INN\s0 with \s-1IBM\s0's commercial compiler CSet
711(a.k.a. xlC).
712.Sp
713\fBSolaris:\fR There have been multiple reports of Cleanfeed not working
714under Solaris if any part of the system -- \s-1INN\s0, Perl, or the \s-1MD5\s0 module --
715is compiled using egcs. Success has been reported by recompiling
716everything with gcc, and by upgrading to the very newest egcs.
717.Ip "\fBupgrade.patch\fR" 4
718For current users of Cleanfeed, this is a patch for an already-patched
719\s-1INN\s0, or for 1.7.2+insync1.1d, to bring you up to the new version of the
720Cleanfeed patch. Not applying this patch right now will only lose you a
721couple of filters, and nothing will break if you don't apply it (no
722changes to the filter source or configuration will be required).
723.Ip "\fBmessageid.patch\fR" 4
724This is a patch which adds a new Perl hook to innd, filter_messageid.
725This allows you to run a Perl subroutine against each message-id as
726it is offered to your server, and decide whether to refuse the article
727before it is even sent to your server. Cleanfeed includes a small
728filter_messageid. This patch is entirely optional.
729.Ip "\fBmode.patch\fR" 4
730This patch adds a line to \s-1INN\s0's \fBctlinnd mode\fR output for Perl filter
731status. The output line is generated by the \fBfilter_stats\fR subroutine.
732The default output contains the number of articles accepted, rejected
733and refused since the filter started, and the sizes of the \s-1EMP\s0,
734Message-\s-1ID\s0, and Excessive Supersedes hashes. If \fBtimer_info\fR is enabled,
735this will also include the rate in articles per second (rounded to the
736nearest tenth) at which articles were examined (total sent through the
737filter) and accepted by the filter, averaged over the \fBtimer_interval\fR
738number of seconds.
739.PP
740After applying the patches, rebuild all of \s-1INN\s0 and do a \*(L"make update\*(R".
741The first patch (\fIfilter.patch\fR) only requires innd to be rebuilt, but
742the \fIdynamic-load.patch\fR requires you to rebuild the whole distribution.
743Current users upgrading with \fIupgrade.patch\fR need only rebuild innd and
744reinstall that executable.
745.PP
746Thus:
747.PP
748.Vb 12
749\& cd inn [to the top-level source directory]
750\& make clean
751\& cd innd
752\& cp wherever/filter.patch . [from the Cleanfeed distribution]
753\& patch <filter.patch
754\& cd ../lib
755\& cp wherever/dynamic-load.patch . [from the Cleanfeed distribution]
756\& patch <dynamic-load.patch
757\& cd ../config
758\& emacs config.data [edit the PERL_LIB entry as above]
759\& make all
760\& make update
761.Ve
762Finally, you need to install the \s-1MD5\s0 Perl module, no matter what version of
763\s-1INN\s0 you are running.
764.Sh "\s-1INSTALLING\s0 \s-1CLEANFEED\s0 \- \s-1INN\s0"
765In \s-1INN\s0 1.7.2 and earlier, the location where \s-1INN\s0 looks for the Perl filter
766is set in \fIconfig.data\fR, as _PATH_PERL_FILTER_INND. By default, the
767filename is \fIfilter_innd.pl\fR. The Cleanfeed filter program file should
768be installed in this location. \s-1INN\s0 comes with an example filter_innd.pl
769file; move this file (or whatever other filter is in place) out of the way
770first.
771.PP
772Before putting the filter in place, edit the file, changing \fB$config_dir\fR
773to the location of your \fIcleanfeed.conf\fR file.
774.PP
775After editing the file, always check for errors with the command:
776.PP
777.Vb 1
778\& perl -cw filter_innd.pl
779.Ve
780Once the file is in place, tell innd to reload it:
781.PP
782.Vb 1
783\& ctlinnd reload filter.perl meow
784.Ve
785And, if Perl filtering is currently disabled, enable it:
786.PP
787.Vb 1
788\& ctlinnd perl y
789.Ve
790Now, you can watch it working by looking at your news.notice log:
791.PP
792.Vb 1
793\& tail -f /var/log/news/news.notice
794.Ve
795If your server is running a full feed, you should start seeing a
796constant stream of rejections almost immediately.
797.SH "INSTALLATION \- HIGHWIND SERVERS"
798The various Highwind server packages (Cyclone, Typhoon, and Breeze)
799all have the same external filter interface. The filter runs as
800its own process, reading from standard input and writing to standard
801output.
802.Sh "\s-1SYSTEM\s0 \s-1REQUIREMENTS\s0"
803In order to run Cleanfeed with a Highwind server, you will need:
804.Ip "\(bu" 4
805Cyclone, Typhoon or Breeze
806.Ip "\(bu" 4
807Perl 5.003 or later
808.Ip "\(bu" 4
809The \s-1MD5\s0 Perl module
810.PP
811The Highwind servers are commercial products. For more information:
812 http://www.highwind.com/
813.PP
814The \s-1MD5\s0 Perl module is available from:
815 http://www.perl.com/\s-1CPAN\s0\-local/modules/by-module/\s-1MD5\s0/
816.PP
817Perl itself is available from the Perl home page:
818 http://www.perl.com/
819.Sh "\s-1INSTALLING\s0 \s-1CLEANFEED\s0 \- \s-1HIGHWIND\s0"
820The Cleanfeed program file should be installed as \*(L"cleanfeed\*(R" in your
821news server's bin directory (cyclone/bin, etc). Make it owned by
822news:news and make it executable.
823.PP
824Before putting the filter in place, edit the file, changing \fB$config_dir\fR
825to the location of your \fIcleanfeed.conf\fR file. Also ensure that the
826shebang line (the first line of the file, starting with #!) points to
827the correct location of your perl executable.
828.PP
829After editing the file, always check for errors with the command:
830.PP
831.Vb 1
832\& perl -cw cleanfeed
833.Ve
834There should be no warnings.
835.PP
836Now, edit your \fIbin/start\fR script. You need to add two options to the
837command line that starts up the server process, the \fB\-program\fR option to
838tell it what program to use as a filter, and the \fB\-body\fR option to tell
839it to send the bodies as well as the headers.
840.PP
841typhoond \-program /typhoon/bin/cleanfeed \-body
842.PP
843\&...along with whatever else you have cluttering up the command line.
844.PP
845(Highwind has indicated that this may/will be a config file option
846in a future release.)
847.PP
848Now you can restart the server with the \fIbin/restart\fR script. Check
849to make sure Cleanfeed is running, with \*(L"ps \-ef\*(R" or \*(L"top\*(R". If
850Cyclone/Typhoon is unable to start the filter for some reason, it will
851log an error via syslog. The error will not be terribly helpful.
852.PP
853You can make Cleanfeed reload its configuration from \fIcleanfeed.conf\fR
854and local code from \fIcleanfeed.local\fR by sending it a \s-1SIGHUP\s0.
855.SH "INSTALLATION \- NNTPRELAY"
856Please note that I do not have an NNTPRelay server, nor access to one,
857nor much interest in mucking around with Windows NT, and thus I have
858not tested the NNTPRelay filtering support myself. The necessary changes
859and notes were contributed by someone else. Additions and improvements
860to this documentation would be most welcome.
861.PP
862The filter interface in NNTPRelay is pretty much the same as in the
863Highwind servers.
864.Sh "\s-1SYSTEM\s0 \s-1REQUIREMENTS\s0"
865In order to run Cleanfeed with NNTPRelay, you will need:
866.Ip "\(bu" 4
867NNTPRelay version 1.1b4 or later
868.Ip "\(bu" 4
869Perl 5.003 or later
870.Ip "\(bu" 4
871The \s-1MD5\s0 Perl module
872.PP
873NNTPRelay is available from:
874 http://nntprelay.maxwell.syr.edu/
875.PP
876An \s-1NT\s0 binary release of Perl 5.004, which apparently includes the \s-1MD5\s0
877module, can be found at:
878 http://www.perl.com/\s-1CPAN/\s0ports/win32/Standard/x86
879.PP
880The \s-1MD5\s0 module (in source code) can be found at:
881 http://www.perl.com/\s-1CPAN\s0\-local/modules/by-module/\s-1MD5\s0/
882.Sh "\s-1INSTALLING\s0 \s-1CLEANFEED\s0 \- \s-1NNTPRELAY\s0"
883Before putting the filter in place, edit the file, changing \fB$config_dir\fR
884to the location of your \fIcleanfeed.conf\fR file.
885.PP
886Install the Cleanfeed program file wherever is appropriate on
887your system, as \*(L"cleanfeed.pl\*(R". Edit NNTPRelay's \fIconfig.txt\fR
888file, adding an entry like this:
889.PP
890.Vb 1
891\& ExternalFilter=c:/perl/bin/perl.exe c:/news/cleanfeed.pl
892.Ve
893Of course, use the correct path to your Perl executable and to
894the Cleanfeed program file. Now restart NNTPRelay. If you
895defined a logfile in Cleanfeed, it should appear.
896.SH "THE HACKER'S GUIDE"
897Cleanfeed will look for a file called \fIcleanfeed.local\fR, in the same
898directory as \fIcleanfeed.conf\fR. If this file exists, it will be loaded
899and evaluated as Perl code right after the config file. This enables
900you to provide your own local filter code which will survive an upgrade
901of the main Cleanfeed source.
902.PP
903It will be reloaded when the filter is reloaded with \fBctlinnd reload
904filter.perl meow\fR (for INN), or when configuration is reloaded with a
905SIGHUP (in standalone mode). This means that you can modify the running
906code without restarting Cleanfeed.
907.PP
908\fIcleanfeed.local\fR can define a number of different subroutines, which,
909if defined, will be called at various points in the filter process.
910Other subroutines can, of course, be defined as required by your code.
911.PP
912The file is simply re-evaluated each time. So, if you remove a subroutine
913from the file completely, that subroutine will remain defined after the
914reload, because nothing replaced it. You will need instead to define it
915as an empty subroutine, or explicitly undef it, to make it go away.
916.Sh "\s-1STUFF\s0 \s-1YOU\s0 \s-1CAN\s0 \s-1DEFINE\s0"
917Cleanfeed will call the following subroutines, if they are defined.
918See the section on return values for instructions on what your code
919should return.
920.Ip "\fBlocal_config\fR" 4
921This is called after configuration is loaded, each time. It will be
922called when the filter is reloaded (with \s-1INN\s0) or when configuration
923is reloaded with \s-1SIGHUP\s0 (running standalone), as well as when the
924filter is first run. No return value is expected.
925.Ip "\fBlocal_filter_before_emp\fR" 4
926Called for each (non-control) article, before any other filters.
927General-purpose spam filters shouldn't go here, because you really
928want to populate the \s-1EMP\s0 hashes first.
929.Ip "\fBlocal_filter_after_emp\fR" 4
930Called for each (non-control) article, after the \s-1EMP\s0 filters but
931before any other filters.
932.Ip "\fBlocal_filter_middle\fR" 4
933Called for each (non-control) article, after the \*(L"simple\*(R" filters
934but before the \*(L"expensive\*(R" body checks.
935.Ip "\fBlocal_filter_scoring\fR" 4
936Called during the scoring filter. Return the value, positive or
937negative, by which to adjust the article's score.
938.Sp
939\fBWarning: Here there be dragons!\fR If you're going to play with
940this please examine the existing source, and use the debugging
941routines to watch what you're doing.
942.Ip "\fBlocal_filter_last\fR" 4
943Called for each (non-control) article, after all other filters
944are done.
945.Ip "\fBlocal_filter_cancel\fR" 4
946Called for all cancel control messages.
947.Ip "\fBlocal_filter_newrmgroup\fR" 4
948Called for all newgroup and rmgroup control messages.
949.Sh "\s-1RETURN\s0 \s-1VALUES\s0"
950The general filtering subroutines you can define (\fBlocal_filter_before_emp\fR,
951\fBlocal_filter_after_emp\fR, \fBlocal_filter_middle\fR, \fBlocal_filter_last\fR,
952\fBlocal_filter_cancel\fR, and \fBlocal_filter_newrmgroup\fR) are expected to
953return a value indicating whether you want to accept the article being
954examined. If the article is okay, you should return "" (empty string),
955in which case filtering will proceed as usual. If you want to reject the
956article, you return any other string, which will be used as the reason.
957.PP
958The rejection code actually expects two return values -- the first string
959is the \*(L"verbose\*(R" rejection message, and the second is the \*(L"non-verbose\*(R"
960message (see the \fBverbose\fR configuration option). If only one is
961supplied, it will be used for both purposes.
962.PP
963The scoring filter calls \fBlocal_filter_scoring\fR, which is expected
964to return the value, positive or negative, by which the article's score
965should be adjusted.
966.Sh "\s-1WHAT\s0 \s-1YOU\s0 \s-1GET\s0"
967Your subroutines get information about the article in several variables.
968.Ip "\fB%hdr\fR" 4
969A hash containing the article headers. The key is the header name, in
970\*(L"canonical\*(R" case as \s-1INN\s0 likes them; the value is the content of the header.
971When running under \s-1INN\s0, only headers known to \s-1INN\s0 will be included in the
972hash (which includes any header used anywhere in Cleanfeed). In standalone
973mode, all headers will be present, but only the known headers will be sent
974in canonical case; others will have the header name (and thus hash key) in
975whatever case they are in the article itself, making them difficult to find
976and use consistently.
977.Sp
978The message body is in this hash under the key _\|_BODY_\|_. If running \s-1INN\s0
9792.x with storageapi, it will be provided in wireformat, with lines
980terminated in \er\en rather than just \en. With the traditional spool
981format (and in all cases with \s-1INN\s0 prior to 2.x) lines will be terminated
982only with \en.
983.Sp
984Examples:
985.Sp
986To get the Subject header as a scalar: \f(CW$hdr\fR{'Subject'}
987.Sp
988To get the entire message body as a scalar: \f(CW$hdr\fR{'_\|_BODY_\|_'}
989.Ip "\fB%lch\fR" 4
990A hash containing lowercased versions of some of the article headers.
991The hash keys are the header names in all lowercase; the values are the
992contents of the headers, with all letters forced to lowercase.
993.Sp
994Currently, the only headers added to this hash are From, Organization,
995Subject, Content-Type, X\-Newsreader, X\-Newsposter, Message-\s-1ID\s0, and Sender.
996.Sp
997This hash is not available to \fBlocal_filter_before_emp\fR.
998.Ip "\fB@groups\fR" 4
999An array containing the newsgroups the article is posted to (from the
1000Newsgroups header). You can find out how many groups the article is
1001crossposted to with \*(L"scalar \f(CW@groups\fR\*(R".
1002.Ip "\fB@followups\fR" 4
1003An array containing the newsgroups to which followups are set (from the
1004Followup-To header). If the article has no Followup-To header, this
1005array will be identical to \f(CW@groups\fR. You can find out how many groups
1006followups are set to with \*(L"scalar \f(CW@followups\fR\*(R". This is the preferred
1007way to limit crossposting, because limiting only by the Newsgroups
1008header will catch FAQs and such.
1009.Ip "\fB$lines\fR" 4
1010The number of lines in the message body. This is not taken from the Lines
1011header as that can be client-supplied to fool filtering; this is determined
1012by counting the lines in the message body.
1013.Ip "\fB%gr\fR" 4
1014A hash containing information about the groups the article is posted
1015to. This isn't very straightforward and may not be useful to you, but
1016I'm including it in this documentation for completeness. The following
1017entries may be present in this hash:
1018.Sp
1019\fB$gr{'net'}\fR \- the number of net.* (Usenet \s-1II\s0) newsgroups the article is
1020posted to, if any.
1021.Sp
1022\fB$gr{'other'}\fR \- the number of non-net.* groups the article is posted to.
1023.Sp
1024\fB$gr{'md5skip'}\fR \- true if the article should be exempted from the \s-1MD5\s0
1025body checks (if all newsgroups match the regexp in \fBmd5exclude\fR).
1026.Sp
1027\fB$gr{'binary'}\fR \- true if the article is posted only to groups where
1028binaries are allowed (if all newsgroups match \fBbin_allowed\fR).
1029.Sp
1030\fB$gr{'html'}\fR \- true if the article is posted only to groups where html
1031is allowed (if all newsgroups match \fBhtml_allowed\fR).
1032.Sp
1033\fB$gr{'poison'}\fR \- number of \*(L'poison\*(R' newsgroups this article is posted
1034to (matching \fBpoison_groups\fR). If this is present, you'll only see this
1035entry in \fBlocal_filter_before_emp\fR and \fBlocal_filter_after_emp\fR because
1036it will be rejected after that.
1037.Sp
1038\fB$gr{'abuse'}\fR \- number of \*(L'net abuse\*(R' newsgroups this article is posted
1039to (matching \fBnet_abuse_groups\fR).
1040.Sp
1041\fB$gr{'reports'}\fR \- number of \*(L'spam reports\*(R' newsgroups this article is
1042posted to (matching \fBspam_report_groups\fR).
1043.Sp
1044\fB$gr{'low_xpost'}\fR \- number of \*(L'low crosspost limit\*(R' groups this article
1045is posted to (matching \fBlow_xpost_groups\fR).
1046.Sp
1047\fB$gr{'mod'}\fR \- number of moderated groups this article is posted to
1048(requires that Cleanfeed have an active file).
1049.Sp
1050\fB$gr{'allmod'}\fR \- true if this article is posted only to moderated groups.
1051.Sp
1052\fB$gr{'faq'}\fR \- true if this article is crossposted to news.answers.
1053.Ip "\fB%config\fR" 4
1054A hash containing all configuration options.
1055.Sh "\s-1DEBUGGING\s0"
1056When you make filtering changes, you should always check the results for
1057false positives. I've provided two subroutines to help you do this:
1058\fBwriteheaders()\fR and \fBwritefull()\fR.
1059.PP
1060First, make sure \fBdebug_batch_directory\fR is set in your configuration.
1061Set this to a directory that is writable by the news user.
1062.PP
1063Call either of these subroutines with one argument, the basename of the
1064batch file you want to write the current article to. \fBwriteheaders\fR
1065will dump the article's headers out to the file (with \s-1INN\s0 this will only
1066give you the known headers). \fBwritefull\fR will dump the full article,
1067headers (again, known headers with \s-1INN\s0) and body. The file will be
1068rotated if it becomes larger than \fBdebug_batch_size\fR, set in your
1069configuration. The rotation is simple: a number is appended to the end
1070of the file, and incremented until the filename does not exist. You'll
1071have to delete the old files yourself.
1072.PP
1073When testing a new filter, simply call \fBwriteheaders ("batchfile")\fR or
1074\fBwritefull ("batchfile")\fR when you're going to reject an article.
1075Then you can look at the file to make sure you're doing what you think
1076you're doing.
1077.SH "SIGNALS"
1078When running under Cyclone, Typhoon, Breeze, or NNTPRelay (standalone
1079mode), Cleanfeed will catch SIGHUP, and reload its configuration from
1080\fIcleanfeed.conf\fR. It will also reload and reevaluate \fIcleanfeed.local\fR
1081if you're using it. Note that, unlike INN, there is no way to reload the
1082filter code itself without restarting the server.
1083.PP
1084Cleanfeed in standalone mode will also catch SIGUSR1 and write its crude
1085current-status file (see \fBstatfile\fR in the config section) on the next
1086cycle through the filter.
1087.PP
1088(I honestly don't know if SIGUSR1 and SIGHUP are things which exist on NT
1089for NNTPRelay.)
1090.SH "CREDITS"
1091Written by Jeremy Nixon <jeremy@exit109.com>.
1092.PP
1093Originally based on Jeff Garzik's EMP filter.
1094.PP
1095I can't possibly mention everyone who has submitted ideas or fixes
1096for the filter, but I'd like to acknowledge the substantial
1097contributions of several people: Danhiel Baker, Frank Copeland,
1098Brian Moore, John Payne, Russ Allbery, David Riley, and SeokChan LEE.
1099Thanks, guys.
1100.PP
1101\fIdynamic-load.patch\fR is from Piers Cawley.
1102The body-filtering portion of the INN \fIfilter.patch\fR is from Jeff Garzik.
1103\fImessageid.patch\fR is from Ed Mooring.
1104\fImode.patch\fR is from John Payne.
1105.SH "COPYRIGHT"
1106Copyright 1997-1998 by Jeremy Nixon, All Rights Reserved.
1107.SH "LICENSE"
1108This software may be distributed freely, provided it is intact (including
1109all the files from the original archive). You may modify it, and you
1110may distribute your modified version, provided the original work is
1111credited to the appropriate authors, and your work is credited to you
1112(don't make changes and pass them off as my work), and that you aren't
1113charging for it.
1114.SH "AVAILABILITY"
1115This filter is available at:
1116.PP
1117http://www.exit109.com/~jeremy/news/antispam.html
1118ftp://ftp.exit109.com/users/jeremy/
1119
1120.rn }` ''
1121.IX Title "cleanfeed 8"
1122.IX Name "Cleanfeed - spam filter for Usenet news servers"
1123
1124.IX Header "NAME"
1125
1126.IX Header "SYNOPSIS"
1127
1128.IX Header "DESCRIPTION"
1129
1130.IX Header "USAGE"
1131
1132.IX Item "\fB\s-1INN\s0\fR"
1133
1134.IX Item "\fBCyclone/Typhoon/Breeze\fR"
1135
1136.IX Item "\fBNNTPRelay\fR"
1137
1138.IX Header "CONFIGURATION OPTIONS"
1139
1140.IX Subsection "\fBGeneral Settings\fR"
1141
1142.IX Item "\fBaggressive\fR"
1143
1144.IX Item "\fBactive_file\fR"
1145
1146.IX Subsection "\fB\s-1MD5\s0 Body Filter Settings\fR"
1147
1148.IX Item "\fBdo_md5\fR"
1149
1150.IX Item "\fBmd5maxmultiposts\fR"
1151
1152.IX Item "\fBMD5History\fR"
1153
1154.IX Item "\fBMD5maxlife\fR"
1155
1156.IX Item "\fBfuzzy_md5\fR"
1157
1158.IX Item "\fBfuzzy_max_length\fR"
1159
1160.IX Item "\fBmd5_skips_followups\fR"
1161
1162.IX Item "\fBMD5HistSize\fR"
1163
1164.IX Subsection "\fBHeader-Based \s-1EMP\s0 Filter Settings\fR"
1165
1166.IX Item "\fBdo_phl\fR"
1167
1168.IX Item "\fBdo_fsl\fR"
1169
1170.IX Item "\fBmaxmultiposts\fR"
1171
1172.IX Item "\fBArticleHistory\fR"
1173
1174.IX Item "\fBEMPmaxlife\fR"
1175
1176.IX Item "\fBEMPHistSize\fR"
1177
1178.IX Subsection "\fBExcessive Crosspost Settings\fR"
1179
1180.IX Item "\fBmaxgroups\fR"
1181
1182.IX Item "\fBlow_xpost_maxgroups\fR"
1183
1184.IX Subsection "\fBMisplaced Binaries Filter\fR"
1185
1186.IX Item "\fBblock_binaries\fR"
1187
1188.IX Item "\fBmax_encoded_lines\fR"
1189
1190.IX Item "\fBbinaries_in_mod_groups\fR"
1191
1192.IX Subsection "\fB\s-1HTML\s0\fR"
1193
1194.IX Item "\fBblock_mime_html\fR"
1195
1196.IX Item "\fBblock_html\fR"
1197
1198.IX Subsection "\fBCancel Message Filtering\fR"
1199
1200.IX Item "\fBblock_late_cancels\fR"
1201
1202.IX Item "\fBMIDmaxlife\fR"
1203
1204.IX Subsection "\fBDisabling Other Filters\fR"
1205
1206.IX Item "\fBdo_scoring_filter\fR"
1207
1208.IX Item "\fBdo_mid_filter\fR (\s-1INN\s0 only)"
1209
1210.IX Item "\fBdo_bot_checks\fR"
1211
1212.IX Item "\fBdo_supersedes_filter\fR"
1213
1214.IX Item "\fBcheck_supersedes_path\fR"
1215
1216.IX Item "\fBdrop_useless_controls\fR"
1217
1218.IX Item "\fBdrop_ihave_sendme\fR"
1219
1220.IX Item "\fBdrop_control_with_supersedes\fR"
1221
1222.IX Subsection "\fBHash-Trimming\fR"
1223
1224.IX Item "\fBtrimcycles\fR"
1225
1226.IX Item "\fBEMPstarttrimming\fR"
1227
1228.IX Subsection "\fBLogging\fR"
1229
1230.IX Item "\fBverbose\fR"
1231
1232.IX Item "\fBlogfile\fR (Standalone Mode)"
1233
1234.IX Item "\fBreportfile\fR (Standalone Mode)"
1235
1236.IX Item "\fBlog_accepts\fR (Standalone Mode)"
1237
1238.IX Item "\fBmax_log_size\fR (Standalone Mode)"
1239
1240.IX Item "\fBstatfile\fR"
1241
1242.IX Subsection "\fBTiming Info\fR"
1243
1244.IX Item "\fBtimer_info\fR"
1245
1246.IX Item "\fBtimer_interval\fR"
1247
1248.IX Subsection "\fBDebugging\fR"
1249
1250.IX Item "\fBdebug_batch_directory\fR"
1251
1252.IX Item "\fBdebug_batch_size\fR"
1253
1254.IX Subsection "\fBRegular Expressions\fR"
1255
1256.IX Item "\fBbin_allowed\fR"
1257
1258.IX Item "\fBpoison_groups\fR"
1259
1260.IX Item "\fBhtml_allowed\fR"
1261
1262.IX Item "\fBmd5exclude\fR"
1263
1264.IX Item "\fBallexclude\fR"
1265
1266.IX Item "\fBlow_xpost_groups\fR"
1267
1268.IX Item "\fBbadguys\fR"
1269
1270.IX Item "\fBbaddomainpat\fR"
1271
1272.IX Item "\fBexempt\fR"
1273
1274.IX Item "\fBsupersedes_exempt\fR"
1275
1276.IX Item "\fBbad_cancel_paths\fR"
1277
1278.IX Item "\fBrefuse_messageids\fR (\s-1INN\s0 only)"
1279
1280.IX Item "\fBnet_abuse_groups\fR"
1281
1282.IX Item "\fBspam_report_groups\fR"
1283
1284.IX Header "INSTALLATION \- INN"
1285
1286.IX Subsection "\s-1SYSTEM\s0 \s-1REQUIREMENTS\s0"
1287
1288.IX Item "\(bu"
1289
1290.IX Item "\(bu"
1291
1292.IX Item "\(bu"
1293
1294.IX Item "\(bu"
1295
1296.IX Subsection "\s-1PATCHES\s0 \s-1AND\s0 \s-1STUFF\s0"
1297
1298.IX Item "\fBfilter.patch\fR"
1299
1300.IX Item "\fBdynamic-load.patch\fR"
1301
1302.IX Item "\fBupgrade.patch\fR"
1303
1304.IX Item "\fBmessageid.patch\fR"
1305
1306.IX Item "\fBmode.patch\fR"
1307
1308.IX Subsection "\s-1INSTALLING\s0 \s-1CLEANFEED\s0 \- \s-1INN\s0"
1309
1310.IX Header "INSTALLATION \- HIGHWIND SERVERS"
1311
1312.IX Subsection "\s-1SYSTEM\s0 \s-1REQUIREMENTS\s0"
1313
1314.IX Item "\(bu"
1315
1316.IX Item "\(bu"
1317
1318.IX Item "\(bu"
1319
1320.IX Subsection "\s-1INSTALLING\s0 \s-1CLEANFEED\s0 \- \s-1HIGHWIND\s0"
1321
1322.IX Header "INSTALLATION \- NNTPRELAY"
1323
1324.IX Subsection "\s-1SYSTEM\s0 \s-1REQUIREMENTS\s0"
1325
1326.IX Item "\(bu"
1327
1328.IX Item "\(bu"
1329
1330.IX Item "\(bu"
1331
1332.IX Subsection "\s-1INSTALLING\s0 \s-1CLEANFEED\s0 \- \s-1NNTPRELAY\s0"
1333
1334.IX Header "THE HACKER'S GUIDE"
1335
1336.IX Subsection "\s-1STUFF\s0 \s-1YOU\s0 \s-1CAN\s0 \s-1DEFINE\s0"
1337
1338.IX Item "\fBlocal_config\fR"
1339
1340.IX Item "\fBlocal_filter_before_emp\fR"
1341
1342.IX Item "\fBlocal_filter_after_emp\fR"
1343
1344.IX Item "\fBlocal_filter_middle\fR"
1345
1346.IX Item "\fBlocal_filter_scoring\fR"
1347
1348.IX Item "\fBlocal_filter_last\fR"
1349
1350.IX Item "\fBlocal_filter_cancel\fR"
1351
1352.IX Item "\fBlocal_filter_newrmgroup\fR"
1353
1354.IX Subsection "\s-1RETURN\s0 \s-1VALUES\s0"
1355
1356.IX Subsection "\s-1WHAT\s0 \s-1YOU\s0 \s-1GET\s0"
1357
1358.IX Item "\fB%hdr\fR"
1359
1360.IX Item "\fB%lch\fR"
1361
1362.IX Item "\fB@groups\fR"
1363
1364.IX Item "\fB@followups\fR"
1365
1366.IX Item "\fB$lines\fR"
1367
1368.IX Item "\fB%gr\fR"
1369
1370.IX Item "\fB%config\fR"
1371
1372.IX Subsection "\s-1DEBUGGING\s0"
1373
1374.IX Header "SIGNALS"
1375
1376.IX Header "CREDITS"
1377
1378.IX Header "COPYRIGHT"
1379
1380.IX Header "LICENSE"
1381
1382.IX Header "AVAILABILITY"
1383
This page took 0.337605 seconds and 4 git commands to generate.