]>
Commit | Line | Data |
---|---|---|
3a10c86a PG |
1 | --- /usr/local/news/dev/cleanfeed/branches/cleanfeed-20020501/cleanfeed 2002-05-01 16:51:53.000000000 +0100 |
2 | +++ /usr/local/news/dev/cleanfeed/trunk/cleanfeed 2008-08-09 18:16:09.000000000 +0100 | |
3 | @@ -1,8 +1,12 @@ | |
4 | -# vim: set tabstop=4 shiftwidth=4 autoindent smartindent smarttab syntax=perl: | |
5 | +# vim: set tabstop=4 shiftwidth=4 expandtab syntax=perl: | |
6 | +# autoindent smartindent smarttab | |
7 | # | |
8 | # Copyright 1999 Jeremy Nixon <jeremy@exit109.com> | |
9 | # Copyright 2001 Marco d'Itri <md@linux.it> | |
10 | # | |
11 | +# Modified by Steve Crook (09th Aug 2008) (svn:r114) and redistributed in | |
12 | +# accordance with the terms of the license. | |
13 | +# | |
14 | # This software is distributed under the terms of the Artistic License. | |
15 | # Please see the LICENSE file in the distribution. | |
16 | # | |
17 | @@ -11,7 +15,7 @@ | |
18 | # Directory where cleanfeed.local and the other configuration files live. | |
19 | # Set this to undef to not use any external file. | |
20 | ||
21 | -$config_dir = '/news/bin/filter'; | |
22 | +$config_dir = '/usr/local/news/cleanfeed/etc'; | |
23 | ||
24 | ############################################################################## | |
25 | # Server configuration | |
26 | @@ -42,6 +46,8 @@ | |
27 | ||
28 | do_md5 => 1, # do the md5 checks? | |
29 | do_phl => 1, # do the posting-host/lines EMP check? | |
30 | + do_phn => 1, # do the posting-host/newsgroups EMP check? | |
31 | + do_phr => 1, # do posting-host (high risk groups) check? | |
32 | do_fsl => 1, # do the from/subject/lines EMP check? | |
33 | do_scoring_filter => 1, # use the scoring filter? | |
34 | ||
35 | @@ -54,6 +60,12 @@ | |
36 | PHLRateCutoff => 20, | |
37 | PHLRateCeiling => 80, | |
38 | PHLRateBaseInterval => 3600, | |
39 | + PHNRateCutoff => 150, | |
40 | + PHNRateCeiling => 200, | |
41 | + PHNRateBaseInterval => 3600, | |
42 | + PHRRateCutoff => 10, | |
43 | + PHRRateCeiling => 80, | |
44 | + PHRRateBaseInterval => 3600, | |
45 | FSLRateCutoff => 20, | |
46 | FSLRateCeiling => 40, | |
47 | FSLRateBaseInterval => 1000, | |
48 | @@ -65,10 +77,13 @@ | |
49 | stats_interval => 3600, # write status file every N seconds | |
50 | MIDmaxlife => 4, # time to keep rejected message-ids, in hours | |
51 | md5_skips_followups => 1, # avoid MD5 check on articles with References? | |
52 | + phn_aggressive => 1, # use path for phn filter when no posting host | |
53 | + phr_aggressive => 1, # use path for phr filter when no posting host | |
54 | do_mid_filter => 1, # use the message-id CHECK filter? (INN only) | |
55 | do_supersedes_filter => 1, # do the excessive supersedes filter? | |
56 | drop_useless_controls => 1, # drop sendsys, senduuname, version control msg | |
57 | drop_ihave_sendme => 1, # drop ihave, sendme control messages | |
58 | + bad_rate_reload => 10000, # Reload bad_* files after this many articles | |
59 | ||
60 | low_xpost_maxgroups => 6, # max xposts in low_xpost_groups | |
61 | meow_ext_maxgroups => 2, # max xposts from meow_groups to other groups | |
62 | @@ -104,16 +119,24 @@ | |
63 | debug_batch_size => 0, # max size of batch files before rotation | |
64 | ||
65 | ### binaries allowed if groups match | |
66 | - bin_allowed => '^\w+\.binae?r|^alt\.sex\.pictures|^fur\.artwork'. | |
67 | + bin_allowed => '^[a-z]+\.binae?r|^fur\.artwork'. | |
68 | '|^alt\.anonymous\.messages$|^de\.alt\.dateien|^rec\.games\.bolo$'. | |
69 | '|^comp\.security\.pgp\.test$|^sfnet\.tiedostot'. | |
70 | - '|^fido\.|^linux\.|^unidata\.', | |
71 | + '|^fido\.|^linux\.|^unidata\.|alt\.security\.keydist', | |
72 | + | |
73 | + # Groups matching this regex will accept binary UUenc and yEnc files | |
74 | + # where filename extensions match 'image_extensions'. | |
75 | + image_allowed => '\.pictures', | |
76 | + | |
77 | + # Extensions on image files that are allowed in 'image_allowed' groups. | |
78 | + # These are not case sensitive | |
79 | + image_extensions => 'jpe?g|png|gif', | |
80 | ||
81 | ### no binaries allowed even if bin_allowed matches | |
82 | bad_bin => '\.d$|^alt\.chello', | |
83 | ||
84 | ### md5 EMP check not done if groups match | |
85 | - md5exclude => '\.test(?:$|\.)|^es\.pruebas$', | |
86 | + md5exclude => '^perl\.cpan\.testers|^es\.pruebas$', | |
87 | ||
88 | ### reject all articles crossposted to groups matching this | |
89 | poison_groups => '^alt\.(?:binaires|bainaries)|sexzilla|^newsmon$'. | |
90 | @@ -122,11 +145,17 @@ | |
91 | : '|^alt\.hipcrime|^us\.hipcrime|^hipcrime|h\dpcr\dme'), | |
92 | ||
93 | ### no checks done if groups match | |
94 | - allexclude => '^clari\.|^biz\.clarinet\.', | |
95 | + allexclude => '^mailing\.', | |
96 | ||
97 | ### HTML allowed here (if block_html or block_multi_alt is turned on) | |
98 | html_allowed => '^microsoft\.', | |
99 | ||
100 | + ### MIME HTML allowed here (if block_mime_html is turned on) | |
101 | + mime_html_allowed => '', | |
102 | + | |
103 | + test_groups => '\.test(ing)?(?:$|\.)|^es\.pruebas|^borland\.public\.test2'. | |
104 | + '|^cern\.testnews', | |
105 | + | |
106 | ### groups where we restrict crossposts even more than normal | |
107 | low_xpost_groups => 'test|jobs|forsale', | |
108 | ||
109 | @@ -143,9 +172,31 @@ | |
110 | # FIXME currently disabled | |
111 | # baddomainpat => '[\w\-]+xxx|xxx[\w\-]+', | |
112 | ||
113 | + ### exclude these newsgroups from the fsl filter | |
114 | + fsl_exclude => 'comp\.lang\.ruby', | |
115 | + | |
116 | + ### exclude these newsgroups from the phl filter | |
117 | + phl_exclude => 'comp\.lang\.ruby|^microsoft\.|^alt\.bestjobsusa'. | |
118 | + '|\.bbs\.', | |
119 | + | |
120 | ### exempt these hosts from the NNTP-Posting-Host filter | |
121 | phl_exempt => '^localhost$|webtv\.net$|^newscene\.newscene\.com$'. | |
122 | - '|^freebsd\.csie\.nctu\.edu\.tw$|^ddt\.demos\.su$|^onlyNews customer$', | |
123 | + '|^freebsd\.csie\.nctu\.edu\.tw$|^ddt\.demos\.su$|^onlyNews customer$'. | |
124 | + '|localhost\.pld-linux\.org', | |
125 | + | |
126 | + ### exclude these newsgroups from the phn filter | |
127 | + phn_exclude => '^local\.|^alt\.anonymous\.messages|^alt\.sex\.'. | |
128 | + '|^\w+\.bin|^microsoft\.|\.bbs\.|^alt\.bestjobsusa|^mozilla\.'. | |
129 | + '|^gnus?\.|^alt\.pictures\.|^gmane\.|^fa\.|^stu\.|^corel\.|\.cvs\.'. | |
130 | + '|\.talk|^lists\.|^microsoft\.|^news\.lists\.filters', | |
131 | + | |
132 | + ### exempt these hosts from the phn filter | |
133 | + phn_exempt => '^localhost$|^127\.0\.0\.1$|localhost\.pld-linux\.org', | |
134 | + | |
135 | + phr_exempt => '^localhost$|^127\.0\.0\.1$', | |
136 | + | |
137 | + ### newsgroups that get frequently flooded | |
138 | + flood_groups => '', | |
139 | ||
140 | ### posting hosts exempt from excessive supersedes filter | |
141 | supersedes_exempt => '^localhost$|^penguin-lust\.mit\.edu$', | |
142 | @@ -221,10 +272,13 @@ | |
143 | # config_append adds to the config regexps | |
144 | if (%config_append) { | |
145 | foreach (qw(bin_allowed bad_bin md5exclude poison_groups | |
146 | - allexclude html_allowed low_xpost_groups no_cancel_groups | |
147 | - baddomainpat phl_exempt supersedes_exempt | |
148 | + allexclude html_allowed mime_html_allowed low_xpost_groups | |
149 | + test_groups no_cancel_groups baddomainpat fsl_exclude | |
150 | + phl_exempt phl_exclude supersedes_exempt | |
151 | + phn_exempt phr_exempt phn_exclude flood_groups | |
152 | refuse_messageids net_abuse_groups spam_report_groups | |
153 | - adult_groups not_adult_groups faq_groups badguys)) { | |
154 | + adult_groups not_adult_groups faq_groups badguys | |
155 | + image_allowed image_extensions)) { | |
156 | if (defined $config_append{$_}) { | |
157 | $config{$_} .= "|$config_append{$_}"; | |
158 | $config{$_} =~ s/\|\|/\|/g; | |
159 | @@ -301,8 +355,8 @@ | |
160 | $servPre = "(?:$free|cheap|unlimited|nationwide|$site_desc)"; | |
161 | $servPost = '(?:$free|minute|samples|800|900|no.?charge)'; | |
162 | $servStr = "(?:phone.{0,15}(?:$sex|fun)|(?:adult|r.?a.?p.?e|$sex).{0,10}(?:chat|site)". | |
163 | - "|(?:$sex).{0,15}(?:show|call|connection|vid(?:eo|s))". | |
164 | - '|hard.?core.(?:vid(?:eo|s)|amateur)|900.dateline|(?:mass|bulk).e?-?mail)'; | |
165 | + "|(?:$sex).{0,15}(?:show|call|connection|vid(?:eo|s)|dvd)". | |
166 | + '|hard.?core.(?:vid(?:eo|s)|dvd|amateur)|900.dateline|(?:mass|bulk).e?-?mail)'; | |
167 | $services = "(?:$servPre.{0,30}?$servStr)|(?:$servStr.{0,30}?$servPost)"; | |
168 | ||
169 | $free_stuff = "$free.{0,20}(?:password|membership|$pics|chat)". | |
170 | @@ -311,12 +365,12 @@ | |
171 | ||
172 | $sex_adjs = "$desc1|$sex|erotic|gay|amateur|lesbian|blow.?job|fetish". | |
173 | '|pre.?teen|nude|celeb|school.?girl|bondage|rape|torture'; | |
174 | -$porn = "(?:$sex_adjs).{0,25}(?:$pics|video|image|porn|photo|mpeg)"; | |
175 | +$porn = "(?:$sex_adjs).{0,25}(?:$pics|video|dvd|image|porn|photo|mpeg)"; | |
176 | ||
177 | $one_point_words = "teen|hot|sex|$free|credit|amateur|lolita|horne?y". | |
178 | - '|dildo|anal(?!yst)|oral|school.?girl|bondage|breast|vid(?:eo|s)|orgy|erotic|porn'. | |
179 | - '|fetish|whore|nympho|sucking|password|membership|make.money|fast.cash'. | |
180 | - '|barely.?(?:18|legal)|orgasm'; | |
181 | + '|dildo|anal(?!yst)|oral|school.?girl|bondage|breast|vid(?:eo|s)|dvd'. | |
182 | + '|orgy|erotic|porn|fetish|whore|nympho|sucking|password|membership'. | |
183 | + '|make.money|fast.cash|barely.?(?:18|legal)|orgasm'; | |
184 | $two_point_words = 'fuck|sluts|puss(?:y|ies)|\bcum|(?:hidden|live|free|dorm|spy).?cam'. | |
185 | '|le[sz]b(?:ian|o)|tit(?!an|ch)|dick(?!.?berg)|blow.?job|cock|clit'. | |
186 | '|pam(?:ela)?.anderson|twat|cunt|hard-?core|[^x]xxx|facial|gangbang'. | |
187 | @@ -380,10 +434,8 @@ | |
188 | } | |
189 | } | |
190 | ||
191 | - read_hash('bad_paths', \%Bad_Path); | |
192 | - read_hash('bad_cancel_paths', \%Bad_Cancel_Path); | |
193 | - read_hash('bad_adult_paths', \%Bad_Adult_Path); | |
194 | - read_hash('bad_hosts', \%Bad_Hosts); | |
195 | + # Read all the bad_* files | |
196 | + read_hashes(); | |
197 | ||
198 | # initialise the rate filters | |
199 | if ($config{do_md5}) { | |
200 | @@ -400,6 +452,20 @@ | |
201 | } else { | |
202 | undef $PHLhistory; | |
203 | } | |
204 | + if ($config{do_phn}) { | |
205 | + $PHNhistory = new Cleanfeed::RateLimit; | |
206 | + $PHNhistory->init($config{PHNRateCutoff}, $config{PHNRateCeiling}, | |
207 | + $config{PHNRateBaseInterval}); | |
208 | + } else { | |
209 | + undef $PHNhistory; | |
210 | + } | |
211 | + if ($config{do_phr}) { | |
212 | + $PHRhistory = new Cleanfeed::RateLimit; | |
213 | + $PHRhistory->init($config{PHRRateCutoff}, $config{PHRRateCeiling}, | |
214 | + $config{PHRRateBaseInterval}); | |
215 | + } else { | |
216 | + undef $PHRhistory; | |
217 | + } | |
218 | if ($config{do_fsl}) { | |
219 | $FSLhistory = new Cleanfeed::RateLimit; | |
220 | $FSLhistory->init($config{FSLRateCutoff}, $config{FSLRateCeiling}, | |
221 | @@ -427,6 +493,15 @@ | |
222 | $status{articles}++; | |
223 | $timer{articles}++ if $config{timer_info}; | |
224 | ||
225 | + # Reload the bad_* files after every $config{bad_rate_reload} accepted articles | |
226 | + if ($status{accepted} % $config{bad_rate_reload} == 0 | |
227 | + and $status{accepted} > $status{bad_reloaded}) { | |
228 | + slog('N', "Reloading bad files after $status{accepted} articles"); | |
229 | + read_hashes(); | |
230 | + # Prevent looping whilst waiting for another accepted article | |
231 | + $status{bad_reloaded} = $status{accepted}; | |
232 | + } | |
233 | + | |
234 | # break out newsgroups into an array | |
235 | @groups = split(/[,\s]+/, $hdr{Newsgroups}); | |
236 | if ($hdr{'Followup-To'}) { | |
237 | @@ -445,10 +520,16 @@ | |
238 | $gr{'rg_'.$item}++ if /$Restricted_Groups{$item}/; | |
239 | } | |
240 | $gr{skip}++ if $config{allexclude} and /$config{allexclude}/o; | |
241 | + $gr{fslskip}++ if $config{fslexclude} and /$config{fslexclude}/o; | |
242 | $gr{md5skip}++ if $config{md5exclude} and /$config{md5exclude}/o; | |
243 | + $gr{phnskip}++ if $config{phn_exclude} and /$config{phn_exclude}/o; | |
244 | + $gr{phlskip}++ if $config{phl_exclude} and /$config{phl_exclude}/o; | |
245 | + $gr{phrinc}++ if $config{flood_groups} and /$config{flood_groups}/o; | |
246 | $gr{binary}++ if $config{bin_allowed} and /$config{bin_allowed}/o; | |
247 | + $gr{image}++ if $config{image_allowed} and /$config{image_allowed}/o; | |
248 | $gr{bad_bin}++ if $config{bad_bin} and /$config{bad_bin}/o; | |
249 | $gr{html}++ if $config{html_allowed} and /$config{html_allowed}/o; | |
250 | + $gr{mime_html}++ if $config{mime_html_allowed} and /$config{mime_html_allowed}/o; | |
251 | $gr{poison}++ if $config{poison_groups} | |
252 | and /$config{poison_groups}/o; | |
253 | $gr{reports}++ if $config{spam_report_groups} | |
254 | @@ -459,7 +540,7 @@ | |
255 | and /$config{meow_groups}/o; | |
256 | $gr{no_cancel}++ if $config{no_cancel_groups} | |
257 | and /$config{no_cancel_groups}/o; | |
258 | - $gr{test}++ if /\.test\b/; | |
259 | + $gr{test}++ if /$config{test_groups}/o; | |
260 | $gr{adult}++ if /$config{adult_groups}/o | |
261 | and not /$config{not_adult_groups}/o; | |
262 | $gr{faq}++ if /$config{faq_groups}/o; | |
263 | @@ -468,15 +549,24 @@ | |
264 | } elsif (defined &INN::newsgroup) { | |
265 | $gr{mod}++ if INN::newsgroup($_) eq 'm'; | |
266 | } | |
267 | + | |
268 | + # Reject bad groups | |
269 | + return reject("Bad group ($_)", 'Bad group') if exists $Bad_Groups{$_}; | |
270 | } | |
271 | ||
272 | # these only count if all groups match | |
273 | $gr{skip} = ($gr{skip} == scalar @groups); | |
274 | $gr{md5skip} = ($gr{md5skip} == scalar @groups); | |
275 | + $gr{phnskip} = ($gr{phnskip} == scalar @groups); | |
276 | + $gr{phlskip} = ($gr{phlskip} == scalar @groups); | |
277 | + $gr{image} = (($gr{image} + $gr{binary}) >= scalar @groups); | |
278 | $gr{binary} = ($gr{binary} == scalar @groups); | |
279 | + $gr{reports} = ($gr{reports} == scalar @groups); | |
280 | $gr{binary} = 0 if $gr{bad_bin}; | |
281 | $gr{html} = ($gr{html} == scalar @groups); | |
282 | + $gr{mime_html} = ($gr{mime_html} == scalar @groups); | |
283 | $gr{allmod} = ($gr{mod} == scalar @groups); | |
284 | + $gr{alltest} = ($gr{test} == scalar @groups); | |
285 | ||
286 | # If all newsgroups are excluded from filtering, bail now | |
287 | return '' if $gr{skip}; | |
288 | @@ -488,7 +578,7 @@ | |
289 | # checks common to all article types ##################################### | |
290 | foreach (split(/\s+/, $hdr{'NNTP-Posting-Host'})) { | |
291 | return reject("Bad host ($hdr{'NNTP-Posting-Host'})", 'Bad site') | |
292 | - if exists $Bad_Hosts{$_}; | |
293 | + if exists $Bad_Hosts{$_} or exists $Bad_Hosts_Central{$_}; | |
294 | } | |
295 | ||
296 | @Path_Entries = split(/!/, $hdr{Path}); | |
297 | @@ -629,17 +719,29 @@ | |
298 | /mx; | |
299 | ||
300 | # binaries in non-binary newsgroups | |
301 | - if ($config{block_binaries}) { | |
302 | + if ($config{block_binaries} | |
303 | + and $lines > $config{max_encoded_lines}) { | |
304 | unless ($config{binaries_in_mod_groups} and $gr{allmod}) { | |
305 | - return reject('Binary in non-binary group') | |
306 | - if $lines > $config{max_encoded_lines} | |
307 | - and not $gr{binary} and is_binary(); | |
308 | - } | |
309 | - } | |
310 | + # We're only interested in binaries | |
311 | + if (is_binary()) { | |
312 | + # Is the binary an image? | |
313 | + if (is_image()) { | |
314 | + return reject("Binary image in non-image group") | |
315 | + if not $gr{image}; | |
316 | + # gr{image} is true when distro matches bin_allowed | |
317 | + # or image_allowed | |
318 | + } else { | |
319 | + return reject("Binary in non-binary group") | |
320 | + if not $gr{binary}; | |
321 | + # gr{binary} is true when distro matches bin_allowed | |
322 | + }; # End of is_image | |
323 | + }; # End of is_binary | |
324 | + }; # End of moderated groups | |
325 | + }; # End of max encoded lines | |
326 | ||
327 | # mime-encapsulated HTML (attached *.html file) | |
328 | return reject('Attached HTML file') | |
329 | - if $config{block_mime_html} | |
330 | + if $config{block_mime_html} and not $gr{mime_html} | |
331 | and $hdr{'Content-Disposition'} =~ /filename.*\.html?/ | |
332 | or $hdr{'Content-Base'} =~ /file:.*\.html?/ | |
333 | or ($lch{'content-type'} =~ m#multipart/(?:mixed|related)# | |
334 | @@ -722,7 +824,7 @@ | |
335 | ||
336 | # EMP checks ######################################################### | |
337 | # create MD5 body checksum hash. | |
338 | - if ($config{do_md5} and not $gr{md5skip} | |
339 | + if ($config{do_md5} and not $gr{md5skip} and not $gr{alltest} | |
340 | and not ($hdr{References} and $config{md5_skips_followups}) | |
341 | and (($config{md5_max_length} | |
342 | and $lines < $config{md5_max_length}) | |
343 | @@ -749,8 +851,8 @@ | |
344 | if (not $gr{reports}) { | |
345 | # create posting-host/lines hash | |
346 | if ($config{do_phl} and not $gr{allmod} | |
347 | - and $hdr{'NNTP-Posting-Host'} | |
348 | - and not $hdr{Newsgroups} =~ /^(?:tw\.bbs\.|fido7\.)/ #XXX FIXME | |
349 | + and $hdr{'NNTP-Posting-Host'} and not $gr{phlskip} | |
350 | + and not is_binary() and not $gr{alltest} | |
351 | and not $hdr{'NNTP-Posting-Host'} =~ /(?:$config{phl_exempt})/o | |
352 | and not ($gr{binary} and $lines > 100 | |
353 | and $hdr{Subject} =~ /[\(\[]\d+\/\d+[\)\]]/)) { | |
354 | @@ -758,8 +860,31 @@ | |
355 | if $PHLhistory->add("$hdr{'NNTP-Posting-Host'} $lines"); | |
356 | } | |
357 | ||
358 | + # create posting-host/newsgroups hash | |
359 | + if ($config{do_phn} and not $gr{allmod} | |
360 | + and not $gr{phrinc} and not $gr{phnskip} and not $gr{alltest} | |
361 | + and not ($gr{binary} and $lines > 100)) { | |
362 | + if ($hdr{'NNTP-Posting-Host'}) { | |
363 | + if (not $hdr{'NNTP-Posting-Host'} =~ /(?:$config{phn_exempt})/o) { | |
364 | + return reject('EMP (phn nph)', 'EMP') | |
365 | + if $PHNhistory->add("$hdr{'NNTP-Posting-Host'} $hdr{Newsgroups}"); | |
366 | + } | |
367 | + } | |
368 | + elsif ($config{phn_aggressive}) { | |
369 | + my $server; | |
370 | + $server = lc "$hdr{Path}"; | |
371 | + $server =~ s/(![^\.]+)+$//; | |
372 | + my $exc_count = ($server =~ tr/!//); | |
373 | + if ($exc_count > 1) { | |
374 | + $server =~ s/.*!//; | |
375 | + return reject('EMP (phn path)', 'EMP') | |
376 | + if $PHNhistory->add("$server $hdr{Newsgroups}"); | |
377 | + } | |
378 | + } | |
379 | + } | |
380 | + | |
381 | # create from/subject/lines hash | |
382 | - if ($config{do_fsl}) { | |
383 | + if ($config{do_fsl} and not $gr{fslskip} and not $gr{alltest}) { | |
384 | my $hash1; | |
385 | if (defined $hdr{Sender}) { | |
386 | $hash1 = lc "$hdr{Sender} $hdr{Subject}"; | |
387 | @@ -773,6 +898,28 @@ | |
388 | } | |
389 | } # not reports groups | |
390 | ||
391 | + # create high-risk newsgroups hash | |
392 | + if ($config{do_phr} and $gr{phrinc} | |
393 | + and not ($gr{binary} and $lines > 100)) { | |
394 | + if ($hdr{'NNTP-Posting-Host'}) { | |
395 | + if (not $hdr{'NNTP-Posting-Host'} =~ /(?:$config{phr_exempt})/o) { | |
396 | + return reject('EMP (phr nph)', 'EMP') | |
397 | + if $PHRhistory->add("$hdr{'NNTP-Posting-Host'}"); | |
398 | + } | |
399 | + } | |
400 | + elsif ($config{phr_aggressive}) { | |
401 | + my $server; | |
402 | + $server = lc "$hdr{Path}"; | |
403 | + $server =~ s/(![^\.]+)+$//; | |
404 | + my $exc_count = ($server =~ tr/!//); | |
405 | + if ($exc_count > 1) { | |
406 | + $server =~ s/.*!//; | |
407 | + return reject('EMP (phr path)', 'EMP') | |
408 | + if $PHRhistory->add("$server"); | |
409 | + } | |
410 | + } | |
411 | + } | |
412 | + | |
413 | # Supersedes checks ################################################## | |
414 | if ($hdr{Supersedes}) { | |
415 | foreach (@Path_Entries) { | |
416 | @@ -847,8 +994,15 @@ | |
417 | and $hdr{'NNTP-Posting-Host'} !~ /webtv\.net$/ | |
418 | and $lch{'message-id'} !~ /webtv\.net>$/; | |
419 | ||
420 | - $score += 1 if scalar @followups > 4; | |
421 | - $score += 2 if scalar @followups > 8; | |
422 | + $score += 1 if scalar @followups > 2; | |
423 | + $score += 2 if scalar @followups > 4; | |
424 | + $score += 1 if scalar @followups ge scalar @groups; | |
425 | + | |
426 | + # Add 1 to score for each followup not in groups | |
427 | + my %grps; | |
428 | + @grps{@groups} = (); # Convert array to hash (for exists) | |
429 | + for (@followups) { $score++ unless exists $grps{$_} }; | |
430 | + undef %grps; | |
431 | ||
432 | $score += 4 if $lch{from} =~ /$url2/o; | |
433 | ||
434 | @@ -943,12 +1097,12 @@ | |
435 | if exists $Bad_Cancel_Path{$_}; | |
436 | } | |
437 | ||
438 | - reject('User-issued spam cancel') | |
439 | + return reject('User-issued spam cancel') | |
440 | if $config{block_user_spamcancels} | |
441 | and $hdr{'X-Trace'} and $hdr{'NNTP-Posting-Host'} | |
442 | and $hdr{Path} =~ /!cyberspam!/; | |
443 | ||
444 | - reject('User-issued cancel') | |
445 | + return reject('User-issued cancel') | |
446 | if $config{block_user_cancels} | |
447 | and not $hdr{Path} =~ /!cyberspam!/; | |
448 | ||
449 | @@ -1047,8 +1201,12 @@ | |
450 | ) | |
451 | \s*\r?\n # trailing spaces and end of line | |
452 | ){$config{max_encoded_lines}} # at least this many lines | |
453 | - /mox or | |
454 | - $hdr{__BODY__} =~ / | |
455 | + /mox) { | |
456 | + $Cache_Is_Binary = 'uuencoded'; | |
457 | + return $Cache_Is_Binary; | |
458 | + } | |
459 | + | |
460 | + if ($hdr{__BODY__} =~ / | |
461 | (?: | |
462 | ^[ \t|>]* | |
463 | (?> | |
464 | @@ -1057,23 +1215,44 @@ | |
465 | \s*\r?\n | |
466 | ){$config{max_encoded_lines}} | |
467 | /mox) { | |
468 | - $Cache_Is_Binary = 1; | |
469 | - return 1; | |
470 | + $Cache_Is_Binary = 'Base64'; | |
471 | + return $Cache_Is_Binary; | |
472 | } | |
473 | ||
474 | - if ($hdr{__BODY__} =~ /^=ybegin (.+)$/m) { | |
475 | + if ($hdr{__BODY__} =~ /(?:^|\n)=ybegin (.+)$/m) { | |
476 | local $_ = $1; | |
477 | if (/line=/ and /size=/ and /name=/) { | |
478 | - $Cache_Is_Binary = 1; | |
479 | - return 1; | |
480 | + $Cache_Is_Binary = 'yEnc Encoded'; | |
481 | + return $Cache_Is_Binary; | |
482 | } | |
483 | } | |
484 | ||
485 | - | |
486 | $Cache_Is_Binary = 0; | |
487 | return 0; | |
488 | } | |
489 | ||
490 | +# Returns 1 when a binary article's uuencode "begin" or yEnc "=ybegin" header names a file whose extension matches $config{image_extensions}, else 0. Useful for groups where pictures are accepted, but not other binary formats. | |
491 | +sub is_image { | |
492 | + return 0 unless is_binary(); # a non-binary article cannot be an image | |
493 | + if ($hdr{__BODY__} =~ / | |
494 | + ( # Start of uuEnc section | |
495 | + begin[ \t]+ # begin | |
496 | + [0-7]{3,4}[ \t]+ # 666 | |
497 | + | # Start of yEnc section | |
498 | + \=ybegin # ybegin | |
499 | + .+name\= # yEnc headers | |
500 | + ) | |
501 | + .+ # filename (greedy is good) | |
502 | + \.($config{image_extensions}) # image extension | |
503 | + \s*$ # end of line | |
504 | + /imox) { # i: extensions match in any case; m: $ anchors at each line end; o: pattern compiled once | |
505 | + $Cache_Is_Binary .= ' image'; # extend the cached encoding label, e.g. 'uuencoded' becomes 'uuencoded image' | |
506 | + return 1; | |
507 | + }; | |
508 | + return 0; # binary, but no image filename found in a uuencode or yEnc header | |
509 | +}; | |
510 | + | |
511 | + | |
512 | # Attempt to determine the client software | |
513 | sub x_reader { | |
514 | return lc $hdr{'X-Newsreader'} || | |
515 | @@ -1150,13 +1329,16 @@ | |
516 | sub filter_stats { | |
517 | my $md5hashentries = $MD5history ? $MD5history->count : 0; | |
518 | my $phlhashentries = $PHLhistory ? $PHLhistory->count : 0; | |
519 | + my $phnhashentries = $PHNhistory ? $PHNhistory->count : 0; | |
520 | + my $phrhashentries = $PHRhistory ? $PHRhistory->count : 0; | |
521 | my $fslhashentries = $FSLhistory ? $FSLhistory->count : 0; | |
522 | my $superentries = $Suphistory ? $Suphistory->count : 0; | |
523 | my $midhistentries = $MIDhistory->count; | |
524 | ||
525 | my $string = "Pass: $status{accepted} Reject: $status{rejected}"; | |
526 | $string .= " Refuse: $status{refused}" if $config{do_mid_filter}; | |
527 | - $string .= " MD5: $md5hashentries PHL: $phlhashentries FSL: $fslhashentries"; | |
528 | + $string .= " MD5: $md5hashentries PHL: $phlhashentries PHN: $phnhashentries"; | |
529 | + $string .= " PHR: $phrhashentries FSL: $fslhashentries"; | |
530 | $string .= " Arts/sec: $timer{rate} Accept/sec: $timer{accept_rate}" | |
531 | if $config{timer_info} and $timer{rate}; | |
532 | $string .= " cleanfeed.conf NOT loaded!" if $Local_Conf_Err; | |
533 | @@ -1199,16 +1381,22 @@ | |
534 | ||
535 | my $md5hashentries = $MD5history ? $MD5history->count : 0; | |
536 | my $phlhashentries = $PHLhistory ? $PHLhistory->count : 0; | |
537 | + my $phnhashentries = $PHNhistory ? $PHNhistory->count : 0; | |
538 | + my $phrhashentries = $PHRhistory ? $PHRhistory->count : 0; | |
539 | my $fslhashentries = $FSLhistory ? $FSLhistory->count : 0; | |
540 | my $superentries = $Suphistory ? $Suphistory->count : 0; | |
541 | my $midhistentries = $MIDhistory->count; | |
542 | my $md5count = $MD5history ? $MD5history->overflowed : 0; | |
543 | my $phlcount = $PHLhistory ? $PHLhistory->overflowed : 0; | |
544 | + my $phncount = $PHNhistory ? $PHNhistory->overflowed : 0; | |
545 | + my $phrcount = $PHRhistory ? $PHRhistory->overflowed : 0; | |
546 | my $fslcount = $FSLhistory ? $FSLhistory->overflowed : 0; | |
547 | ||
548 | print HTML "\n<p>\n" | |
549 | . "<b>MD5 entries:</b> $md5hashentries <b>Rejecting:</b> $md5count<br>\n" | |
550 | . "<b>PHL entries:</b> $phlhashentries <b>Rejecting:</b> $phlcount<br>\n" | |
551 | + . "<b>PHN entries:</b> $phnhashentries <b>Rejecting:</b> $phncount<br>\n" | |
552 | + . "<b>PHR entries:</b> $phrhashentries <b>Rejecting:</b> $phrcount<br>\n" | |
553 | . "<b>FSL entries:</b> $fslhashentries <b>Rejecting:</b> $fslcount<br>\n" | |
554 | . "<b>MID history:</b> $midhistentries\n"; | |
555 | ||
556 | @@ -1243,6 +1431,8 @@ | |
557 | ||
558 | my $md5hashentries = $MD5history ? $MD5history->count : 0; | |
559 | my $phlhashentries = $PHLhistory ? $PHLhistory->count : 0; | |
560 | + my $phnhashentries = $PHNhistory ? $PHNhistory->count : 0; | |
561 | + my $phrhashentries = $PHRhistory ? $PHRhistory->count : 0; | |
562 | my $fslhashentries = $FSLhistory ? $FSLhistory->count : 0; | |
563 | my $superentries = $Suphistory ? $Suphistory->count : 0; | |
564 | my $midhistentries = $MIDhistory->count; | |
565 | @@ -1274,6 +1464,8 @@ | |
566 | print FILE "Refused: $status{refused}\n" if $config{do_mid_filter}; | |
567 | print FILE "MD5 entries: $md5hashentries\n" | |
568 | . "PHL entries: $phlhashentries\n" | |
569 | + . "PHN entries: $phnhashentries\n" | |
570 | + . "PHR entries: $phrhashentries\n" | |
571 | . "FSL entries: $fslhashentries\n" | |
572 | . "MID history: $midhistentries\n\n"; | |
573 | if ($config{timer_info} and $timer{rate}) { | |
574 | @@ -1327,6 +1519,8 @@ | |
575 | sub trimhashes { | |
576 | $MD5history->trim if $MD5history; | |
577 | $PHLhistory->trim if $PHLhistory; | |
578 | + $PHNhistory->trim if $PHNhistory; | |
579 | + $PHRhistory->trim if $PHRhistory; | |
580 | $FSLhistory->trim if $FSLhistory; | |
581 | $Suphistory->trim if $Suphistory; | |
582 | $MIDhistory->trim; | |
583 | @@ -1348,6 +1542,13 @@ | |
584 | ############################################################################## | |
585 | ||
586 | sub saveart { | |
587 | +#TODO: Messy! I need to tidy up the mess I've made of this sub. | |
588 | + # We currently recognise various formatting options: | |
589 | + # 0: Header and body; body cut to its first 3000 characters if over 50 lines (Default) | |
590 | + # 1: Header and full body regardless of length | |
591 | + # 2: Header only | |
592 | + # 3: Just NNTP-Posting-Host (If it exists in the message) | |
593 | + | |
594 | my ($file, $info, $format) = @_; | |
595 | $format ||= 0; | |
596 | ||
597 | @@ -1358,17 +1559,29 @@ | |
598 | slog('E', "Cannot open $file: $!"); | |
599 | return; | |
600 | } | |
601 | + if ($format == 3 and $hdr{'NNTP-Posting-Host'}) { | |
602 | + print LOCAL "$hdr{'NNTP-Posting-Host'}\n" | |
603 | + } | |
604 | + if ($format != 3) { | |
605 | print LOCAL "From foo\@bar Thu Jan 1 00:00:01 1970\n"; | |
606 | print LOCAL "INFO: $info\n" if $info; | |
607 | + print LOCAL "Binary: $Cache_Is_Binary\n" | |
608 | + if is_binary(); | |
609 | foreach (sort keys %hdr) { | |
610 | next if $_ eq '__BODY__' or $_ eq '__LINES__'; | |
611 | print LOCAL "$_: $hdr{$_}\n" | |
612 | } | |
613 | + } | |
614 | if ($format == 2) { | |
615 | print LOCAL "\n"; | |
616 | - } elsif ($format != 1 and $lines > 250) { | |
617 | - print LOCAL "\n" . substr($hdr{__BODY__}, 0, 15000) . "\n\n"; | |
618 | - } else { | |
619 | + } | |
620 | + if ($lines > 50 and $format == 0) { | |
621 | + print LOCAL "\n" . substr($hdr{__BODY__}, 0, 3000) . "\n\n"; | |
622 | + } | |
623 | + if ($lines > 50 and $format == 1) { | |
624 | + print LOCAL "\n$hdr{__BODY__}\n"; | |
625 | + } | |
626 | + if ($lines <= 50 and ($format == 0 or $format == 1)) { | |
627 | print LOCAL "\n$hdr{__BODY__}\n"; | |
628 | } | |
629 | close LOCAL; | |
630 | @@ -1399,6 +1612,8 @@ | |
631 | ||
632 | $MD5history->dump('MD5history', \*DUMP) if $MD5history; | |
633 | $PHLhistory->dump('PHLhistory', \*DUMP) if $PHLhistory; | |
634 | + $PHNhistory->dump('PHNhistory', \*DUMP) if $PHNhistory; | |
635 | + $PHRhistory->dump('PHRhistory', \*DUMP) if $PHRhistory; | |
636 | $FSLhistory->dump('FSLhistory', \*DUMP) if $FSLhistory; | |
637 | ||
638 | close DUMP; | |
639 | @@ -1414,6 +1629,8 @@ | |
640 | # delete the data of checks which have been disabled since the last dump | |
641 | undef $MD5history if not $config{do_md5}; | |
642 | undef $PHLhistory if not $config{do_phl}; | |
643 | + undef $PHNhistory if not $config{do_phn}; | |
644 | + undef $PHRhistory if not $config{do_phr}; | |
645 | undef $FSLhistory if not $config{do_fsl}; | |
646 | ||
647 | # We can't syslog at startup because INN doesn't provide the callbacks | |
648 | @@ -1429,6 +1646,15 @@ | |
649 | ############################################################################## | |
650 | # parse the data files | |
651 | ############################################################################## | |
652 | +sub read_hashes { | |
653 | +    # (Re)load every bad_* data file into its matching hash, in a fixed order. | |
654 | +    my @sources = (bad_paths => \%Bad_Path, bad_cancel_paths => \%Bad_Cancel_Path, | |
655 | +        bad_adult_paths => \%Bad_Adult_Path, bad_hosts => \%Bad_Hosts, | |
656 | +        bad_hosts_central => \%Bad_Hosts_Central, bad_groups => \%Bad_Groups); | |
657 | +    # Take (file name, hash reference) pairs off the front until none are left. | |
658 | +    while (my ($file, $hash) = splice(@sources, 0, 2)) { read_hash($file, $hash) } | |
659 | +} | |
660 | + | |
661 | sub read_hash { | |
662 | my ($file, $hash) = @_; | |
663 |