]> git.pld-linux.org Git - projects/distfiles.git/blame_incremental - file-fetcher.pl
It's just file, not dir.
[projects/distfiles.git] / file-fetcher.pl
... / ...
CommitLineData
1#!/usr/bin/perl -w
2# $Id$
3
4use IPC::Run qw(run);
5
6$commits_list = "pld-cvs-commit\@lists.pld-linux.org";
7
8$spool_dir = "./spool";
9$copy_dir = "src"; # relative to ftp root
10$no_url_dir = "./upload";
11$df_server = "distfiles.pld-linux.org";
12$df_scp = "plddist\@ep09.pld-linux.org:ftp";
13$user_agent = "PLD/distfiles";
14
15@md5 = ();
16@url = ();
17$problems = "";
18$normal_out = "";
19$requester = "";
20$file = "";
21$fetched_count = 0;
22$force_reply = 0;
23$req_login = "nobody";
24@files = ();
25
26# try lookup some file in spool, exit if it cannot be done
27sub find_file_in_spool()
28{
29 opendir(DIR, $spool_dir) || die "can't opendir $spool_dir: $!";
30 while (1) {
31 my $f = readdir(DIR);
32 defined $f or last;
33 -f "$spool_dir/$f" or next;
34 $file = "$spool_dir/$f";
35 last;
36 }
37 closedir(DIR);
38
39 exit 0 if ($file eq "");
40}
41
42# read file from spool, and try unlink it. if cannot unlink -- exit
43# sets $requester (email), $problems, @md5 (arrays of md5's)
44# and @url (map from md5 to urls)
45sub read_spool_file()
46{
47 open(F, "< $file") || exit 0;
48 $requester = <F>;
49 chomp $requester;
50 $requester =~ /^[a-zA-Z_0-9@.-]+$/
51 or die "$file: evil requester: $requester";
52 $req_login = "";
53 $requester =~ /^([^@]+)\@/ and $req_login = $1;
54 $req_login =~ /^[a-z0-9A-Z_.]+$/ or die "$file: evil requester $requester";
55 my $flags = <F>;
56 $force_reply++ if ($flags =~ /force-reply/);
57
58 while (<F>) {
59 if (/^ERROR/) {
60 s/^ERROR: //;
61 $problems .= $_;
62 next;
63 }
64 if (/^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([=\@\?a-z0-9A-Z:\+\~\.,\-\/_]|\%[0-9])+)\s*$/) {
65 if (/\/$/) {
66 $problems .= "$file: cannot fetch dir";
67 } else {
68 push @md5, $1;
69 push @url, $2;
70 }
71 } else {
72 $problems .= "FILE: $file: corrupted";
73 }
74 }
75 close(F);
76
77 unlink($file) || exit 0;
78}
79
80sub basename($)
81{
82 my $f = shift;
83 $f =~ s|.*/||;
84 return $f;
85}
86
87sub by_md5($$)
88{
89 my ($md5, $url) = @_;
90 $md5 =~ /^(.)(.)/;
91 return "/by-md5/$1/$2/$md5/" . basename($url);
92}
93
94sub got_on_distfiles($$)
95{
96 my ($md5, $url) = @_;
97 my $p = by_md5($md5, $url);
98 my $l = `lftp -c 'debug 0; open $df_server; quote size $p'`;
99 return $l =~ /^213 /;
100}
101
102sub copy_to_df($$)
103{
104 my ($from, $to) = @_;
105 my $cmd = "scp -r -B -q $from $df_scp/$to";
106 open(E, "$cmd 2>&1 |") or die;
107 my $oops = "";
108 while (<E>) {
109 $oops .= $_;
110 }
111 $oops .= "\nThe command has exited with a non-zero status."
112 unless (close (E));
113 $problems .= "scp problems: $cmd:\n$oops\n"
114 if ($oops ne "");
115 return ($oops ne "");
116}
117
118sub move_file($$$)
119{
120 my ($md5, $url, $local_copy) = @_;
121
122 my $bn = basename($url);
123
124 if ($local_copy ne "$tmp_dir/$md5/$bn") {
125 if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) {
126 $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
127 return;
128 }
129 $local_copy = "$tmp_dir/$md5/$bn";
130 }
131
132 if (open(D, "> $tmp_dir/$md5/$bn.desc")) {
133 print D "URL: $url\n";
134 print D "Login: $req_login\n";
135 print D "MD5: $md5\n";
136 print D 'Fetched-by: $Id$'."\n";
137 print D "Time: " . time . "\n";
138 close(D);
139 } else {
140 $problems .= "ERROR: cannot write $bn.desc\n";
141 }
142
143 my $dir = by_md5($md5, $url);
144 $dir =~ s|/[^/]+/[^/]+$||;
145 if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) {
146 $normal_out .=
147 "STORED: $url\n" .
148 "\t$md5 " . basename($url) . "\n" .
149 "\tSize: " . (-s $local_copy) . " bytes\n";
150 $fetched_count++;
151 }
152}
153
154sub make_src_symlink($$)
155{
156 my ($md5, $url) = @_;
157
158 return unless ($url =~ /^no-url/);
159
160 my $b = basename($url);
161 if (open(S, "> $tmp_dir/$b.link")) {
162 if ($url =~ /^no-url-copy/) {
163 print S (".." . by_md5($md5, $url));
164 } else {
165 print S "REMOVE";
166 }
167 close(S);
168 copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link");
169 } else {
170 $problems .= "ERROR: cannot write $tmp_dir/$b.link\n";
171 }
172}
173
174sub md5($)
175{
176 my $file = shift;
177 my $in = "";
178 my $md5 = "";
179 my $err = "";
180 my @cmd = ("md5sum", $file);
181
182 run \@cmd, \$in, \$md5, \$err;
183 if ($err ne "") {
184 chomp($err);
185 $problems .= "FATAL: " . $err . "\n";
186 return "error";
187 }
188 chomp $md5;
189 $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1;
190 return $md5;
191}
192
193sub handle_no_url($$)
194{
195 my ($md5, $url) = @_;
196
197 unless ($url =~ m#://([^/]+)#) {
198 $problems .= "$url: corrupted! (no-url)";
199 return;
200 }
201 my $basename = $1;
202 my $file = "$no_url_dir/$req_login/$basename";
203
204 if (-f $file) {
205 my $computed_md5 = md5($file);
206 if ($computed_md5 ne $md5) {
207 $problems .= "FATAL: $file md5 mismatch, needed $md5, got $computed_md5\n";
208 } else {
209 move_file($md5, $url, $file);
210 make_src_symlink($md5, $url);
211 }
212 } else {
213 $problems .= "FATAL: $file was not uploaded\n";
214 }
215}
216
217sub fetch_file($$)
218{
219 my ($md5, $url) = @_;
220 my $out = "";
221 my $all_out = "";
222 my $bn = basename($url);
223 my $local = "$tmp_dir/$md5/$bn";
224 my $cmd = "wget -nv --no-check-certificate --user-agent=$user_agent -O $local \"$url\"";
225 my $cmd2 = "wget -nv --no-check-certificate --user-agent=$user_agent --passive-ftp -O $local \"$url\"";
226
227 push @files, $bn;
228
229 if (got_on_distfiles($md5, $url)) {
230 $normal_out .=
231 "ALREADY GOT: $url\n" .
232 "\t$md5 " . basename($url) . "\n";
233 make_src_symlink($md5, $url);
234 return;
235 }
236
237 mkdir("$tmp_dir/$md5");
238
239 if ($url =~ /^no-url/) {
240 handle_no_url($md5, $url);
241 return;
242 }
243
244 open(W, "$cmd 2>&1 |");
245 while (<W>) {
246 $all_out .= $_;
247 /URL:.*\s+\-\>\s+.*/ and next;
248 $out .= $_;
249 }
250 close(W);
251 if ($out ne "") {
252 $problems .= "$cmd:\n$out\n\n";
253 }
254 if (-f $local && -s $local == 0 && $url =~ /^ftp:/) {
255 $out = "";
256 open(W, "$cmd2 2>&1 |");
257 while (<W>) {
258 $all_out .= "\n\t\t$_";
259 /URL:.*\s+\-\>\s+.*/ and next;
260 $out .= $_;
261 }
262 close(W);
263 if ($out ne "") {
264 $problems .= "$cmd:\n$out\n\n";
265 }
266 }
267 if (-r $local && -s $local > 0) {
268 my $computed_md5 = md5($local);
269 if ($computed_md5 ne $md5) {
270 $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
271 } else {
272 my $testcmd = "file \"$local\" |";
273 my $testres = "";
274 if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
275 open(T, $testcmd) or die;
276 $testres = <T>;
277 close(T);
278 }
279 if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
280 $testres =~ s/.*://;
281 $problems .= "FATAL: data returned from $url: $testres";
282 } else {
283 move_file($md5, $url, $local);
284 }
285 }
286 } elsif (-f $local && -s $local > 0) {
287 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out): file is not readable\n";
288 } else {
289 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out)\n";
290 }
291 # save space
292 unlink($local);
293}
294
295sub fetch_files()
296{
297 $problems .= "\n\n" if ($problems ne "");
298 foreach $i (0..$#md5) {
299 fetch_file($md5[$i], $url[$i]);
300 }
301}
302
303sub send_email()
304{
305 open(EMAIL, "| /usr/sbin/sendmail -t");
306 #open(EMAIL, "| cat");
307 my $marker = "";
308 if ($problems ne "") {
309 $marker = "ERRORS: ";
310 }
311 my $req_login;
312 $requester =~ /^(.*)\@/ and $req_login = $1;
313
314 splice(@files, 10, @files - 10, "...")
315 if (@files > 10);
316
317 print EMAIL
318"From: $req_login <$requester>
319To: $commits_list
320Cc: $requester
321Subject: DISTFILES: ${marker}@{files}
322Message-ID: <$$." . time . "\@distfiles.pld-linux.org>
323X-distfiles-program: file-fetcher.pl
324X-distfiles-version: " . '$Id$' . "
325
326$problems
327Files fetched: $fetched_count
328
329$normal_out
330
331--
332Virtually Yours: distfiles.
333";
334 close(EMAIL) or die;
335}
336
337sub make_tmp_dir()
338{
339 my $id = `uuidgen 2>/dev/null`;
340 chomp $id;
341 $id = rand if (!defined $id or $id eq "");
342 $tmp_dir = "./tmp/$id";
343 mkdir($tmp_dir) or die;
344}
345
346sub clean_tmp_dir()
347{
348 system("rm -rf $tmp_dir")
349 if ($tmp_dir ne "" && -d $tmp_dir);
350}
351
352umask(002);
353
354find_file_in_spool();
355read_spool_file();
356make_tmp_dir();
357fetch_files();
358send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0);
359clean_tmp_dir();
This page took 0.0415 seconds and 4 git commands to generate.