]> git.pld-linux.org Git - projects/distfiles.git/blame_incremental - file-fetcher.pl
- version from DEVEL seems to be functional: merged
[projects/distfiles.git] / file-fetcher.pl
... / ...
CommitLineData
1#!/usr/bin/perl -w
2# $Id$
3
4use IPC::Run qw(run);
5
6$commits_list = "pld-cvs-commit\@lists.pld-linux.org";
7
8$spool_dir = "./spool";
9$copy_dir = "src"; # relative to ftp root
10$no_url_dir = "./upload";
11$df_server = "distfiles.pld-linux.org";
12$df_scp = "plddist\@ep09.pld-linux.org:ftp";
13
14@md5 = ();
15%url = ();
16$problems = "";
17$normal_out = "";
18$requester = "";
19$file = "";
20$fetched_count = 0;
21$force_reply = 0;
22$req_login = "nobody";
23@files = ();
24
25# try lookup some file in spool, exit if it cannot be done
26sub find_file_in_spool()
27{
28 opendir(DIR, $spool_dir) || die "can't opendir $spool_dir: $!";
29 while (1) {
30 my $f = readdir(DIR);
31 defined $f or last;
32 -f "$spool_dir/$f" or next;
33 $file = "$spool_dir/$f";
34 last;
35 }
36 closedir(DIR);
37
38 exit 0 if ($file eq "");
39}
40
41# read file from spool, and try unlink it. if cannot unlink -- exit
42# sets $requester (email), $problems, @md5 (arrays of md5's)
43# and %url (map from md5 to urls)
44sub read_spool_file()
45{
46 open(F, "< $file") || exit 0;
47 $requester = <F>;
48 chomp $requester;
49 $requester =~ /^[a-zA-Z_\-0-9\@\.]+$/
50 or die "$file: evil requester: $requester";
51 $req_login = "";
52 $requester =~ /^([^@]+)\@/ and $req_login = $1;
53 $req_login =~ /^[a-z0-9A-Z_]+$/ or die "$file: evil requester $requester";
54 my $flags = <F>;
55 $force_reply++ if ($flags =~ /force-reply/);
56
57 while (<F>) {
58 if (/^ERROR/) {
59 s/^ERROR: //;
60 $problems .= $_;
61 next;
62 }
63 /^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([=\@\?a-z0-9A-Z:\+\~\.\-\/_]|\%[0-9])+)\s*$/
64 or die "$file: corrupted";
65 push @md5, $1;
66 $url{$1} = $2;
67 /\/$/ and die "$file: cannot fetch dir";
68 }
69 close(F);
70
71 unlink($file) || exit 0;
72}
73
74sub basename($)
75{
76 my $f = shift;
77 $f =~ s|.*/||;
78 return $f;
79}
80
81sub by_md5($$)
82{
83 my ($md5, $url) = @_;
84 $md5 =~ /^(.)(.)/;
85 return "/by-md5/$1/$2/$md5/" . basename($url);
86}
87
88sub got_on_distfiles($$)
89{
90 my ($md5, $url) = @_;
91 my $p = by_md5($md5, $url);
92 my $l = `lftp -c 'debug 0; open $df_server; quote size $p'`;
93 return $l =~ /^213 /;
94}
95
96sub copy_to_df($$)
97{
98 my ($from, $to) = @_;
99 my $cmd = "scp -r -B -q $from $df_scp/$to";
100 open(E, "$cmd 2>&1 |") or die;
101 my $oops = "";
102 while (<E>) {
103 $oops .= $_;
104 }
105 $oops .= "\nThe command has exited with a non-zero status."
106 unless (close (E));
107 $problems .= "scp problems: $cmd:\n$oops\n"
108 if ($oops ne "");
109 return ($oops ne "");
110}
111
112sub move_file($$$)
113{
114 my ($md5, $url, $local_copy) = @_;
115
116 my $bn = basename($url);
117
118 if ($local_copy ne "$tmp_dir/$md5/$bn") {
119 if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) {
120 $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
121 return;
122 }
123 $local_copy = "$tmp_dir/$md5/$bn";
124 }
125
126 if (open(D, "> $tmp_dir/$md5/$bn.desc")) {
127 print D "URL: $url\n";
128 print D "Login: $req_login\n";
129 print D "MD5: $md5\n";
130 print D 'Fetched-by: $Id$'."\n";
131 print D "Time: " . time . "\n";
132 close(D);
133 } else {
134 $problems .= "ERROR: cannot write $bn.desc\n";
135 }
136
137 my $dir = by_md5($md5, $url);
138 $dir =~ s|/[^/]+/[^/]+$||;
139 if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) {
140 $normal_out .=
141 "STORED: $url\n" .
142 "\t$md5 " . basename($url) . "\n" .
143 "\tSize: " . (-s $local_copy) . " bytes\n";
144 $fetched_count++;
145 }
146}
147
148sub make_src_symlink($$)
149{
150 my ($md5, $url) = @_;
151
152 return unless ($url =~ /^no-url/);
153
154 my $b = basename($url);
155 if (open(S, "> $tmp_dir/$b.link")) {
156 if ($url =~ /^no-url-copy/) {
157 print S (".." . by_md5($md5, $url));
158 } else {
159 print S "REMOVE";
160 }
161 close(S);
162 copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link");
163 } else {
164 $problems .= "ERROR: cannot write $tmp_dir/$b.link\n";
165 }
166}
167
168sub md5($)
169{
170 my $file = shift;
171 my $in = "";
172 my $md5 = "";
173 my $err = "";
174 my @cmd = ("md5sum", $file);
175
176 run \@cmd, \$in, \$md5, \$err;
177 if ($err ne "") {
178 chomp($err);
179 $problems .= "FATAL: " . $err . "\n";
180 return "error";
181 }
182 chomp $md5;
183 $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1;
184 return $md5;
185}
186
187sub handle_no_url($$)
188{
189 my ($md5, $url) = @_;
190
191 $url =~ m|://([^/]+)| or die "corrupted! (no-url)";
192 my $basename = $1;
193 my $file = "$no_url_dir/$req_login/$basename";
194
195 if (-f $file) {
196 my $computed_md5 = md5($file);
197 if ($computed_md5 ne $md5) {
198 $problems .= "FATAL: $file md5 mismatch, needed $md5, got $computed_md5\n";
199 } else {
200 move_file($md5, $url, $file);
201 make_src_symlink($md5, $url);
202 }
203 } else {
204 $problems .= "FATAL: $file was not uploaded\n";
205 }
206}
207
208sub fetch_file($$)
209{
210 my ($md5, $url) = @_;
211 my $out = "";
212 my $all_out = "";
213 my $bn = basename($url);
214 my $local = "$tmp_dir/$md5/$bn";
215 my $cmd = "wget -nv --no-check-certificate -O $local \"$url\"";
216 my $cmd2 = "wget -nv --no-check-certificate --passive-ftp -O $local \"$url\"";
217
218 push @files, $bn;
219
220 if (got_on_distfiles($md5, $url)) {
221 $normal_out .=
222 "ALREADY GOT: $url\n" .
223 "\t$md5 " . basename($url) . "\n";
224 make_src_symlink($md5, $url);
225 return;
226 }
227
228 mkdir("$tmp_dir/$md5") or die;
229
230 if ($url =~ /^no-url/) {
231 handle_no_url($md5, $url);
232 return;
233 }
234
235 open(W, "$cmd 2>&1 |");
236 while (<W>) {
237 $all_out .= $_;
238 /URL:.*\s+\-\>\s+.*/ and next;
239 $out .= $_;
240 }
241 close(W);
242 if ($out ne "") {
243 $problems .= "$cmd:\n$out\n\n";
244 }
245 if (-f $local && -s $local == 0 && $url =~ /^ftp:/) {
246 $out = "";
247 open(W, "$cmd2 2>&1 |");
248 while (<W>) {
249 $all_out .= "\n\t\t$_";
250 /URL:.*\s+\-\>\s+.*/ and next;
251 $out .= $_;
252 }
253 close(W);
254 if ($out ne "") {
255 $problems .= "$cmd:\n$out\n\n";
256 }
257 }
258 if (-r $local && -s $local > 0) {
259 my $computed_md5 = md5($local);
260 if ($computed_md5 ne $md5) {
261 $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
262 } else {
263 my $testcmd = "file \"$local\" |";
264 my $testres = "";
265 if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
266 open(T, $testcmd) or die;
267 $testres = <T>;
268 close(T);
269 }
270 if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
271 $testres =~ s/.*://;
272 $problems .= "FATAL: data returned from $url: $testres";
273 } else {
274 move_file($md5, $url, $local);
275 }
276 }
277 } elsif (-f $local && -s $local > 0) {
278 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out): file is not readable\n";
279 } else {
280 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out)\n";
281 }
282 # save space
283 unlink($local);
284}
285
286sub fetch_files()
287{
288 $problems .= "\n\n" if ($problems ne "");
289 foreach $md5 (@md5) {
290 fetch_file($md5, $url{$md5});
291 }
292}
293
294sub send_email()
295{
296 open(EMAIL, "| /usr/sbin/sendmail -t");
297 #open(EMAIL, "| cat");
298 my $marker = "";
299 if ($problems ne "") {
300 $marker = "ERRORS: ";
301 }
302 my $req_login;
303 $requester =~ /^(.*)\@/ and $req_login = $1;
304
305 splice(@files, 10, @files - 10, "...")
306 if (@files > 10);
307
308 print EMAIL
309"From: $req_login <$requester>
310To: $commits_list
311Cc: $requester
312Subject: DISTFILES: ${marker}@{files}
313Message-ID: <$$." . time . "\@distfiles.pld-linux.org>
314X-distfiles-program: file-fetcher.pl
315X-distfiles-version: " . '$Id$' . "
316
317$problems
318Files fetched: $fetched_count
319
320$normal_out
321
322--
323Virtually Yours: distfiles.
324";
325 close(EMAIL) or die;
326}
327
328sub make_tmp_dir()
329{
330 my $id = `uuidgen 2>/dev/null`;
331 chomp $id;
332 $id = rand if (!defined $id or $id eq "");
333 $tmp_dir = "./tmp/$id";
334 mkdir($tmp_dir) or die;
335}
336
337sub clean_tmp_dir()
338{
339 system("rm -rf $tmp_dir")
340 if ($tmp_dir ne "" && -d $tmp_dir);
341}
342
343umask(002);
344
345find_file_in_spool();
346read_spool_file();
347make_tmp_dir();
348fetch_files();
349send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0);
350clean_tmp_dir();
This page took 0.052425 seconds and 4 git commands to generate.