]> git.pld-linux.org Git - projects/distfiles.git/blame - file-fetcher.pl
Don't die if md5 duplicates (opera.spec)
[projects/distfiles.git] / file-fetcher.pl
CommitLineData
7ebf9245 1#!/usr/bin/perl -w
b396edfa 2# $Id$
7ebf9245 3
56ea0f1e 4use IPC::Run qw(run);
5
7a0595ea 6$commits_list = "pld-cvs-commit\@lists.pld-linux.org";
7ebf9245 7
b5c674c5 8$spool_dir = "./spool";
01806192
MM
9$copy_dir = "src"; # relative to ftp root
10$no_url_dir = "./upload";
11$df_server = "distfiles.pld-linux.org";
51fe5d4e 12$df_scp = "plddist\@ep09.pld-linux.org:ftp";
7ebf9245
MM
13
14@md5 = ();
ccbaa374 15@url = ();
7ebf9245
MM
16$problems = "";
17$normal_out = "";
18$requester = "";
19$file = "";
47f8fda7
MM
20$fetched_count = 0;
21$force_reply = 0;
bca91c1b 22$req_login = "nobody";
44cd280f 23@files = ();
7ebf9245
MM
24
25# try lookup some file in spool, exit if it cannot be done
26sub find_file_in_spool()
27{
28 opendir(DIR, $spool_dir) || die "can't opendir $spool_dir: $!";
29 while (1) {
30 my $f = readdir(DIR);
31 defined $f or last;
32 -f "$spool_dir/$f" or next;
33 $file = "$spool_dir/$f";
34 last;
35 }
36 closedir(DIR);
37
38 exit 0 if ($file eq "");
39}
40
41# read file from spool, and try unlink it. if cannot unlink -- exit
42# sets $requester (email), $problems, @md5 (arrays of md5's)
ccbaa374 43# and @url (map from md5 to urls)
7ebf9245
MM
44sub read_spool_file()
45{
46 open(F, "< $file") || exit 0;
47 $requester = <F>;
48 chomp $requester;
49 $requester =~ /^[a-zA-Z_\-0-9\@\.]+$/
50 or die "$file: evil requester: $requester";
bca91c1b
MM
51 $req_login = "";
52 $requester =~ /^([^@]+)\@/ and $req_login = $1;
53 $req_login =~ /^[a-z0-9A-Z_]+$/ or die "$file: evil requester $requester";
47f8fda7
MM
54 my $flags = <F>;
55 $force_reply++ if ($flags =~ /force-reply/);
7ebf9245
MM
56
57 while (<F>) {
58 if (/^ERROR/) {
59 s/^ERROR: //;
60 $problems .= $_;
f2a640bf 61 next;
7ebf9245 62 }
e4708e3e 63 if (/^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([=\@\?a-z0-9A-Z:\+\~\.,\-\/_]|\%[0-9])+)\s*$/) {
64 if (/\/$/) {
65 $problems .= "$file: cannot fetch dir";
66 } else {
67 push @md5, $1;
68 push @url, $2;
69 }
70 } else {
71 $problems .= "FILE: $file: corrupted";
72 }
7ebf9245
MM
73 }
74 close(F);
75
76 unlink($file) || exit 0;
77}
78
44cd280f
MM
79sub basename($)
80{
81 my $f = shift;
82 $f =~ s|.*/||;
83 return $f;
84}
85
01806192 86sub by_md5($$)
7ebf9245
MM
87{
88 my ($md5, $url) = @_;
7ebf9245 89 $md5 =~ /^(.)(.)/;
01806192 90 return "/by-md5/$1/$2/$md5/" . basename($url);
6ce39ee7
MM
91}
92
01806192 93sub got_on_distfiles($$)
6ce39ee7 94{
01806192
MM
95 my ($md5, $url) = @_;
96 my $p = by_md5($md5, $url);
97 my $l = `lftp -c 'debug 0; open $df_server; quote size $p'`;
98 return $l =~ /^213 /;
99}
100
101sub copy_to_df($$)
102{
103 my ($from, $to) = @_;
104 my $cmd = "scp -r -B -q $from $df_scp/$to";
105 open(E, "$cmd 2>&1 |") or die;
106 my $oops = "";
107 while (<E>) {
108 $oops .= $_;
109 }
219df45c
MM
110 $oops .= "\nThe command has exited with a non-zero status."
111 unless (close (E));
01806192
MM
112 $problems .= "scp problems: $cmd:\n$oops\n"
113 if ($oops ne "");
114 return ($oops ne "");
6ce39ee7
MM
115}
116
b5c674c5 117sub move_file($$$)
6ce39ee7 118{
b5c674c5 119 my ($md5, $url, $local_copy) = @_;
6ce39ee7 120
01806192 121 my $bn = basename($url);
6ce39ee7 122
01806192
MM
123 if ($local_copy ne "$tmp_dir/$md5/$bn") {
124 if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) {
125 $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
126 return;
127 }
128 $local_copy = "$tmp_dir/$md5/$bn";
129 }
6ce39ee7 130
01806192
MM
131 if (open(D, "> $tmp_dir/$md5/$bn.desc")) {
132 print D "URL: $url\n";
d286a317 133 print D "Login: $req_login\n";
01806192
MM
134 print D "MD5: $md5\n";
135 print D 'Fetched-by: $Id$'."\n";
136 print D "Time: " . time . "\n";
137 close(D);
7ebf9245 138 } else {
01806192
MM
139 $problems .= "ERROR: cannot write $bn.desc\n";
140 }
141
142 my $dir = by_md5($md5, $url);
143 $dir =~ s|/[^/]+/[^/]+$||;
144 if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) {
44cd280f
MM
145 $normal_out .=
146 "STORED: $url\n" .
147 "\t$md5 " . basename($url) . "\n" .
01806192 148 "\tSize: " . (-s $local_copy) . " bytes\n";
47f8fda7 149 $fetched_count++;
7ebf9245
MM
150 }
151}
152
b5c674c5
MM
153sub make_src_symlink($$)
154{
155 my ($md5, $url) = @_;
01806192
MM
156
157 return unless ($url =~ /^no-url/);
158
159 my $b = basename($url);
160 if (open(S, "> $tmp_dir/$b.link")) {
b5c674c5 161 if ($url =~ /^no-url-copy/) {
01806192 162 print S (".." . by_md5($md5, $url));
b5c674c5 163 } else {
01806192 164 print S "REMOVE";
b5c674c5 165 }
01806192
MM
166 close(S);
167 copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link");
b5c674c5 168 } else {
01806192 169 $problems .= "ERROR: cannot write $tmp_dir/$b.link\n";
b5c674c5
MM
170 }
171}
172
173sub md5($)
174{
175 my $file = shift;
56ea0f1e 176 my $in = "";
177 my $md5 = "";
178 my $err = "";
179 my @cmd = ("md5sum", $file);
180
181 run \@cmd, \$in, \$md5, \$err;
182 if ($err ne "") {
183 chomp($err);
a6b09732 184 $problems .= "FATAL: " . $err . "\n";
185 return "error";
56ea0f1e 186 }
187 chomp $md5;
b5c674c5
MM
188 $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1;
189 return $md5;
190}
191
192sub handle_no_url($$)
193{
194 my ($md5, $url) = @_;
195
e4708e3e 196 unless ($url =~ m#://([^/]+)#) {
197 $problems .= "$url: corrupted! (no-url)";
198 return;
199 }
b5c674c5 200 my $basename = $1;
bca91c1b 201 my $file = "$no_url_dir/$req_login/$basename";
b5c674c5
MM
202
203 if (-f $file) {
204 my $computed_md5 = md5($file);
205 if ($computed_md5 ne $md5) {
206 $problems .= "FATAL: $file md5 mismatch, needed $md5, got $computed_md5\n";
207 } else {
208 move_file($md5, $url, $file);
209 make_src_symlink($md5, $url);
210 }
211 } else {
bca91c1b 212 $problems .= "FATAL: $file was not uploaded\n";
b5c674c5
MM
213 }
214}
215
7ebf9245
MM
216sub fetch_file($$)
217{
218 my ($md5, $url) = @_;
219 my $out = "";
08ea99b9 220 my $all_out = "";
01806192
MM
221 my $bn = basename($url);
222 my $local = "$tmp_dir/$md5/$bn";
a0896488
AG
223 my $cmd = "wget -nv --no-check-certificate -O $local \"$url\"";
224 my $cmd2 = "wget -nv --no-check-certificate --passive-ftp -O $local \"$url\"";
b5c674c5 225
01806192 226 push @files, $bn;
44cd280f 227
01806192 228 if (got_on_distfiles($md5, $url)) {
00eefa39
MM
229 $normal_out .=
230 "ALREADY GOT: $url\n" .
01806192 231 "\t$md5 " . basename($url) . "\n";
b5c674c5 232 make_src_symlink($md5, $url);
6ce39ee7
MM
233 return;
234 }
01806192 235
2ae8fdec 236 mkdir("$tmp_dir/$md5");
01806192 237
b5c674c5
MM
238 if ($url =~ /^no-url/) {
239 handle_no_url($md5, $url);
240 return;
241 }
242
7ebf9245
MM
243 open(W, "$cmd 2>&1 |");
244 while (<W>) {
08ea99b9 245 $all_out .= $_;
7ebf9245
MM
246 /URL:.*\s+\-\>\s+.*/ and next;
247 $out .= $_;
248 }
249 close(W);
250 if ($out ne "") {
251 $problems .= "$cmd:\n$out\n\n";
252 }
e23ae36a 253 if (-f $local && -s $local == 0 && $url =~ /^ftp:/) {
f4f705aa 254 $out = "";
c0467b40 255 open(W, "$cmd2 2>&1 |");
256 while (<W>) {
f4f705aa 257 $all_out .= "\n\t\t$_";
c0467b40 258 /URL:.*\s+\-\>\s+.*/ and next;
259 $out .= $_;
260 }
261 close(W);
262 if ($out ne "") {
263 $problems .= "$cmd:\n$out\n\n";
264 }
265 }
4db63fc1 266 if (-r $local && -s $local > 0) {
01806192 267 my $computed_md5 = md5($local);
7ebf9245
MM
268 if ($computed_md5 ne $md5) {
269 $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
270 } else {
d642cf7e 271 my $testcmd = "file \"$local\" |";
272 my $testres = "";
a14ed9b7 273 if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
d642cf7e 274 open(T, $testcmd) or die;
275 $testres = <T>;
276 close(T);
277 }
278 if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
279 $testres =~ s/.*://;
280 $problems .= "FATAL: data returned from $url: $testres";
281 } else {
282 move_file($md5, $url, $local);
283 }
7ebf9245 284 }
4db63fc1 285 } elsif (-f $local && -s $local > 0) {
286 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out): file is not readable\n";
7ebf9245 287 } else {
08ea99b9 288 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out)\n";
7ebf9245 289 }
01806192
MM
290 # save space
291 unlink($local);
7ebf9245
MM
292}
293
294sub fetch_files()
295{
296 $problems .= "\n\n" if ($problems ne "");
ccbaa374 297 foreach $i (0..$#md5) {
298 fetch_file($md5[$i], $url[$i]);
7ebf9245
MM
299 }
300}
301
302sub send_email()
303{
9a1e36f3
MM
304 open(EMAIL, "| /usr/sbin/sendmail -t");
305 #open(EMAIL, "| cat");
44cd280f 306 my $marker = "";
7ebf9245 307 if ($problems ne "") {
44cd280f 308 $marker = "ERRORS: ";
7ebf9245 309 }
04550b88
MM
310 my $req_login;
311 $requester =~ /^(.*)\@/ and $req_login = $1;
44cd280f
MM
312
313 splice(@files, 10, @files - 10, "...")
314 if (@files > 10);
315
e157986d 316 print EMAIL
04550b88 317"From: $req_login <$requester>
482655f9
MM
318To: $commits_list
319Cc: $requester
44cd280f 320Subject: DISTFILES: ${marker}@{files}
e157986d
MM
321Message-ID: <$$." . time . "\@distfiles.pld-linux.org>
322X-distfiles-program: file-fetcher.pl
323X-distfiles-version: " . '$Id$' . "
7ebf9245
MM
324
325$problems
47f8fda7
MM
326Files fetched: $fetched_count
327
7ebf9245 328$normal_out
e157986d
MM
329
330--
d8cd573c 331Virtually Yours: distfiles.
7ebf9245
MM
332";
333 close(EMAIL) or die;
334}
335
01806192
MM
336sub make_tmp_dir()
337{
338 my $id = `uuidgen 2>/dev/null`;
339 chomp $id;
340 $id = rand if (!defined $id or $id eq "");
341 $tmp_dir = "./tmp/$id";
342 mkdir($tmp_dir) or die;
343}
344
345sub clean_tmp_dir()
346{
347 system("rm -rf $tmp_dir")
348 if ($tmp_dir ne "" && -d $tmp_dir);
349}
350
2e33bbbc
MM
351umask(002);
352
7ebf9245
MM
353find_file_in_spool();
354read_spool_file();
01806192 355make_tmp_dir();
7ebf9245 356fetch_files();
47f8fda7 357send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0);
01806192 358clean_tmp_dir();
This page took 0.072261 seconds and 4 git commands to generate.