]> git.pld-linux.org Git - projects/distfiles.git/blame - file-fetcher.pl
(Temporarily) support both scp and ftp
[projects/distfiles.git] / file-fetcher.pl
CommitLineData
7ebf9245 1#!/usr/bin/perl -w
56ea0f1e 2use IPC::Run qw(run);
3
631fa1b7
MM
4use Sys::Syslog;
5
6openlog("distfiles-ff", "pid", "user");
7
7a0595ea 8$commits_list = "pld-cvs-commit\@lists.pld-linux.org";
7ebf9245 9
b5c674c5 10$spool_dir = "./spool";
01806192
MM
11$copy_dir = "src"; # relative to ftp root
12$no_url_dir = "./upload";
13$df_server = "distfiles.pld-linux.org";
40c35f1c 14$df_scp = "plddist\@distfiles.pld-linux.org:ftp";
62f7358e 15$user_agent = "PLD/distfiles";
7ebf9245
MM
16
17@md5 = ();
ccbaa374 18@url = ();
7ebf9245
MM
19$problems = "";
20$normal_out = "";
21$requester = "";
22$file = "";
47f8fda7
MM
23$fetched_count = 0;
24$force_reply = 0;
bca91c1b 25$req_login = "nobody";
cba859d3 26$spec = "";
44cd280f 27@files = ();
7ebf9245 28
631fa1b7
MM
29sub fatal($)
30{
31 my $msg = shift;
32
33 syslog("err","FATAL: $msg");
34 die "$msg";
35}
36
7ebf9245
MM
37# try lookup some file in spool, exit if it cannot be done
38sub find_file_in_spool()
39{
631fa1b7 40 opendir(DIR, $spool_dir) || fatal("can't opendir $spool_dir: $!");
7ebf9245
MM
41 while (1) {
42 my $f = readdir(DIR);
43 defined $f or last;
44 -f "$spool_dir/$f" or next;
45 $file = "$spool_dir/$f";
46 last;
47 }
48 closedir(DIR);
49
50 exit 0 if ($file eq "");
51}
52
53# read file from spool, and try unlink it. if cannot unlink -- exit
576dff03 54# sets $requester (email), $problems, @md5 (arrays of md5's)
ccbaa374 55# and @url (map from md5 to urls)
7ebf9245
MM
56sub read_spool_file()
57{
631fa1b7 58 syslog("info","reading spool file $file");
7ebf9245
MM
59 open(F, "< $file") || exit 0;
60 $requester = <F>;
61 chomp $requester;
576dff03 62 $requester =~ /^[a-zA-Z_0-9@.-]+$/
631fa1b7 63 or fatal("$file: evil requester: $requester");
bca91c1b
MM
64 $req_login = "";
65 $requester =~ /^([^@]+)\@/ and $req_login = $1;
631fa1b7 66 $req_login =~ /^[a-z0-9A-Z_.]+$/ or fatal("$file: evil requester $requester");
cba859d3 67 $spec = <F>;
bf2219c8 68 chomp $spec;
cba859d3 69 $spec =~ s/\.spec$//;
bf2219c8 70
47f8fda7
MM
71 my $flags = <F>;
72 $force_reply++ if ($flags =~ /force-reply/);
576dff03 73
7ebf9245
MM
74 while (<F>) {
75 if (/^ERROR/) {
76 s/^ERROR: //;
77 $problems .= $_;
f2a640bf 78 next;
7ebf9245 79 }
8ecb7fad 80 if (/^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([a-z0-9A-Z;:\=\?&\@\+\~\.,\-\/_]|\%[0-9])+(#\/[a-zA-Z0-9\._-]+)?)\s*$/) {
e4708e3e 81 if (/\/$/) {
7a1a3819 82 $problems .= "$file: cannot fetch dir";
e4708e3e 83 } else {
7a1a3819
ER
84 push @md5, $1;
85 push @url, $2;
e4708e3e 86 }
87 } else {
7a1a3819 88 $problems .= "FILE: $file: corrupted";
e4708e3e 89 }
7ebf9245
MM
90 }
91 close(F);
92
93 unlink($file) || exit 0;
94}
95
44cd280f
MM
96sub basename($)
97{
98 my $f = shift;
99 $f =~ s|.*/||;
100 return $f;
101}
102
01806192 103sub by_md5($$)
7ebf9245
MM
104{
105 my ($md5, $url) = @_;
7ebf9245 106 $md5 =~ /^(.)(.)/;
01806192 107 return "/by-md5/$1/$2/$md5/" . basename($url);
6ce39ee7
MM
108}
109
01806192 110sub got_on_distfiles($$)
6ce39ee7 111{
01806192
MM
112 my ($md5, $url) = @_;
113 my $p = by_md5($md5, $url);
114 my $l = `lftp -c 'debug 0; open $df_server; quote size $p'`;
115 return $l =~ /^213 /;
116}
117
118sub copy_to_df($$)
119{
120 my ($from, $to) = @_;
92eba545 121 my $cmd = "scp -pr -B -q $from $df_scp/$to";
631fa1b7 122 open(E, "$cmd 2>&1 |") or fatal("$cmd failed");
01806192
MM
123 my $oops = "";
124 while (<E>) {
125 $oops .= $_;
126 }
219df45c
MM
127 $oops .= "\nThe command has exited with a non-zero status."
128 unless (close (E));
01806192
MM
129 $problems .= "scp problems: $cmd:\n$oops\n"
130 if ($oops ne "");
631fa1b7 131 syslog("err","copy_to_df oops for '$cmd': $oops") if ($oops ne "");
01806192 132 return ($oops ne "");
6ce39ee7
MM
133}
134
b5c674c5 135sub move_file($$$)
6ce39ee7 136{
b5c674c5 137 my ($md5, $url, $local_copy) = @_;
6ce39ee7 138
01806192 139 my $bn = basename($url);
576dff03 140
01806192
MM
141 if ($local_copy ne "$tmp_dir/$md5/$bn") {
142 if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) {
143 $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
144 return;
145 }
146 $local_copy = "$tmp_dir/$md5/$bn";
147 }
025462c7
JR
148 # no-url local copy may have wrong permissions (it's created by ftp upload)
149 # always fix them
150 chmod(0644, $local_copy);
576dff03 151
01806192
MM
152 if (open(D, "> $tmp_dir/$md5/$bn.desc")) {
153 print D "URL: $url\n";
d286a317 154 print D "Login: $req_login\n";
01806192
MM
155 print D "MD5: $md5\n";
156 print D 'Fetched-by: $Id$'."\n";
157 print D "Time: " . time . "\n";
158 close(D);
7ebf9245 159 } else {
01806192
MM
160 $problems .= "ERROR: cannot write $bn.desc\n";
161 }
162
163 my $dir = by_md5($md5, $url);
164 $dir =~ s|/[^/]+/[^/]+$||;
165 if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) {
576dff03 166 $normal_out .=
44cd280f
MM
167 "STORED: $url\n" .
168 "\t$md5 " . basename($url) . "\n" .
01806192 169 "\tSize: " . (-s $local_copy) . " bytes\n";
47f8fda7 170 $fetched_count++;
7ebf9245
MM
171 }
172}
173
b5c674c5
MM
174sub make_src_symlink($$)
175{
176 my ($md5, $url) = @_;
576dff03 177
01806192 178 return unless ($url =~ /^no-url/);
576dff03 179
01806192
MM
180 my $b = basename($url);
181 if (open(S, "> $tmp_dir/$b.link")) {
b5c674c5 182 if ($url =~ /^no-url-copy/) {
01806192 183 print S (".." . by_md5($md5, $url));
b5c674c5 184 } else {
01806192 185 print S "REMOVE";
b5c674c5 186 }
01806192
MM
187 close(S);
188 copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link");
b5c674c5 189 } else {
01806192 190 $problems .= "ERROR: cannot write $tmp_dir/$b.link\n";
b5c674c5
MM
191 }
192}
193
194sub md5($)
195{
196 my $file = shift;
56ea0f1e 197 my $in = "";
198 my $md5 = "";
199 my $err = "";
200 my @cmd = ("md5sum", $file);
201
202 run \@cmd, \$in, \$md5, \$err;
203 if ($err ne "") {
204 chomp($err);
a6b09732 205 $problems .= "FATAL: " . $err . "\n";
206 return "error";
56ea0f1e 207 }
208 chomp $md5;
b5c674c5
MM
209 $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1;
210 return $md5;
211}
212
213sub handle_no_url($$)
214{
215 my ($md5, $url) = @_;
576dff03 216
e4708e3e 217 unless ($url =~ m#://([^/]+)#) {
218 $problems .= "$url: corrupted! (no-url)";
219 return;
220 }
b5c674c5 221 my $basename = $1;
453b0d4b
MM
222 my $file = "$no_url_dir/$basename";
223
224 $file = "$no_url_dir/$req_login/$basename" unless (-f $file);
b5c674c5
MM
225
226 if (-f $file) {
227 my $computed_md5 = md5($file);
228 if ($computed_md5 ne $md5) {
453b0d4b 229 $problems .= "FATAL: $basename md5 mismatch, needed $md5, got $computed_md5\n";
b5c674c5
MM
230 } else {
231 move_file($md5, $url, $file);
232 make_src_symlink($md5, $url);
233 }
234 } else {
453b0d4b 235 $problems .= "FATAL: $basename was not uploaded\n";
b5c674c5
MM
236 }
237}
238
7ebf9245
MM
239sub fetch_file($$)
240{
241 my ($md5, $url) = @_;
242 my $out = "";
08ea99b9 243 my $all_out = "";
01806192
MM
244 my $bn = basename($url);
245 my $local = "$tmp_dir/$md5/$bn";
b1659ea9
KK
246 my @cmd = ("wget", "-nv", "--no-check-certificate", "--user-agent=$user_agent", "-O", $local, $url);
247 my $cmd_joined = join(' ', @cmd);
248 my @cmd2 = ("wget", "-nv", "--no-check-certificate", "--user-agent=$user_agent", "--passive-ftp", "-O", $local, $url);
249 my $cmd2_joined = join(' ', @cmd2);
b5c674c5 250
631fa1b7
MM
251 syslog("info","fetch_file($md5,$url)");
252
01806192 253 push @files, $bn;
44cd280f 254
b38176ba 255 if ( $bn =~ m/(%[0-9a-f]{2})/i ) {
256 $problems .= "$bn: refusing to download file with uri escape codes ($1) in the name\n";
257 $bn =~ s/%[0-9a-f]{2}/_/g;
f3eeba03 258 $problems .= "HINT: use $url?/$bn as source to rename the file\n\n";
b38176ba 259 return;
260 }
261
01806192 262 if (got_on_distfiles($md5, $url)) {
576dff03 263 $normal_out .=
7a1a3819
ER
264 "ALREADY GOT: $url\n" .
265 "\t$md5 " . basename($url) . "\n";
b5c674c5 266 make_src_symlink($md5, $url);
6ce39ee7
MM
267 return;
268 }
01806192 269
2ae8fdec 270 mkdir("$tmp_dir/$md5");
576dff03 271
b5c674c5
MM
272 if ($url =~ /^no-url/) {
273 handle_no_url($md5, $url);
274 return;
275 }
576dff03 276
b1659ea9 277 my $pid = open(W, "-|");
631fa1b7 278 fatal("Cannot fork $!") unless defined $pid;
b1659ea9 279 unless ( $pid ) {
631fa1b7
MM
280 open STDERR, ">&", \*STDOUT or fatal("$0: open: $!");
281 exec { $cmd[0] } @cmd or fatal("$0: exec: $!");
b1659ea9 282 }
7ebf9245 283 while (<W>) {
08ea99b9 284 $all_out .= $_;
7ebf9245
MM
285 /URL:.*\s+\-\>\s+.*/ and next;
286 $out .= $_;
287 }
288 close(W);
289 if ($out ne "") {
631fa1b7 290 syslog("err","$cmd_joined: $out");
b1659ea9 291 $problems .= "$cmd_joined:\n$out\n\n";
7ebf9245 292 }
d1c674a4 293 if ( $? ) {
b106d8c1 294 $problems .= sprintf "%s:\nexited with code %d (0x%02x)\n\n",
b1659ea9 295 $cmd_joined,
d1c674a4 296 $? >> 8,
297 $? & 0xff;
298 }
e23ae36a 299 if (-f $local && -s $local == 0 && $url =~ /^ftp:/) {
f4f705aa 300 $out = "";
b1659ea9 301 my $pid = open(W, "-|");
631fa1b7 302 fatal("Cannot fork $!") unless defined $pid;
b1659ea9 303 unless ( $pid ) {
631fa1b7
MM
304 open STDERR, ">&", \*STDOUT or fatal("$0: open: $!");
305 exec { $cmd2[0] } @cmd2 or fatal("$0: exec: $!");
b1659ea9 306 }
c0467b40 307 while (<W>) {
f4f705aa 308 $all_out .= "\n\t\t$_";
c0467b40 309 /URL:.*\s+\-\>\s+.*/ and next;
310 $out .= $_;
311 }
312 close(W);
313 if ($out ne "") {
b1659ea9 314 $problems .= "$cmd2_joined:\n$out\n\n";
d1c674a4 315 }
316 if ( $? ) {
b106d8c1 317 $problems .= sprintf "%s:\nexited with code %d (0x%02x)\n\n",
b1659ea9 318 $cmd2_joined,
d1c674a4 319 $? >> 8,
320 $? & 0xff;
c0467b40 321 }
322 }
4db63fc1 323 if (-r $local && -s $local > 0) {
01806192 324 my $computed_md5 = md5($local);
7ebf9245
MM
325 if ($computed_md5 ne $md5) {
326 $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
327 } else {
d642cf7e 328 my $testcmd = "file \"$local\" |";
329 my $testres = "";
a14ed9b7 330 if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
631fa1b7 331 open(T, $testcmd) or fatal("$testcmd failed");
d642cf7e 332 $testres = <T>;
333 close(T);
334 }
335 if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
336 $testres =~ s/.*://;
337 $problems .= "FATAL: data returned from $url: $testres";
338 } else {
339 move_file($md5, $url, $local);
340 }
7ebf9245 341 }
4db63fc1 342 } elsif (-f $local && -s $local > 0) {
b1659ea9 343 $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out): file is not readable\n";
d1c674a4 344 } elsif (-f $local && not -s $local) {
b1659ea9 345 $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out): file fetched but has 0 length\n";
7ebf9245 346 } else {
b1659ea9 347 $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out)\n";
7ebf9245 348 }
01806192
MM
349 # save space
350 unlink($local);
7ebf9245
MM
351}
352
353sub fetch_files()
354{
355 $problems .= "\n\n" if ($problems ne "");
ccbaa374 356 foreach $i (0..$#md5) {
357 fetch_file($md5[$i], $url[$i]);
7ebf9245
MM
358 }
359}
360
361sub send_email()
362{
631fa1b7 363 syslog("info","sending email to $requester");
9a1e36f3
MM
364 open(EMAIL, "| /usr/sbin/sendmail -t");
365 #open(EMAIL, "| cat");
44cd280f 366 my $marker = "";
7ebf9245 367 if ($problems ne "") {
44cd280f 368 $marker = "ERRORS: ";
7ebf9245 369 }
04550b88
MM
370 my $req_login;
371 $requester =~ /^(.*)\@/ and $req_login = $1;
44cd280f
MM
372
373 splice(@files, 10, @files - 10, "...")
374 if (@files > 10);
576dff03
ER
375
376 print EMAIL
04550b88 377"From: $req_login <$requester>
482655f9
MM
378To: $commits_list
379Cc: $requester
8408fdb0 380Subject: DISTFILES: ${spec}: ${marker}@{files}
e157986d
MM
381Message-ID: <$$." . time . "\@distfiles.pld-linux.org>
382X-distfiles-program: file-fetcher.pl
383X-distfiles-version: " . '$Id$' . "
7ebf9245
MM
384
385$problems
47f8fda7
MM
386Files fetched: $fetched_count
387
7ebf9245 388$normal_out
e157986d
MM
389
390--
d8cd573c 391Virtually Yours: distfiles.
7ebf9245 392";
631fa1b7 393 close(EMAIL) or fatal("close() failed");
7ebf9245
MM
394}
395
01806192
MM
396sub make_tmp_dir()
397{
398 my $id = `uuidgen 2>/dev/null`;
399 chomp $id;
400 $id = rand if (!defined $id or $id eq "");
401 $tmp_dir = "./tmp/$id";
631fa1b7 402 mkdir($tmp_dir) or fatal("mkdir($tmp_dir) failed");
01806192
MM
403}
404
405sub clean_tmp_dir()
406{
407 system("rm -rf $tmp_dir")
408 if ($tmp_dir ne "" && -d $tmp_dir);
409}
410
2e33bbbc
MM
411umask(002);
412
7ebf9245
MM
413find_file_in_spool();
414read_spool_file();
01806192 415make_tmp_dir();
7ebf9245 416fetch_files();
47f8fda7 417send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0);
01806192 418clean_tmp_dir();
319fb6c1
ER
419
420# vim: ts=2:sw=2:et:fdm=marker
This page took 0.465776 seconds and 4 git commands to generate.