X-Git-Url: http://git.pld-linux.org/?a=blobdiff_plain;f=file-fetcher.pl;h=09cdfaa42eb1413b5104d322223d012ac9f46aee;hb=7263fd0318938f3a94f2a0b653e6d283db7a8785;hp=0e7085ccc29e518ea94a3ec481d30bc0e1a36177;hpb=7ebf92458a98d5e118e90f401e006c4a06821cc9;p=projects%2Fdistfiles.git diff --git a/file-fetcher.pl b/file-fetcher.pl old mode 100644 new mode 100755 index 0e7085c..09cdfaa --- a/file-fetcher.pl +++ b/file-fetcher.pl @@ -1,22 +1,43 @@ #!/usr/bin/perl -w -# +use IPC::Run qw(run); -$spool_dir = "./spool"; -$ftp_dir = "./ftp"; -$email_cc = ""; +use Sys::Syslog; + +openlog("distfiles-ff", "pid", "user"); + +$commits_list = "pld-cvs-commit\@lists.pld-linux.org"; +$spool_dir = "./spool"; +$copy_dir = "src"; # relative to ftp root +$no_url_dir = "./upload"; +$df_server = "distfiles.pld-linux.org"; +$df_scp = "plddist\@distfiles.pld-linux.org:ftp"; +$user_agent = "PLD/distfiles"; @md5 = (); -%url = (); +@url = (); $problems = ""; $normal_out = ""; $requester = ""; $file = ""; +$fetched_count = 0; +$force_reply = 0; +$req_login = "nobody"; +$spec = ""; +@files = (); + +sub fatal($) +{ + my $msg = shift; + + syslog("err","FATAL: $msg"); + die "$msg"; +} # try lookup some file in spool, exit if it cannot be done sub find_file_in_spool() { - opendir(DIR, $spool_dir) || die "can't opendir $spool_dir: $!"; + opendir(DIR, $spool_dir) || fatal("can't opendir $spool_dir: $!"); while (1) { my $f = readdir(DIR); defined $f or last; @@ -30,50 +51,186 @@ sub find_file_in_spool() } # read file from spool, and try unlink it. if cannot unlink -- exit -# sets $requester (email), $problems, @md5 (arrays of md5's) -# and %url (map from md5 to urls) +# sets $requester (email), $problems, @md5 (arrays of md5's) +# and @url (map from md5 to urls) sub read_spool_file() { + syslog("info","reading spool file $file"); open(F, "< $file") || exit 0; $requester = ; chomp $requester; - $requester =~ /^[a-zA-Z_\-0-9\@\.]+$/ - or die "$file: evil requester: $requester"; - + $requester =~ /^[a-zA-Z_0-9@.-]+$/ + or fatal("$file: evil requester: $requester"); + $req_login = ""; + $requester =~ /^([^@]+)\@/ and $req_login = $1; + $req_login =~ /^[a-z0-9A-Z_.]+$/ or fatal("$file: evil requester $requester"); + $spec = ; + chomp $spec; + $spec =~ s/\.spec$//; + + my $flags = ; + $force_reply++ if ($flags =~ /force-reply/); + while () { if (/^ERROR/) { s/^ERROR: //; $problems .= $_; + next; + } + if (/^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([a-z0-9A-Z;:\=\?&\@\+\~\.,\-\/\#_]|\%[0-9])+(#\/[a-zA-Z0-9\._-]+)?)\s*$/) { + if (/\/$/) { + $problems .= "$file: cannot fetch dir"; + } else { + push @md5, $1; + push @url, $2; + } + } else { + $problems .= "FILE: $file: corrupted"; } - /^([a-f0-9]{32})\s+((ftp|http):\/\/([a-z0-9A-Z:\+\~\.\-\/_]|\%[0-9])+)\s*$/ - or die "$file: corrupted"; - push @md5, $1; - $url{$1} = $2; - /\/$/ and die "$file: cannot fetch dir"; } close(F); unlink($file) || exit 0; } -sub move_file($$) +sub basename($) +{ + my $f = shift; + $f =~ s|.*/||; + return $f; +} + +sub by_md5($$) { my ($md5, $url) = @_; - $md5 =~ /^(.)(.)/; - my $md5_dir = "$ftp_dir/by-md5/$1/$2/$md5"; - - mkdir("$ftp_dir/by-md5/$1"); - mkdir("$ftp_dir/by-md5/$1/$2"); - mkdir($md5_dir); - - $url =~ /\/([^\/]+)$/ or die; + return "/by-md5/$1/$2/$md5/" . basename($url); +} + +sub got_on_distfiles($$) +{ + my ($md5, $url) = @_; + my $p = by_md5($md5, $url); + my $l = `lftp -c 'debug 0; open $df_server; quote size \"$p\"'`; + return $l =~ /^213 /; +} + +sub copy_to_df($$) +{ + my ($from, $to) = @_; + my $cmd = "scp -pr -B -q $from $df_scp/$to"; + open(E, "$cmd 2>&1 |") or fatal("$cmd failed"); + my $oops = ""; + while () { + $oops .= $_; + } + $oops .= "\nThe command has exited with a non-zero status." + unless (close (E)); + $problems .= "scp problems: $cmd:\n$oops\n" + if ($oops ne ""); + syslog("err","copy_to_df oops for '$cmd': $oops") if ($oops ne ""); + return ($oops ne ""); +} + +sub move_file($$$) +{ + my ($md5, $url, $local_copy) = @_; + + my $bn = basename($url); + + if ($local_copy ne "$tmp_dir/$md5/$bn") { + if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) { + $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n"; + return; + } + $local_copy = "$tmp_dir/$md5/$bn"; + } + # no-url local copy may have wrong permissions (it's created by ftp upload) + # always fix them + chmod(0644, $local_copy); + + if (open(D, "> $tmp_dir/$md5/$bn.desc")) { + print D "URL: $url\n"; + print D "Login: $req_login\n"; + print D "MD5: $md5\n"; + print D 'Fetched-by: $Id$'."\n"; + print D "Time: " . time . "\n"; + close(D); + } else { + $problems .= "ERROR: cannot write $bn.desc\n"; + } + + my $dir = by_md5($md5, $url); + $dir =~ s|/[^/]+/[^/]+$||; + if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) { + $normal_out .= + "STORED: $url\n" . + "\t$md5 " . basename($url) . "\n" . + "\tSize: " . (-s $local_copy) . " bytes\n"; + $fetched_count++; + } +} + +sub make_src_symlink($$) +{ + my ($md5, $url) = @_; + + return unless ($url =~ /^no-url/); + + my $b = basename($url); + if (open(S, "> $tmp_dir/$b.link")) { + if ($url =~ /^no-url-copy/) { + print S (".." . by_md5($md5, $url)); + } else { + print S "REMOVE"; + } + close(S); + copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link"); + } else { + $problems .= "ERROR: cannot write $tmp_dir/$b.link\n"; + } +} + +sub md5($) +{ + my $file = shift; + my $in = ""; + my $md5 = ""; + my $err = ""; + my @cmd = ("md5sum", $file); + + run \@cmd, \$in, \$md5, \$err; + if ($err ne "") { + chomp($err); + $problems .= "FATAL: " . $err . "\n"; + return "error"; + } + chomp $md5; + $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1; + return $md5; +} + +sub handle_no_url($$) +{ + my ($md5, $url) = @_; + + unless ($url =~ m#://([^/]+)#) { + $problems .= "$url: corrupted! (no-url)"; + return; + } my $basename = $1; - if (system("mv -f \"tmp/$md5\" \"$md5_dir/$basename\"")) { - $problems .= "FATAL: cannot mv file ($url)\n"; + my $file = "$no_url_dir/$basename"; + + if (-f $file) { + my $computed_md5 = md5($file); + if ($computed_md5 ne $md5) { + $problems .= "FATAL: $basename md5 mismatch, needed $md5, got $computed_md5\n"; + } else { + move_file($md5, $url, $file); + make_src_symlink($md5, $url); + } } else { - $normal_out .= "STORED: $url ($md5, " . - (-s "$md5_dir/$basename") . " bytes)\n"; + $problems .= "FATAL: $basename was not uploaded\n"; } } @@ -81,57 +238,181 @@ sub fetch_file($$) { my ($md5, $url) = @_; my $out = ""; - my $cmd = "wget -nv -O tmp/$md5 \"$url\""; - open(W, "$cmd 2>&1 |"); + my $all_out = ""; + my $bn = basename($url); + my $local = "$tmp_dir/$md5/$bn"; + my @cmd = ("wget", "-nv", "--no-check-certificate", "--user-agent=$user_agent", "-O", $local, $url); + my $cmd_joined = join(' ', @cmd); + my @cmd2 = ("wget", "-nv", "--no-check-certificate", "--user-agent=$user_agent", "--passive-ftp", "-O", $local, $url); + my $cmd2_joined = join(' ', @cmd2); + + syslog("info","fetch_file($md5,$url)"); + + push @files, $bn; + + if ( $bn =~ m/(%[0-9a-f]{2})/i ) { + $problems .= "$bn: refusing to download file with uri escape codes ($1) in the name\n"; + $bn =~ s/%[0-9a-f]{2}/_/g; + $problems .= "HINT: use $url?/$bn as source to rename the file\n\n"; + return; + } + + if (got_on_distfiles($md5, $url)) { + $normal_out .= + "ALREADY GOT: $url\n" . + "\t$md5 " . basename($url) . "\n"; + make_src_symlink($md5, $url); + return; + } + + mkdir("$tmp_dir/$md5"); + + if ($url =~ /^no-url/) { + handle_no_url($md5, $url); + return; + } + + my $pid = open(W, "-|"); + fatal("Cannot fork $!") unless defined $pid; + unless ( $pid ) { + open STDERR, ">&", \*STDOUT or fatal("$0: open: $!"); + exec { $cmd[0] } @cmd or fatal("$0: exec: $!"); + } while () { + $all_out .= $_; /URL:.*\s+\-\>\s+.*/ and next; $out .= $_; } close(W); if ($out ne "") { - $problems .= "$cmd:\n$out\n\n"; + syslog("err","$cmd_joined: $out"); + $problems .= "$cmd_joined:\n$out\n\n"; + } + if ( $? ) { + $problems .= sprintf "%s:\nexited with code %d (0x%02x)\n\n", + $cmd_joined, + $? >> 8, + $? & 0xff; } - if (-f "tmp/$md5" && -s "tmp/$md5" > 0) { - my $computed_md5 = `md5sum tmp/$md5`; - $computed_md5 =~ /^([a-f0-9]{32})/ and $computed_md5 = $1; + if (-f $local && -s $local == 0 && $url =~ /^ftp:/) { + $out = ""; + my $pid = open(W, "-|"); + fatal("Cannot fork $!") unless defined $pid; + unless ( $pid ) { + open STDERR, ">&", \*STDOUT or fatal("$0: open: $!"); + exec { $cmd2[0] } @cmd2 or fatal("$0: exec: $!"); + } + while () { + $all_out .= "\n\t\t$_"; + /URL:.*\s+\-\>\s+.*/ and next; + $out .= $_; + } + close(W); + if ($out ne "") { + $problems .= "$cmd2_joined:\n$out\n\n"; + } + if ( $? ) { + $problems .= sprintf "%s:\nexited with code %d (0x%02x)\n\n", + $cmd2_joined, + $? >> 8, + $? & 0xff; + } + } + if (-r $local && -s $local > 0) { + my $computed_md5 = md5($local); if ($computed_md5 ne $md5) { $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n"; } else { - move_file($md5, $url); + my $testcmd = "file \"$local\" |"; + my $testres = ""; + if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) { + open(T, $testcmd) or fatal("$testcmd failed"); + $testres = ; + close(T); + } + if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) { + $testres =~ s/.*://; + $problems .= "FATAL: data returned from $url: $testres"; + } else { + move_file($md5, $url, $local); + } } + } elsif (-f $local && -s $local > 0) { + $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out): file is not readable\n"; + } elsif (-f $local && not -s $local) { + $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out): file fetched but has 0 length\n"; } else { - $problems .= "FATAL: $url ($md5) was not fetched\n"; + $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out)\n"; } + # save space + unlink($local); } sub fetch_files() { $problems .= "\n\n" if ($problems ne ""); - foreach $md5 (@md5) { - fetch_file($md5, $url{$md5}); + foreach $i (0..$#md5) { + fetch_file($md5[$i], $url[$i]); } } sub send_email() { - #open(EMAIL, "| /usr/sbin/sendmail -t"); - open(EMAIL, "| cat"); - my $marker = "OK"; + syslog("info","sending email to $requester"); + open(EMAIL, "| /usr/sbin/sendmail -t"); + #open(EMAIL, "| cat"); + my $marker = ""; if ($problems ne "") { - $marker = "ERRORS"; + $marker = "ERRORS: "; } - print EMAIL "To: $requester -Cc: $email_cc -Subject: [distfiles] sources fetched $marker -From: distfiles + my $req_login; + $requester =~ /^(.*)\@/ and $req_login = $1; + + splice(@files, 10, @files - 10, "...") + if (@files > 10); + + print EMAIL +"From: $req_login <$requester> +To: $commits_list +Cc: $requester +Subject: DISTFILES: ${spec}: ${marker}@{files} +Message-ID: <$$." . time . "\@distfiles.pld-linux.org> +X-distfiles-program: file-fetcher.pl +X-distfiles-version: " . '$Id$' . " $problems +Files fetched: $fetched_count + $normal_out + +-- +Virtually Yours: distfiles. "; - close(EMAIL) or die; + close(EMAIL) or fatal("close() failed"); +} + +sub make_tmp_dir() +{ + my $id = `uuidgen 2>/dev/null`; + chomp $id; + $id = rand if (!defined $id or $id eq ""); + $tmp_dir = "./tmp/$id"; + mkdir($tmp_dir) or fatal("mkdir($tmp_dir) failed"); } +sub clean_tmp_dir() +{ + system("rm -rf $tmp_dir") + if ($tmp_dir ne "" && -d $tmp_dir); +} + +umask(002); + find_file_in_spool(); read_spool_file(); +make_tmp_dir(); fetch_files(); -send_email(); +send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0); +clean_tmp_dir(); + +# vim: ts=2:sw=2:et:fdm=marker