]> git.pld-linux.org Git - projects/distfiles.git/blame - file-fetcher.pl
- make sure files from dropin are readable on ftp/distfiles
[projects/distfiles.git] / file-fetcher.pl
CommitLineData
7ebf9245 1#!/usr/bin/perl -w
56ea0f1e 2use IPC::Run qw(run);
3
7a0595ea 4$commits_list = "pld-cvs-commit\@lists.pld-linux.org";
7ebf9245 5
b5c674c5 6$spool_dir = "./spool";
01806192
MM
7$copy_dir = "src"; # relative to ftp root
8$no_url_dir = "./upload";
9$df_server = "distfiles.pld-linux.org";
40c35f1c 10$df_scp = "plddist\@distfiles.pld-linux.org:ftp";
62f7358e 11$user_agent = "PLD/distfiles";
7ebf9245
MM
12
13@md5 = ();
ccbaa374 14@url = ();
7ebf9245
MM
15$problems = "";
16$normal_out = "";
17$requester = "";
18$file = "";
47f8fda7
MM
19$fetched_count = 0;
20$force_reply = 0;
bca91c1b 21$req_login = "nobody";
cba859d3 22$spec = "";
44cd280f 23@files = ();
7ebf9245
MM
24
25# try lookup some file in spool, exit if it cannot be done
26sub find_file_in_spool()
27{
28 opendir(DIR, $spool_dir) || die "can't opendir $spool_dir: $!";
29 while (1) {
30 my $f = readdir(DIR);
31 defined $f or last;
32 -f "$spool_dir/$f" or next;
33 $file = "$spool_dir/$f";
34 last;
35 }
36 closedir(DIR);
37
38 exit 0 if ($file eq "");
39}
40
41# read file from spool, and try unlink it. if cannot unlink -- exit
576dff03 42# sets $requester (email), $problems, @md5 (arrays of md5's)
ccbaa374 43# and @url (map from md5 to urls)
7ebf9245
MM
44sub read_spool_file()
45{
46 open(F, "< $file") || exit 0;
47 $requester = <F>;
48 chomp $requester;
576dff03 49 $requester =~ /^[a-zA-Z_0-9@.-]+$/
7a1a3819 50 or die "$file: evil requester: $requester";
bca91c1b
MM
51 $req_login = "";
52 $requester =~ /^([^@]+)\@/ and $req_login = $1;
576dff03 53 $req_login =~ /^[a-z0-9A-Z_.]+$/ or die "$file: evil requester $requester";
cba859d3 54 $spec = <F>;
bf2219c8 55 chomp $spec;
cba859d3 56 $spec =~ s/\.spec$//;
bf2219c8 57
47f8fda7
MM
58 my $flags = <F>;
59 $force_reply++ if ($flags =~ /force-reply/);
576dff03 60
7ebf9245
MM
61 while (<F>) {
62 if (/^ERROR/) {
63 s/^ERROR: //;
64 $problems .= $_;
f2a640bf 65 next;
7ebf9245 66 }
8ecb7fad 67 if (/^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([a-z0-9A-Z;:\=\?&\@\+\~\.,\-\/_]|\%[0-9])+(#\/[a-zA-Z0-9\._-]+)?)\s*$/) {
e4708e3e 68 if (/\/$/) {
7a1a3819 69 $problems .= "$file: cannot fetch dir";
e4708e3e 70 } else {
7a1a3819
ER
71 push @md5, $1;
72 push @url, $2;
e4708e3e 73 }
74 } else {
7a1a3819 75 $problems .= "FILE: $file: corrupted";
e4708e3e 76 }
7ebf9245
MM
77 }
78 close(F);
79
80 unlink($file) || exit 0;
81}
82
44cd280f
MM
83sub basename($)
84{
85 my $f = shift;
86 $f =~ s|.*/||;
87 return $f;
88}
89
01806192 90sub by_md5($$)
7ebf9245
MM
91{
92 my ($md5, $url) = @_;
7ebf9245 93 $md5 =~ /^(.)(.)/;
01806192 94 return "/by-md5/$1/$2/$md5/" . basename($url);
6ce39ee7
MM
95}
96
01806192 97sub got_on_distfiles($$)
6ce39ee7 98{
01806192
MM
99 my ($md5, $url) = @_;
100 my $p = by_md5($md5, $url);
101 my $l = `lftp -c 'debug 0; open $df_server; quote size $p'`;
102 return $l =~ /^213 /;
103}
104
105sub copy_to_df($$)
106{
107 my ($from, $to) = @_;
92eba545 108 my $cmd = "scp -pr -B -q $from $df_scp/$to";
01806192
MM
109 open(E, "$cmd 2>&1 |") or die;
110 my $oops = "";
111 while (<E>) {
112 $oops .= $_;
113 }
219df45c
MM
114 $oops .= "\nThe command has exited with a non-zero status."
115 unless (close (E));
01806192
MM
116 $problems .= "scp problems: $cmd:\n$oops\n"
117 if ($oops ne "");
118 return ($oops ne "");
6ce39ee7
MM
119}
120
b5c674c5 121sub move_file($$$)
6ce39ee7 122{
b5c674c5 123 my ($md5, $url, $local_copy) = @_;
6ce39ee7 124
01806192 125 my $bn = basename($url);
576dff03 126
01806192
MM
127 if ($local_copy ne "$tmp_dir/$md5/$bn") {
128 if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) {
129 $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
130 return;
131 }
132 $local_copy = "$tmp_dir/$md5/$bn";
133 }
025462c7
JR
134 # no-url local copy may have wrong permissions (it's created by ftp upload)
135 # always fix them
136 chmod(0644, $local_copy);
576dff03 137
01806192
MM
138 if (open(D, "> $tmp_dir/$md5/$bn.desc")) {
139 print D "URL: $url\n";
d286a317 140 print D "Login: $req_login\n";
01806192
MM
141 print D "MD5: $md5\n";
142 print D 'Fetched-by: $Id$'."\n";
143 print D "Time: " . time . "\n";
144 close(D);
7ebf9245 145 } else {
01806192
MM
146 $problems .= "ERROR: cannot write $bn.desc\n";
147 }
148
149 my $dir = by_md5($md5, $url);
150 $dir =~ s|/[^/]+/[^/]+$||;
151 if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) {
576dff03 152 $normal_out .=
44cd280f
MM
153 "STORED: $url\n" .
154 "\t$md5 " . basename($url) . "\n" .
01806192 155 "\tSize: " . (-s $local_copy) . " bytes\n";
47f8fda7 156 $fetched_count++;
7ebf9245
MM
157 }
158}
159
b5c674c5
MM
160sub make_src_symlink($$)
161{
162 my ($md5, $url) = @_;
576dff03 163
01806192 164 return unless ($url =~ /^no-url/);
576dff03 165
01806192
MM
166 my $b = basename($url);
167 if (open(S, "> $tmp_dir/$b.link")) {
b5c674c5 168 if ($url =~ /^no-url-copy/) {
01806192 169 print S (".." . by_md5($md5, $url));
b5c674c5 170 } else {
01806192 171 print S "REMOVE";
b5c674c5 172 }
01806192
MM
173 close(S);
174 copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link");
b5c674c5 175 } else {
01806192 176 $problems .= "ERROR: cannot write $tmp_dir/$b.link\n";
b5c674c5
MM
177 }
178}
179
180sub md5($)
181{
182 my $file = shift;
56ea0f1e 183 my $in = "";
184 my $md5 = "";
185 my $err = "";
186 my @cmd = ("md5sum", $file);
187
188 run \@cmd, \$in, \$md5, \$err;
189 if ($err ne "") {
190 chomp($err);
a6b09732 191 $problems .= "FATAL: " . $err . "\n";
192 return "error";
56ea0f1e 193 }
194 chomp $md5;
b5c674c5
MM
195 $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1;
196 return $md5;
197}
198
199sub handle_no_url($$)
200{
201 my ($md5, $url) = @_;
576dff03 202
e4708e3e 203 unless ($url =~ m#://([^/]+)#) {
204 $problems .= "$url: corrupted! (no-url)";
205 return;
206 }
b5c674c5 207 my $basename = $1;
bca91c1b 208 my $file = "$no_url_dir/$req_login/$basename";
b5c674c5
MM
209
210 if (-f $file) {
211 my $computed_md5 = md5($file);
212 if ($computed_md5 ne $md5) {
213 $problems .= "FATAL: $file md5 mismatch, needed $md5, got $computed_md5\n";
214 } else {
215 move_file($md5, $url, $file);
216 make_src_symlink($md5, $url);
217 }
218 } else {
bca91c1b 219 $problems .= "FATAL: $file was not uploaded\n";
b5c674c5
MM
220 }
221}
222
7ebf9245
MM
223sub fetch_file($$)
224{
225 my ($md5, $url) = @_;
226 my $out = "";
08ea99b9 227 my $all_out = "";
01806192
MM
228 my $bn = basename($url);
229 my $local = "$tmp_dir/$md5/$bn";
b1659ea9
KK
230 my @cmd = ("wget", "-nv", "--no-check-certificate", "--user-agent=$user_agent", "-O", $local, $url);
231 my $cmd_joined = join(' ', @cmd);
232 my @cmd2 = ("wget", "-nv", "--no-check-certificate", "--user-agent=$user_agent", "--passive-ftp", "-O", $local, $url);
233 my $cmd2_joined = join(' ', @cmd2);
b5c674c5 234
01806192 235 push @files, $bn;
44cd280f 236
b38176ba 237 if ( $bn =~ m/(%[0-9a-f]{2})/i ) {
238 $problems .= "$bn: refusing to download file with uri escape codes ($1) in the name\n";
239 $bn =~ s/%[0-9a-f]{2}/_/g;
f3eeba03 240 $problems .= "HINT: use $url?/$bn as source to rename the file\n\n";
b38176ba 241 return;
242 }
243
01806192 244 if (got_on_distfiles($md5, $url)) {
576dff03 245 $normal_out .=
7a1a3819
ER
246 "ALREADY GOT: $url\n" .
247 "\t$md5 " . basename($url) . "\n";
b5c674c5 248 make_src_symlink($md5, $url);
6ce39ee7
MM
249 return;
250 }
01806192 251
2ae8fdec 252 mkdir("$tmp_dir/$md5");
576dff03 253
b5c674c5
MM
254 if ($url =~ /^no-url/) {
255 handle_no_url($md5, $url);
256 return;
257 }
576dff03 258
b1659ea9
KK
259 my $pid = open(W, "-|");
260 die "Cannot fork $!" unless defined $pid;
261 unless ( $pid ) {
262 open STDERR, ">&", \*STDOUT or die "$0: open: $!";
263 exec { $cmd[0] } @cmd or die "$0: exec: $!";
264 }
7ebf9245 265 while (<W>) {
08ea99b9 266 $all_out .= $_;
7ebf9245
MM
267 /URL:.*\s+\-\>\s+.*/ and next;
268 $out .= $_;
269 }
270 close(W);
271 if ($out ne "") {
b1659ea9 272 $problems .= "$cmd_joined:\n$out\n\n";
7ebf9245 273 }
d1c674a4 274 if ( $? ) {
b106d8c1 275 $problems .= sprintf "%s:\nexited with code %d (0x%02x)\n\n",
b1659ea9 276 $cmd_joined,
d1c674a4 277 $? >> 8,
278 $? & 0xff;
279 }
e23ae36a 280 if (-f $local && -s $local == 0 && $url =~ /^ftp:/) {
f4f705aa 281 $out = "";
b1659ea9
KK
282 my $pid = open(W, "-|");
283 die "Cannot fork $!" unless defined $pid;
284 unless ( $pid ) {
285 open STDERR, ">&", \*STDOUT or die "$0: open: $!";
286 exec { $cmd2[0] } @cmd2 or die "$0: exec: $!";
287 }
c0467b40 288 while (<W>) {
f4f705aa 289 $all_out .= "\n\t\t$_";
c0467b40 290 /URL:.*\s+\-\>\s+.*/ and next;
291 $out .= $_;
292 }
293 close(W);
294 if ($out ne "") {
b1659ea9 295 $problems .= "$cmd2_joined:\n$out\n\n";
d1c674a4 296 }
297 if ( $? ) {
b106d8c1 298 $problems .= sprintf "%s:\nexited with code %d (0x%02x)\n\n",
b1659ea9 299 $cmd2_joined,
d1c674a4 300 $? >> 8,
301 $? & 0xff;
c0467b40 302 }
303 }
4db63fc1 304 if (-r $local && -s $local > 0) {
01806192 305 my $computed_md5 = md5($local);
7ebf9245
MM
306 if ($computed_md5 ne $md5) {
307 $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
308 } else {
d642cf7e 309 my $testcmd = "file \"$local\" |";
310 my $testres = "";
a14ed9b7 311 if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
d642cf7e 312 open(T, $testcmd) or die;
313 $testres = <T>;
314 close(T);
315 }
316 if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
317 $testres =~ s/.*://;
318 $problems .= "FATAL: data returned from $url: $testres";
319 } else {
320 move_file($md5, $url, $local);
321 }
7ebf9245 322 }
4db63fc1 323 } elsif (-f $local && -s $local > 0) {
b1659ea9 324 $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out): file is not readable\n";
d1c674a4 325 } elsif (-f $local && not -s $local) {
b1659ea9 326 $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out): file fetched but has 0 length\n";
7ebf9245 327 } else {
b1659ea9 328 $problems .= "FATAL: $url ($md5) was not fetched correctly ($cmd_joined: $all_out)\n";
7ebf9245 329 }
01806192
MM
330 # save space
331 unlink($local);
7ebf9245
MM
332}
333
334sub fetch_files()
335{
336 $problems .= "\n\n" if ($problems ne "");
ccbaa374 337 foreach $i (0..$#md5) {
338 fetch_file($md5[$i], $url[$i]);
7ebf9245
MM
339 }
340}
341
342sub send_email()
343{
9a1e36f3
MM
344 open(EMAIL, "| /usr/sbin/sendmail -t");
345 #open(EMAIL, "| cat");
44cd280f 346 my $marker = "";
7ebf9245 347 if ($problems ne "") {
44cd280f 348 $marker = "ERRORS: ";
7ebf9245 349 }
04550b88
MM
350 my $req_login;
351 $requester =~ /^(.*)\@/ and $req_login = $1;
44cd280f
MM
352
353 splice(@files, 10, @files - 10, "...")
354 if (@files > 10);
576dff03
ER
355
356 print EMAIL
04550b88 357"From: $req_login <$requester>
482655f9
MM
358To: $commits_list
359Cc: $requester
8408fdb0 360Subject: DISTFILES: ${spec}: ${marker}@{files}
e157986d
MM
361Message-ID: <$$." . time . "\@distfiles.pld-linux.org>
362X-distfiles-program: file-fetcher.pl
363X-distfiles-version: " . '$Id$' . "
7ebf9245
MM
364
365$problems
47f8fda7
MM
366Files fetched: $fetched_count
367
7ebf9245 368$normal_out
e157986d
MM
369
370--
d8cd573c 371Virtually Yours: distfiles.
7ebf9245
MM
372";
373 close(EMAIL) or die;
374}
375
01806192
MM
376sub make_tmp_dir()
377{
378 my $id = `uuidgen 2>/dev/null`;
379 chomp $id;
380 $id = rand if (!defined $id or $id eq "");
381 $tmp_dir = "./tmp/$id";
382 mkdir($tmp_dir) or die;
383}
384
385sub clean_tmp_dir()
386{
387 system("rm -rf $tmp_dir")
388 if ($tmp_dir ne "" && -d $tmp_dir);
389}
390
2e33bbbc
MM
391umask(002);
392
7ebf9245
MM
393find_file_in_spool();
394read_spool_file();
01806192 395make_tmp_dir();
7ebf9245 396fetch_files();
47f8fda7 397send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0);
01806192 398clean_tmp_dir();
319fb6c1
ER
399
400# vim: ts=2:sw=2:et:fdm=marker
This page took 0.557512 seconds and 4 git commands to generate.