]> git.pld-linux.org Git - projects/distfiles.git/blame - file-fetcher.pl
- allow ";" in uris
[projects/distfiles.git] / file-fetcher.pl
CommitLineData
7ebf9245 1#!/usr/bin/perl -w
b396edfa 2# $Id$
7ebf9245 3
56ea0f1e 4use IPC::Run qw(run);
5
7a0595ea 6$commits_list = "pld-cvs-commit\@lists.pld-linux.org";
7ebf9245 7
b5c674c5 8$spool_dir = "./spool";
01806192
MM
9$copy_dir = "src"; # relative to ftp root
10$no_url_dir = "./upload";
11$df_server = "distfiles.pld-linux.org";
51fe5d4e 12$df_scp = "plddist\@ep09.pld-linux.org:ftp";
62f7358e 13$user_agent = "PLD/distfiles";
7ebf9245
MM
14
15@md5 = ();
ccbaa374 16@url = ();
7ebf9245
MM
17$problems = "";
18$normal_out = "";
19$requester = "";
20$file = "";
47f8fda7
MM
21$fetched_count = 0;
22$force_reply = 0;
bca91c1b 23$req_login = "nobody";
cba859d3 24$spec = "";
44cd280f 25@files = ();
7ebf9245
MM
26
27# try lookup some file in spool, exit if it cannot be done
28sub find_file_in_spool()
29{
30 opendir(DIR, $spool_dir) || die "can't opendir $spool_dir: $!";
31 while (1) {
32 my $f = readdir(DIR);
33 defined $f or last;
34 -f "$spool_dir/$f" or next;
35 $file = "$spool_dir/$f";
36 last;
37 }
38 closedir(DIR);
39
40 exit 0 if ($file eq "");
41}
42
43# read file from spool, and try unlink it. if cannot unlink -- exit
576dff03 44# sets $requester (email), $problems, @md5 (arrays of md5's)
ccbaa374 45# and @url (map from md5 to urls)
7ebf9245
MM
46sub read_spool_file()
47{
48 open(F, "< $file") || exit 0;
49 $requester = <F>;
50 chomp $requester;
576dff03 51 $requester =~ /^[a-zA-Z_0-9@.-]+$/
7ebf9245 52 or die "$file: evil requester: $requester";
bca91c1b
MM
53 $req_login = "";
54 $requester =~ /^([^@]+)\@/ and $req_login = $1;
576dff03 55 $req_login =~ /^[a-z0-9A-Z_.]+$/ or die "$file: evil requester $requester";
cba859d3 56 $spec = <F>;
57 $spec =~ s/\.spec$//;
47f8fda7
MM
58 my $flags = <F>;
59 $force_reply++ if ($flags =~ /force-reply/);
576dff03 60
7ebf9245
MM
61 while (<F>) {
62 if (/^ERROR/) {
63 s/^ERROR: //;
64 $problems .= $_;
f2a640bf 65 next;
7ebf9245 66 }
e4708e3e 67 if (/^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([=\@\?a-z0-9A-Z:\+\~\.,\-\/_]|\%[0-9])+)\s*$/) {
68 if (/\/$/) {
69 $problems .= "$file: cannot fetch dir";
70 } else {
71 push @md5, $1;
72 push @url, $2;
73 }
74 } else {
75 $problems .= "FILE: $file: corrupted";
76 }
7ebf9245
MM
77 }
78 close(F);
79
80 unlink($file) || exit 0;
81}
82
44cd280f
MM
83sub basename($)
84{
85 my $f = shift;
86 $f =~ s|.*/||;
87 return $f;
88}
89
01806192 90sub by_md5($$)
7ebf9245
MM
91{
92 my ($md5, $url) = @_;
7ebf9245 93 $md5 =~ /^(.)(.)/;
01806192 94 return "/by-md5/$1/$2/$md5/" . basename($url);
6ce39ee7
MM
95}
96
01806192 97sub got_on_distfiles($$)
6ce39ee7 98{
01806192
MM
99 my ($md5, $url) = @_;
100 my $p = by_md5($md5, $url);
101 my $l = `lftp -c 'debug 0; open $df_server; quote size $p'`;
102 return $l =~ /^213 /;
103}
104
105sub copy_to_df($$)
106{
107 my ($from, $to) = @_;
108 my $cmd = "scp -r -B -q $from $df_scp/$to";
109 open(E, "$cmd 2>&1 |") or die;
110 my $oops = "";
111 while (<E>) {
112 $oops .= $_;
113 }
219df45c
MM
114 $oops .= "\nThe command has exited with a non-zero status."
115 unless (close (E));
01806192
MM
116 $problems .= "scp problems: $cmd:\n$oops\n"
117 if ($oops ne "");
118 return ($oops ne "");
6ce39ee7
MM
119}
120
b5c674c5 121sub move_file($$$)
6ce39ee7 122{
b5c674c5 123 my ($md5, $url, $local_copy) = @_;
6ce39ee7 124
01806192 125 my $bn = basename($url);
576dff03 126
01806192
MM
127 if ($local_copy ne "$tmp_dir/$md5/$bn") {
128 if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) {
129 $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
130 return;
131 }
132 $local_copy = "$tmp_dir/$md5/$bn";
133 }
576dff03 134
01806192
MM
135 if (open(D, "> $tmp_dir/$md5/$bn.desc")) {
136 print D "URL: $url\n";
d286a317 137 print D "Login: $req_login\n";
01806192
MM
138 print D "MD5: $md5\n";
139 print D 'Fetched-by: $Id$'."\n";
140 print D "Time: " . time . "\n";
141 close(D);
7ebf9245 142 } else {
01806192
MM
143 $problems .= "ERROR: cannot write $bn.desc\n";
144 }
145
146 my $dir = by_md5($md5, $url);
147 $dir =~ s|/[^/]+/[^/]+$||;
148 if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) {
576dff03 149 $normal_out .=
44cd280f
MM
150 "STORED: $url\n" .
151 "\t$md5 " . basename($url) . "\n" .
01806192 152 "\tSize: " . (-s $local_copy) . " bytes\n";
47f8fda7 153 $fetched_count++;
7ebf9245
MM
154 }
155}
156
b5c674c5
MM
157sub make_src_symlink($$)
158{
159 my ($md5, $url) = @_;
576dff03 160
01806192 161 return unless ($url =~ /^no-url/);
576dff03 162
01806192
MM
163 my $b = basename($url);
164 if (open(S, "> $tmp_dir/$b.link")) {
b5c674c5 165 if ($url =~ /^no-url-copy/) {
01806192 166 print S (".." . by_md5($md5, $url));
b5c674c5 167 } else {
01806192 168 print S "REMOVE";
b5c674c5 169 }
01806192
MM
170 close(S);
171 copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link");
b5c674c5 172 } else {
01806192 173 $problems .= "ERROR: cannot write $tmp_dir/$b.link\n";
b5c674c5
MM
174 }
175}
176
177sub md5($)
178{
179 my $file = shift;
56ea0f1e 180 my $in = "";
181 my $md5 = "";
182 my $err = "";
183 my @cmd = ("md5sum", $file);
184
185 run \@cmd, \$in, \$md5, \$err;
186 if ($err ne "") {
187 chomp($err);
a6b09732 188 $problems .= "FATAL: " . $err . "\n";
189 return "error";
56ea0f1e 190 }
191 chomp $md5;
b5c674c5
MM
192 $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1;
193 return $md5;
194}
195
196sub handle_no_url($$)
197{
198 my ($md5, $url) = @_;
576dff03 199
e4708e3e 200 unless ($url =~ m#://([^/]+)#) {
201 $problems .= "$url: corrupted! (no-url)";
202 return;
203 }
b5c674c5 204 my $basename = $1;
bca91c1b 205 my $file = "$no_url_dir/$req_login/$basename";
b5c674c5
MM
206
207 if (-f $file) {
208 my $computed_md5 = md5($file);
209 if ($computed_md5 ne $md5) {
210 $problems .= "FATAL: $file md5 mismatch, needed $md5, got $computed_md5\n";
211 } else {
212 move_file($md5, $url, $file);
213 make_src_symlink($md5, $url);
214 }
215 } else {
bca91c1b 216 $problems .= "FATAL: $file was not uploaded\n";
b5c674c5
MM
217 }
218}
219
7ebf9245
MM
220sub fetch_file($$)
221{
222 my ($md5, $url) = @_;
223 my $out = "";
08ea99b9 224 my $all_out = "";
01806192
MM
225 my $bn = basename($url);
226 my $local = "$tmp_dir/$md5/$bn";
8f58f574
JB
227 my $cmd = "wget -nv --no-check-certificate --user-agent=$user_agent -O $local \"$url\"";
228 my $cmd2 = "wget -nv --no-check-certificate --user-agent=$user_agent --passive-ftp -O $local \"$url\"";
b5c674c5 229
01806192 230 push @files, $bn;
44cd280f 231
01806192 232 if (got_on_distfiles($md5, $url)) {
576dff03 233 $normal_out .=
00eefa39 234 "ALREADY GOT: $url\n" .
01806192 235 "\t$md5 " . basename($url) . "\n";
b5c674c5 236 make_src_symlink($md5, $url);
6ce39ee7
MM
237 return;
238 }
01806192 239
2ae8fdec 240 mkdir("$tmp_dir/$md5");
576dff03 241
b5c674c5
MM
242 if ($url =~ /^no-url/) {
243 handle_no_url($md5, $url);
244 return;
245 }
576dff03 246
7ebf9245
MM
247 open(W, "$cmd 2>&1 |");
248 while (<W>) {
08ea99b9 249 $all_out .= $_;
7ebf9245
MM
250 /URL:.*\s+\-\>\s+.*/ and next;
251 $out .= $_;
252 }
253 close(W);
254 if ($out ne "") {
255 $problems .= "$cmd:\n$out\n\n";
256 }
e23ae36a 257 if (-f $local && -s $local == 0 && $url =~ /^ftp:/) {
f4f705aa 258 $out = "";
c0467b40 259 open(W, "$cmd2 2>&1 |");
260 while (<W>) {
f4f705aa 261 $all_out .= "\n\t\t$_";
c0467b40 262 /URL:.*\s+\-\>\s+.*/ and next;
263 $out .= $_;
264 }
265 close(W);
266 if ($out ne "") {
267 $problems .= "$cmd:\n$out\n\n";
268 }
269 }
4db63fc1 270 if (-r $local && -s $local > 0) {
01806192 271 my $computed_md5 = md5($local);
7ebf9245
MM
272 if ($computed_md5 ne $md5) {
273 $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
274 } else {
d642cf7e 275 my $testcmd = "file \"$local\" |";
276 my $testres = "";
a14ed9b7 277 if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
d642cf7e 278 open(T, $testcmd) or die;
279 $testres = <T>;
280 close(T);
281 }
282 if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
283 $testres =~ s/.*://;
284 $problems .= "FATAL: data returned from $url: $testres";
285 } else {
286 move_file($md5, $url, $local);
287 }
7ebf9245 288 }
4db63fc1 289 } elsif (-f $local && -s $local > 0) {
290 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out): file is not readable\n";
7ebf9245 291 } else {
08ea99b9 292 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out)\n";
7ebf9245 293 }
01806192
MM
294 # save space
295 unlink($local);
7ebf9245
MM
296}
297
298sub fetch_files()
299{
300 $problems .= "\n\n" if ($problems ne "");
ccbaa374 301 foreach $i (0..$#md5) {
302 fetch_file($md5[$i], $url[$i]);
7ebf9245
MM
303 }
304}
305
306sub send_email()
307{
9a1e36f3
MM
308 open(EMAIL, "| /usr/sbin/sendmail -t");
309 #open(EMAIL, "| cat");
44cd280f 310 my $marker = "";
7ebf9245 311 if ($problems ne "") {
44cd280f 312 $marker = "ERRORS: ";
7ebf9245 313 }
04550b88
MM
314 my $req_login;
315 $requester =~ /^(.*)\@/ and $req_login = $1;
44cd280f
MM
316
317 splice(@files, 10, @files - 10, "...")
318 if (@files > 10);
576dff03
ER
319
320 print EMAIL
04550b88 321"From: $req_login <$requester>
482655f9
MM
322To: $commits_list
323Cc: $requester
8408fdb0 324Subject: DISTFILES: ${spec}: ${marker}@{files}
e157986d
MM
325Message-ID: <$$." . time . "\@distfiles.pld-linux.org>
326X-distfiles-program: file-fetcher.pl
327X-distfiles-version: " . '$Id$' . "
7ebf9245
MM
328
329$problems
47f8fda7
MM
330Files fetched: $fetched_count
331
7ebf9245 332$normal_out
e157986d
MM
333
334--
d8cd573c 335Virtually Yours: distfiles.
7ebf9245
MM
336";
337 close(EMAIL) or die;
338}
339
01806192
MM
340sub make_tmp_dir()
341{
342 my $id = `uuidgen 2>/dev/null`;
343 chomp $id;
344 $id = rand if (!defined $id or $id eq "");
345 $tmp_dir = "./tmp/$id";
346 mkdir($tmp_dir) or die;
347}
348
349sub clean_tmp_dir()
350{
351 system("rm -rf $tmp_dir")
352 if ($tmp_dir ne "" && -d $tmp_dir);
353}
354
2e33bbbc
MM
355umask(002);
356
7ebf9245
MM
357find_file_in_spool();
358read_spool_file();
01806192 359make_tmp_dir();
7ebf9245 360fetch_files();
47f8fda7 361send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0);
01806192 362clean_tmp_dir();
This page took 0.097358 seconds and 4 git commands to generate.