]> git.pld-linux.org Git - projects/distfiles.git/blame - file-fetcher.pl
It's just file, not dir.
[projects/distfiles.git] / file-fetcher.pl
CommitLineData
7ebf9245 1#!/usr/bin/perl -w
b396edfa 2# $Id$
7ebf9245 3
56ea0f1e 4use IPC::Run qw(run);
5
# ---- Static configuration ------------------------------------------------

# Address that receives every report mail (the requester is Cc'ed).
$commits_list = "pld-cvs-commit\@lists.pld-linux.org";

# Local working directories.
$spool_dir = "./spool";     # request files are picked up from here
$no_url_dir = "./upload";   # per-login uploads used for no-url sources

# Remote distfiles host: FTP name used for existence checks, scp target
# used for uploads.
$df_server = "distfiles.pld-linux.org";
$df_scp = "plddist\@ep09.pld-linux.org:ftp";
$copy_dir = "src"; # relative to ftp root

# User-Agent string handed to wget.
$user_agent = "PLD/distfiles";

# ---- Per-request state, filled in while the request is processed ---------

# Parallel arrays: $url[$i] has to be fetched and must hash to $md5[$i].
@md5 = @url = ();

# Base names of all handled files, used in the mail subject.
@files = ();

($problems, $normal_out, $requester, $file) = ("") x 4;
($fetched_count, $force_reply) = (0, 0);
$req_login = "nobody";
7ebf9245
MM
25
# Look for a request in the spool: the first regular file that readdir()
# returns from $spool_dir is remembered (with its directory prefix) in the
# global $file.
# Dies when the spool directory cannot be opened; exits quietly with
# status 0 when it contains no regular file.
sub find_file_in_spool()
{
    opendir(my $spool, $spool_dir) || die "can't opendir $spool_dir: $!";
    while (defined(my $entry = readdir($spool))) {
        my $candidate = "$spool_dir/$entry";
        if (-f $candidate) {
            $file = $candidate;
            last;
        }
    }
    closedir($spool);

    exit 0 if ($file eq "");
}
41
# Parse the request file named by the global $file and then remove it from
# the spool.
#
# Layout of the file, as consumed below:
#   line 1      requester e-mail address
#   line 2      flags; only the substring "force-reply" is recognised
#   remaining   "ERROR: <text>" lines, or "<md5> <url>" lines
#
# Sets $requester, $req_login and $force_reply, and appends to $problems
# and to the parallel arrays @md5 / @url ($url[$i] must hash to $md5[$i]).
#
# Exits quietly (status 0) when the file cannot be opened or unlinked.
# Dies when the requester address or its login part contains characters
# outside the accepted sets.
sub read_spool_file()
{
    # Three-argument open: the name comes from readdir() and must never be
    # interpreted as an open mode or a pipe.
    open(my $spool_fh, "<", $file) || exit 0;

    $requester = <$spool_fh>;
    $requester = "" unless defined $requester;  # empty file: fail validation cleanly
    chomp $requester;
    $requester =~ /^[a-zA-Z_0-9@.-]+$/
        or die "$file: evil requester: $requester";
    $req_login = "";
    $requester =~ /^([^@]+)\@/ and $req_login = $1;
    $req_login =~ /^[a-z0-9A-Z_.]+$/ or die "$file: evil requester $requester";

    # The flags line may be missing altogether in a truncated request.
    my $flags = <$spool_fh>;
    $force_reply++ if (defined $flags and $flags =~ /force-reply/);

    while (my $line = <$spool_fh>) {
        if ($line =~ /^ERROR/) {
            $line =~ s/^ERROR: //;
            $problems .= $line;
            next;
        }
        if ($line =~ /^([a-f0-9]{32})\s+((ftp|http|https|no-url|no-url-copy):\/\/([=\@\?a-z0-9A-Z:\+\~\.,\-\/_]|\%[0-9])+)\s*$/) {
            # Copy the captures right away; any later match would clobber them.
            my ($sum, $source) = ($1, $2);
            # Test the URL itself rather than the raw line, so trailing
            # whitespace cannot hide a terminating slash.
            if ($source =~ m{/$}) {
                $problems .= "$file: cannot fetch dir\n";
            } else {
                push @md5, $sum;
                push @url, $source;
            }
        } else {
            $problems .= "FILE: $file: corrupted\n";
        }
    }
    close($spool_fh);

    unlink($file) || exit 0;
}
79
# Strip the directory part of a path or URL: the greedy match of ".*/" is
# removed and the remainder returned.  A string without "/" comes back
# unchanged; a string ending in "/" yields "".
sub basename($)
{
    my ($path) = @_;
    $path =~ s{.*/}{};
    return $path;
}
86
# Build the path under which a file is stored on the distfiles server:
#   /by-md5/<c1>/<c2>/<md5>/<name>
# where <c1> and <c2> are the first two characters of $md5 and <name> is
# basename($url).
# Dies when $md5 has fewer than two characters on its first line.
sub by_md5($$)
{
    my ($md5, $url) = @_;
    # Capture in list context instead of reading $1/$2 afterwards: after a
    # failed match those globals still hold values from an earlier match
    # and would silently produce a wrong path.
    my ($first, $second) = $md5 =~ /^(.)(.)/
        or die "by_md5: checksum \"$md5\" is too short";
    return "/by-md5/$first/$second/$md5/" . basename($url);
}
93
# Check whether the file for ($md5, $url) is already present on
# $df_server: lftp is asked to send "size <path>" for the by-md5 path,
# and the result is true when its output starts with "213 ".
sub got_on_distfiles($$)
{
    my ($md5, $url) = @_;
    my $lftp_script = "debug 0; open $df_server; quote size " . by_md5($md5, $url);
    my $reply = qx{lftp -c '$lftp_script'};
    return $reply =~ /^213 /;
}
101
# Copy $from (file or directory, recursively) to "$df_scp/$to" using scp.
# Anything scp prints, and a non-zero exit status, counts as a failure and
# is appended to $problems together with the command line.
# Returns true on FAILURE and false on success.
# Dies only when the scp pipe itself cannot be started.
sub copy_to_df($$)
{
    my ($from, $to) = @_;
    my $cmd = "scp -r -B -q $from $df_scp/$to";

    open(my $scp, "$cmd 2>&1 |") or die;
    my $oops = join("", <$scp>);
    if (!close($scp)) {
        $oops .= "\nThe command has exited with a non-zero status.";
    }

    my $failed = ($oops ne "");
    if ($failed) {
        $problems .= "scp problems: $cmd:\n$oops\n";
    }
    return $failed;
}
117
# Stage a verified file and upload it to the distfiles server.
#
#   $md5         checksum of the file
#   $url         URL the file was requested under
#   $local_copy  where the file currently lives
#
# The file is moved to "$tmp_dir/$md5/<basename>" unless it is already
# there, a "<basename>.desc" file describing it is written next to it, and
# the whole "$tmp_dir/$md5/" directory is copied to the server.  On a
# successful copy a STORED entry is appended to $normal_out and
# $fetched_count is incremented; failures are recorded in $problems.
sub move_file($$$)
{
    my ($md5, $url, $local_copy) = @_;

    my $name = basename($url);
    my $staged = "$tmp_dir/$md5/$name";

    unless ($local_copy eq $staged) {
        if (system("mv -f \"$local_copy\" \"$staged\"")) {
            $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
            return;
        }
        $local_copy = $staged;
    }

    # A missing description is reported, but does not stop the upload.
    if (open(my $desc, "> $staged.desc")) {
        print {$desc} "URL: $url\n",
                      "Login: $req_login\n",
                      "MD5: $md5\n",
                      'Fetched-by: $Id$' . "\n",
                      "Time: " . time . "\n";
        close($desc);
    } else {
        $problems .= "ERROR: cannot write $name.desc\n";
    }

    # by_md5() yields ".../<md5>/<name>"; dropping the last two components
    # leaves the directory into which the "<md5>/" directory is copied.
    (my $target_dir = by_md5($md5, $url)) =~ s|/[^/]+/[^/]+$||;
    if (copy_to_df("$tmp_dir/$md5/", $target_dir) == 0) {
        $normal_out .= "STORED: $url\n"
            . "\t$md5 $name\n"
            . "\tSize: " . (-s $local_copy) . " bytes\n";
        $fetched_count++;
    }
}
153
# For no-url sources only: write "<basename>.link" into $tmp_dir and copy
# it to "$copy_dir/" on the server.  For "no-url-copy" URLs the file holds
# ".." followed by the by-md5 path, otherwise the word "REMOVE".
# (How the server acts on these .link files is not visible here.)
# Does nothing for any other URL scheme; a write failure is recorded in
# $problems.
sub make_src_symlink($$)
{
    my ($md5, $url) = @_;

    return unless ($url =~ /^no-url/);

    my $link_name = basename($url) . ".link";
    my $link_path = "$tmp_dir/$link_name";

    my $link_fh;
    unless (open($link_fh, "> $link_path")) {
        $problems .= "ERROR: cannot write $link_path\n";
        return;
    }

    my $content = "REMOVE";
    $content = ".." . by_md5($md5, $url) if ($url =~ /^no-url-copy/);
    print {$link_fh} $content;
    close($link_fh);

    copy_to_df($link_path, "$copy_dir/$link_name");
}
173
# Compute the MD5 checksum of a file by running md5sum via IPC::Run.
# Returns the 32 lower-case hex digits at the start of md5sum's output
# (or the chomped output itself if it does not start with such a digest).
# If md5sum writes anything to stderr, that text is recorded in $problems
# as FATAL and the string "error" is returned.
sub md5($)
{
    my ($path) = @_;
    my ($stdin, $stdout, $stderr) = ("", "", "");

    run(["md5sum", $path], \$stdin, \$stdout, \$stderr);

    unless ($stderr eq "") {
        chomp($stderr);
        $problems .= "FATAL: " . $stderr . "\n";
        return "error";
    }

    chomp $stdout;
    if ($stdout =~ /^([a-f0-9]{32})/) {
        $stdout = $1;
    }
    return $stdout;
}
192
# Handle a "no-url" / "no-url-copy" source: the file is not downloaded but
# expected at "$no_url_dir/$req_login/<name>", where <name> is the part of
# $url between "://" and the next "/".
# When the file exists and its checksum equals $md5 it is uploaded with
# move_file() and its .link file is created with make_src_symlink();
# every failure is recorded in $problems (one line each).
sub handle_no_url($$)
{
    my ($md5, $url) = @_;

    # Capture in list context so the name cannot be confused with a stale $1.
    my ($basename) = $url =~ m#://([^/]+)#;
    unless (defined $basename) {
        # The newline keeps this report from running into the next one.
        $problems .= "$url: corrupted! (no-url)\n";
        return;
    }
    my $file = "$no_url_dir/$req_login/$basename";

    if (-f $file) {
        my $computed_md5 = md5($file);
        if ($computed_md5 ne $md5) {
            $problems .= "FATAL: $file md5 mismatch, needed $md5, got $computed_md5\n";
        } else {
            move_file($md5, $url, $file);
            make_src_symlink($md5, $url);
        }
    } else {
        $problems .= "FATAL: $file was not uploaded\n";
    }
}
216
# Private helper of fetch_file(): run $cmd through the shell with stderr
# folded into stdout and return everything it printed as a list of lines.
# If the pipe cannot be started, a single line describing the failure is
# returned instead, so the caller reports it like any other wget output.
sub _wget_lines
{
    my ($cmd) = @_;
    my $pipe;
    unless (open($pipe, "$cmd 2>&1 |")) {
        return ("cannot run command: $!\n");
    }
    my @lines = <$pipe>;
    close($pipe);
    return @lines;
}

# Fetch one source and store it on the distfiles server.
#
#   $md5  expected checksum of the file
#   $url  where to get it from (ftp/http/https, or a no-url pseudo URL)
#
# The base name is always added to @files.  Files already present on the
# server are only reported ("ALREADY GOT"); no-url sources are delegated
# to handle_no_url().  Everything else is downloaded with wget into
# "$tmp_dir/$md5/", retried in passive FTP mode when an ftp: download
# produced an empty file, verified against $md5, sanity-checked with
# file(1) for archive names fetched over http(s), and finally handed to
# move_file().  All failures are appended to $problems.
sub fetch_file($$)
{
    my ($md5, $url) = @_;
    my $all_out = "";
    my $bn = basename($url);
    my $local = "$tmp_dir/$md5/$bn";
    my $cmd = "wget -nv --no-check-certificate --user-agent=$user_agent -O $local \"$url\"";
    my $cmd2 = "wget -nv --no-check-certificate --user-agent=$user_agent --passive-ftp -O $local \"$url\"";

    push @files, $bn;

    if (got_on_distfiles($md5, $url)) {
        $normal_out .=
            "ALREADY GOT: $url\n" .
            "\t$md5 " . basename($url) . "\n";
        make_src_symlink($md5, $url);
        return;
    }

    mkdir("$tmp_dir/$md5");

    if ($url =~ /^no-url/) {
        handle_no_url($md5, $url);
        return;
    }

    # Lines of the form "URL: ... -> ..." are not treated as problems;
    # any other output is reported.
    my @lines = _wget_lines($cmd);
    $all_out .= join("", @lines);
    my $out = join("", grep { !/URL:.*\s+\-\>\s+.*/ } @lines);
    if ($out ne "") {
        $problems .= "$cmd:\n$out\n\n";
    }

    # An empty file from an ftp: URL gets a second chance in passive mode.
    if (-f $local && (-s $local) == 0 && $url =~ /^ftp:/) {
        @lines = _wget_lines($cmd2);
        $all_out .= join("", map { "\n\t\t$_" } @lines);
        $out = join("", grep { !/URL:.*\s+\-\>\s+.*/ } @lines);
        if ($out ne "") {
            # Report the command that actually produced this output.
            $problems .= "$cmd2:\n$out\n\n";
        }
    }

    if (-r $local && (-s $local) > 0) {
        my $computed_md5 = md5($local);
        if ($computed_md5 ne $md5) {
            $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
        } else {
            # An archive name served over http(s) that file(1) identifies
            # as text or empty is rejected even though the checksum matched.
            my $testres = "";
            if ($url =~ /^(http|https):/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
                open(my $probe, "file \"$local\" |") or die;
                $testres = <$probe>;
                $testres = "" unless defined $testres;  # file(1) printed nothing
                close($probe);
            }
            if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
                $testres =~ s/.*://;
                $problems .= "FATAL: data returned from $url: $testres";
            } else {
                move_file($md5, $url, $local);
            }
        }
    } elsif (-f $local && (-s $local) > 0) {
        $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out): file is not readable\n";
    } else {
        $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out)\n";
    }

    # save space
    unlink($local);
}
294
# Process every (md5, url) pair collected from the request, in order.
# If problems were already recorded, a blank-line separator is appended
# to $problems first.
sub fetch_files()
{
    if ($problems ne "") {
        $problems .= "\n\n";
    }
    for my $idx (0 .. $#md5) {
        fetch_file($md5[$idx], $url[$idx]);
    }
}
302
# Mail the report for this request through "/usr/sbin/sendmail -t".
# The message is sent in the requester's name to $commits_list with a Cc
# to the requester.  The subject lists the handled files (the global
# @files is cut down to its first ten entries plus "..." when longer) and
# is marked with "ERRORS: " when $problems is not empty.
# Dies when closing the sendmail pipe fails.
sub send_email()
{
    my $marker = ($problems ne "") ? "ERRORS: " : "";

    # Local login used for the From: display name (everything before the
    # last "@"); intentionally separate from the global $req_login.
    my ($req_login) = $requester =~ /^(.*)\@/;

    if (@files > 10) {
        splice(@files, 10, @files - 10, "...");
    }

    my $message =
        "From: $req_login <$requester>\n" .
        "To: $commits_list\n" .
        "Cc: $requester\n" .
        "Subject: DISTFILES: ${marker}@{files}\n" .
        "Message-ID: <$$." . time . "\@distfiles.pld-linux.org>\n" .
        "X-distfiles-program: file-fetcher.pl\n" .
        "X-distfiles-version: " . '$Id$' . "\n" .
        "\n" .
        "$problems\n" .
        "Files fetched: $fetched_count\n" .
        "\n" .
        "$normal_out\n" .
        "\n" .
        "--\n" .
        "Virtually Yours: distfiles.\n";

    open(my $mail, "| /usr/sbin/sendmail -t");
    #open(my $mail, "| cat");
    print {$mail} $message;
    close($mail) or die;
}
336
# Create a private scratch directory "./tmp/<id>" and store its path in
# the global $tmp_dir.  <id> is the output of uuidgen, or a random number
# when uuidgen produced nothing.
# Dies when the directory cannot be created.
sub make_tmp_dir()
{
    my $id = `uuidgen 2>/dev/null`;
    chomp $id;
    unless (defined $id and $id ne "") {
        $id = rand();
    }
    $tmp_dir = "./tmp/$id";
    mkdir($tmp_dir) or die;
}
345
# Remove the scratch directory named by $tmp_dir, with everything in it.
# Does nothing when $tmp_dir is empty or is not an existing directory.
sub clean_tmp_dir()
{
    return if ($tmp_dir eq "");
    return unless (-d $tmp_dir);
    system("rm -rf $tmp_dir");
}
351
# Files created from here on are group-writable.
umask(002);

# Handle one request per run: pick it up, parse it, fetch and upload
# what it asks for, report, and tidy up.
find_file_in_spool();
read_spool_file();
make_tmp_dir();
fetch_files();

# A report is mailed when it was explicitly requested, when something went
# wrong, or when at least one file was actually stored.
send_email() if ($force_reply or $problems ne "" or $fetched_count != 0);

clean_tmp_dir();
This page took 0.164885 seconds and 4 git commands to generate.