]> git.pld-linux.org Git - projects/distfiles.git/blame - file-fetcher.pl
- allow ? in urls
[projects/distfiles.git] / file-fetcher.pl
CommitLineData
7ebf9245
MM
1#!/usr/bin/perl -w
2#
3
482655f9 4$commits_list = "pld-cvs-commit\@pld-linux.org";
7ebf9245 5
b5c674c5 6$spool_dir = "./spool";
01806192
MM
7$copy_dir = "src"; # relative to ftp root
8$no_url_dir = "./upload";
9$df_server = "distfiles.pld-linux.org";
10$df_scp = "plddist\@$df_server:ftp";
7ebf9245
MM
11
12@md5 = ();
13%url = ();
14$problems = "";
15$normal_out = "";
16$requester = "";
17$file = "";
47f8fda7
MM
18$fetched_count = 0;
19$force_reply = 0;
bca91c1b 20$req_login = "nobody";
44cd280f 21@files = ();
7ebf9245
MM
22
23# try lookup some file in spool, exit if it cannot be done
24sub find_file_in_spool()
25{
26 opendir(DIR, $spool_dir) || die "can't opendir $spool_dir: $!";
27 while (1) {
28 my $f = readdir(DIR);
29 defined $f or last;
30 -f "$spool_dir/$f" or next;
31 $file = "$spool_dir/$f";
32 last;
33 }
34 closedir(DIR);
35
36 exit 0 if ($file eq "");
37}
38
39# read file from spool, and try unlink it. if cannot unlink -- exit
40# sets $requester (email), $problems, @md5 (arrays of md5's)
41# and %url (map from md5 to urls)
42sub read_spool_file()
43{
44 open(F, "< $file") || exit 0;
45 $requester = <F>;
46 chomp $requester;
47 $requester =~ /^[a-zA-Z_\-0-9\@\.]+$/
48 or die "$file: evil requester: $requester";
bca91c1b
MM
49 $req_login = "";
50 $requester =~ /^([^@]+)\@/ and $req_login = $1;
51 $req_login =~ /^[a-z0-9A-Z_]+$/ or die "$file: evil requester $requester";
47f8fda7
MM
52 my $flags = <F>;
53 $force_reply++ if ($flags =~ /force-reply/);
7ebf9245
MM
54
55 while (<F>) {
56 if (/^ERROR/) {
57 s/^ERROR: //;
58 $problems .= $_;
f2a640bf 59 next;
7ebf9245 60 }
6847ae00 61 /^([a-f0-9]{32})\s+((ftp|http|no-url|no-url-copy):\/\/([\?a-z0-9A-Z:\+\~\.\-\/_]|\%[0-9])+)\s*$/
7ebf9245
MM
62 or die "$file: corrupted";
63 push @md5, $1;
64 $url{$1} = $2;
65 /\/$/ and die "$file: cannot fetch dir";
66 }
67 close(F);
68
69 unlink($file) || exit 0;
70}
71
44cd280f
MM
72sub basename($)
73{
74 my $f = shift;
75 $f =~ s|.*/||;
76 return $f;
77}
78
01806192 79sub by_md5($$)
7ebf9245
MM
80{
81 my ($md5, $url) = @_;
7ebf9245 82 $md5 =~ /^(.)(.)/;
01806192 83 return "/by-md5/$1/$2/$md5/" . basename($url);
6ce39ee7
MM
84}
85
01806192 86sub got_on_distfiles($$)
6ce39ee7 87{
01806192
MM
88 my ($md5, $url) = @_;
89 my $p = by_md5($md5, $url);
90 my $l = `lftp -c 'debug 0; open $df_server; quote size $p'`;
91 return $l =~ /^213 /;
92}
93
94sub copy_to_df($$)
95{
96 my ($from, $to) = @_;
97 my $cmd = "scp -r -B -q $from $df_scp/$to";
98 open(E, "$cmd 2>&1 |") or die;
99 my $oops = "";
100 while (<E>) {
101 $oops .= $_;
102 }
103 close(E);
104 $problems .= "scp problems: $cmd:\n$oops\n"
105 if ($oops ne "");
106 return ($oops ne "");
6ce39ee7
MM
107}
108
b5c674c5 109sub move_file($$$)
6ce39ee7 110{
b5c674c5 111 my ($md5, $url, $local_copy) = @_;
6ce39ee7 112
01806192 113 my $bn = basename($url);
6ce39ee7 114
01806192
MM
115 if ($local_copy ne "$tmp_dir/$md5/$bn") {
116 if (system("mv -f \"$local_copy\" \"$tmp_dir/$md5/$bn\"")) {
117 $problems .= "FATAL: cannot move $local_copy to $tmp_dir\n";
118 return;
119 }
120 $local_copy = "$tmp_dir/$md5/$bn";
121 }
6ce39ee7 122
01806192
MM
123 if (open(D, "> $tmp_dir/$md5/$bn.desc")) {
124 print D "URL: $url\n";
d286a317 125 print D "Login: $req_login\n";
01806192
MM
126 print D "MD5: $md5\n";
127 print D 'Fetched-by: $Id$'."\n";
128 print D "Time: " . time . "\n";
129 close(D);
7ebf9245 130 } else {
01806192
MM
131 $problems .= "ERROR: cannot write $bn.desc\n";
132 }
133
134 my $dir = by_md5($md5, $url);
135 $dir =~ s|/[^/]+/[^/]+$||;
136 if (copy_to_df("$tmp_dir/$md5/", $dir) == 0) {
44cd280f
MM
137 $normal_out .=
138 "STORED: $url\n" .
139 "\t$md5 " . basename($url) . "\n" .
01806192 140 "\tSize: " . (-s $local_copy) . " bytes\n";
47f8fda7 141 $fetched_count++;
7ebf9245
MM
142 }
143}
144
b5c674c5
MM
145sub make_src_symlink($$)
146{
147 my ($md5, $url) = @_;
01806192
MM
148
149 return unless ($url =~ /^no-url/);
150
151 my $b = basename($url);
152 if (open(S, "> $tmp_dir/$b.link")) {
b5c674c5 153 if ($url =~ /^no-url-copy/) {
01806192 154 print S (".." . by_md5($md5, $url));
b5c674c5 155 } else {
01806192 156 print S "REMOVE";
b5c674c5 157 }
01806192
MM
158 close(S);
159 copy_to_df("$tmp_dir/$b.link", "$copy_dir/$b.link");
b5c674c5 160 } else {
01806192 161 $problems .= "ERROR: cannot write $tmp_dir/$b.link\n";
b5c674c5
MM
162 }
163}
164
165sub md5($)
166{
167 my $file = shift;
168 my $md5 = `md5sum "$file" 2>/dev/null`;
169 $md5 =~ /^([a-f0-9]{32})/ and $md5 = $1;
170 return $md5;
171}
172
173sub handle_no_url($$)
174{
175 my ($md5, $url) = @_;
176
177 $url =~ m|://([^/]+)| or die "corrupted! (no-url)";
178 my $basename = $1;
bca91c1b 179 my $file = "$no_url_dir/$req_login/$basename";
b5c674c5
MM
180
181 if (-f $file) {
182 my $computed_md5 = md5($file);
183 if ($computed_md5 ne $md5) {
184 $problems .= "FATAL: $file md5 mismatch, needed $md5, got $computed_md5\n";
185 } else {
186 move_file($md5, $url, $file);
187 make_src_symlink($md5, $url);
188 }
189 } else {
bca91c1b 190 $problems .= "FATAL: $file was not uploaded\n";
b5c674c5
MM
191 }
192}
193
7ebf9245
MM
194sub fetch_file($$)
195{
196 my ($md5, $url) = @_;
197 my $out = "";
08ea99b9 198 my $all_out = "";
01806192
MM
199 my $bn = basename($url);
200 my $local = "$tmp_dir/$md5/$bn";
201 my $cmd = "wget -nv -O $local \"$url\"";
b5c674c5 202
01806192 203 push @files, $bn;
44cd280f 204
01806192 205 if (got_on_distfiles($md5, $url)) {
00eefa39
MM
206 $normal_out .=
207 "ALREADY GOT: $url\n" .
01806192 208 "\t$md5 " . basename($url) . "\n";
b5c674c5 209 make_src_symlink($md5, $url);
6ce39ee7
MM
210 return;
211 }
01806192
MM
212
213 mkdir("$tmp_dir/$md5") or die;
214
b5c674c5
MM
215 if ($url =~ /^no-url/) {
216 handle_no_url($md5, $url);
217 return;
218 }
219
7ebf9245
MM
220 open(W, "$cmd 2>&1 |");
221 while (<W>) {
08ea99b9 222 $all_out .= $_;
7ebf9245
MM
223 /URL:.*\s+\-\>\s+.*/ and next;
224 $out .= $_;
225 }
226 close(W);
227 if ($out ne "") {
228 $problems .= "$cmd:\n$out\n\n";
229 }
01806192
MM
230 if (-f $local && -s $local > 0) {
231 my $computed_md5 = md5($local);
7ebf9245
MM
232 if ($computed_md5 ne $md5) {
233 $problems .= "FATAL: $url md5 mismatch, needed $md5, got $computed_md5\n";
234 } else {
d642cf7e 235 my $testcmd = "file \"$local\" |";
236 my $testres = "";
237 if ($url =~ /^http:/ && $local =~ /\.(tar\.(bz2|gz)|tgz|zip|jar|xpi)$/) {
238 open(T, $testcmd) or die;
239 $testres = <T>;
240 close(T);
241 }
242 if ($testres =~ /empty|(ASCII|HTML|SGML).*text/) {
243 $testres =~ s/.*://;
244 $problems .= "FATAL: data returned from $url: $testres";
245 } else {
246 move_file($md5, $url, $local);
247 }
7ebf9245
MM
248 }
249 } else {
08ea99b9 250 $problems .= "FATAL: $url ($md5) was not fetched ($cmd: $all_out)\n";
7ebf9245 251 }
01806192
MM
252 # save space
253 unlink($local);
7ebf9245
MM
254}
255
256sub fetch_files()
257{
258 $problems .= "\n\n" if ($problems ne "");
259 foreach $md5 (@md5) {
260 fetch_file($md5, $url{$md5});
261 }
262}
263
264sub send_email()
265{
9a1e36f3
MM
266 open(EMAIL, "| /usr/sbin/sendmail -t");
267 #open(EMAIL, "| cat");
44cd280f 268 my $marker = "";
7ebf9245 269 if ($problems ne "") {
44cd280f 270 $marker = "ERRORS: ";
7ebf9245 271 }
04550b88
MM
272 my $req_login;
273 $requester =~ /^(.*)\@/ and $req_login = $1;
44cd280f
MM
274
275 splice(@files, 10, @files - 10, "...")
276 if (@files > 10);
277
e157986d 278 print EMAIL
04550b88 279"From: $req_login <$requester>
482655f9
MM
280To: $commits_list
281Cc: $requester
44cd280f 282Subject: DISTFILES: ${marker}@{files}
e157986d
MM
283Message-ID: <$$." . time . "\@distfiles.pld-linux.org>
284X-distfiles-program: file-fetcher.pl
285X-distfiles-version: " . '$Id$' . "
7ebf9245
MM
286
287$problems
47f8fda7
MM
288Files fetched: $fetched_count
289
7ebf9245 290$normal_out
e157986d
MM
291
292--
d8cd573c 293Virtually Yours: distfiles.
7ebf9245
MM
294";
295 close(EMAIL) or die;
296}
297
01806192
MM
298sub make_tmp_dir()
299{
300 my $id = `uuidgen 2>/dev/null`;
301 chomp $id;
302 $id = rand if (!defined $id or $id eq "");
303 $tmp_dir = "./tmp/$id";
304 mkdir($tmp_dir) or die;
305}
306
307sub clean_tmp_dir()
308{
309 system("rm -rf $tmp_dir")
310 if ($tmp_dir ne "" && -d $tmp_dir);
311}
312
2e33bbbc
MM
313umask(002);
314
7ebf9245
MM
315find_file_in_spool();
316read_spool_file();
01806192 317make_tmp_dir();
7ebf9245 318fetch_files();
47f8fda7 319send_email() unless (!$force_reply and $problems eq "" and $fetched_count == 0);
01806192 320clean_tmp_dir();
This page took 0.100895 seconds and 4 git commands to generate.