- "mktemp /tmp/XXXXXX" | getline tmpfile
- close("mktemp /tmp/XXXXXX")
-
- if (url ~ /^http:\/\/(download|dl).(sf|sourceforge).net\//) {
- gsub("^http://(download|dl).(sf|sourceforge).net/", "", url)
- url = "http://prdownloads.sourceforge.net/" substr(url, 1, 1) "/" substr(url, 1, 2) "/" url
- if (DEBUG) print "sf url, mungled url to: " url
- }
-
- if (DEBUG) print "Retrieving: " url
- errno=system("wget -O - \"" url "\" -t 3 -T 300 --passive-ftp > " tmpfile " 2>/dev/null" )
-
- if (errno==0) {
- while (getline oneline < tmpfile)
- odp=(odp " " oneline)
- if ( DEBUG ) print "Response: " odp
- }
-
- close(tmpfile)
+function get_links(url,filename, errno,link,oneline,retval,odp,wholeodp,lowerodp,tmpfile,cmd) {
+
+ wholeerr=""
+
+ tmpfile = mktemp()
+ tmpfileerr = mktemp()
+
+ if (url ~ /^http:\/\/(download|dl)\.(sf|sourceforge)\.net\//) {
+ # http://downloads.sourceforge.net/project/mediainfo/source/mediainfo/
+ gsub("^http://(download|dl)\.(sf|sourceforge)\.net/", "", url)
+
+ gsub("/.*", "", url)
+ url = "http://sourceforge.net/projects/" url "/files/"
+ d("sf url, mungled url to: " url)
+ }
+
+ if (url ~ /^http:\/\/(.*)\.googlecode\.com\/files\//) {
+ gsub("^http://", "", url)
+ gsub("\..*", "", url)
+ url = "http://code.google.com/p/" url "/downloads/list"
+ d("googlecode url, mungled url to: " url)
+ }
+
+ if (url ~ /^http:\/\/pecl.php.net\/get\//) {
+ gsub("-.*", "", filename)
+ url = "http://pecl.php.net/package/" filename
+ d("pecl.php.net url, mungled url to: " url)
+ }
+
+ if (url ~ /^(http|ftp):\/\/mysql.*\/Downloads\/MySQL-5.1\//) {
+ url = "http://dev.mysql.com/downloads/mysql/5.1.html#source"
+ d("mysql 5.1 url, mungled url to: " url)
+ }
+
+ if (url ~/^(http|https):\/\/launchpad\.net\/(.*)\//) {
+ gsub("^(http|https):\/\/launchpad\.net\/", "", url)
+ gsub("\/.*/", "", url)
+ url = "https://code.launchpad.net/" url "/+download"
+ d("main launchpad url, mungled url to: " url)
+ }
+
+ if (url ~/^(http|https):\/\/edge\.launchpad\.net\/(.*)\//) {
+ gsub("^(http|https):\/\/edge\.launchpad\.net\/", "", url)
+ gsub("\/.*/", "", url)
+ url = "https://edge.launchpad.net/" url "/+download"
+ d("edge launchpad url, mungled url to: " url)
+ }
+
+ if (url ~/^(http|https):\/\/github.com\/.*\/(.*)\/tarball\//) {
+ gsub("\/tarball\/.*", "/downloads", url)
+ d("github tarball url, mungled url to: " url)
+ }
+
+ if (url ~/^(http|https):\/\/cgit\..*\/(.*)\/snapshot\//) {
+ gsub("\/snapshot\/.*", "/", url)
+ d("cgit snapshot tarball url, mungled url to: " url)
+ }
+
+ if (url ~/^(http|https):\/\/www2\.aquamaniac\.de\/sites\/download\//) {
+ url = "http://www2.aquamaniac.de/sites/download/packages.php"
+ d("aquamaniac.de tarball url, mungled url to: " url)
+ }
+
+ d("Retrieving: " url)
+ user_agent = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2) Gecko/20100129 PLD/3.0 (Th) Iceweasel/3.6"
+ cmd = "wget -t 2 -T 45 --user-agent \"" user_agent "\" -nv -O - \"" url "\" --passive-ftp --no-check-certificate > " tmpfile " 2> " tmpfileerr
+ d("Execute: " cmd)
+ errno = system(cmd)
+ d("Execute done")
+
+ if (errno != 0) {
+ d("Reading failure response...")
+ wholeerr = ""
+ while (getline oneline < tmpfileerr)
+ wholeerr = (wholeerr " " oneline)
+ d("Error Response: " wholeerr)
+
+ system("rm -f " tmpfile)
+ system("rm -f " tmpfileerr)
+ retval = ("WGET ERROR: " errno ": " wholeerr)
+ return retval
+ }
+ system("rm -f " tmpfileerr)
+
+ urldir = url;
+ sub(/[^\/]+$/, "", urldir)
+
+if (USE_PERL) {
+ cmd = extract_links_cmd(tmpfile)
+ while (cmd | getline) {
+ tag = $1
+ link = substr($0, length(tag) + 2)
+
+ if (tag == "iframe") {
+ d("Frame: " link)
+ if (url !~ /\//) {
+ link = (urldir link)
+ d("Frame->: " link)
+ }
+
+ if (link_seen(link)) {
+ continue
+ }
+ retval = (retval " " get_links(link))
+ }
+
+ if (link_seen(link)) {
+ continue
+ }
+
+ retval = (retval " " link)
+ d("href(): " link)
+ }
+ close(cmd)