-#!/usr/bin/gawk -f
+#!/bin/awk -f
# $Revision$, $Date$
#
-# Copyright (C) 2000-2010 PLD-Team <feedback@pld-linux.org>
+# Copyright (C) 2000-2011 PLD-Team <feedback@pld-linux.org>
# Authors:
# Sebastian Zagrodzki <zagrodzki@pld-linux.org>
# Jacek Konieczny <jajcus@pld-linux.org>
return
}
- print strftime("%Y-%m-%d %H:%M:%S ") s >> "/dev/stderr"
-# print s >> "/dev/stderr"
+# print strftime("%Y-%m-%d %H:%M:%S ") s >> "/dev/stderr"
+ print s >> "/dev/stderr"
}
function fixedsub(s1,s2,t, ind) {
return link
}
+# Build the shell command that extracts links from an HTML file using the
+# perl HTML::TreeBuilder module.
+#
+# tmpfile - path of the file holding the HTML; it is appended to the command
+#           line and read by perl through <>.
+#
+# Returns the command string. When run, the command prints one line per
+# distinct link found in <a> and <iframe> elements, formatted as
+# "TAGNAME LINK", which is straightforward to parse in awk. Links are
+# collected in a hash, so a link that occurs several times is printed once
+# (with the tag of its last occurrence) and the output order is unspecified.
+#
+# NOTE: the perl program is a single awk string continued with
+# backslash-newline, so it must not contain '#' comments or single quotes.
+function extract_links_cmd(tmpfile) {
+ return "perl -MHTML::TreeBuilder -e ' \
+ my $content = join q//, <>; \
+ my $root = HTML::TreeBuilder->new; \
+ $root->parse($content); \
+ $root->eof; \
+ \
+ my %links = (); \
+ for (@{$root->extract_links(qw(a iframe))}) { \
+ my($link, $element, $attr, $tag) = @$_; \
+ $links{$link} = $tag; \
+ } \
+ \
+ while (my($link, $tag) = each %links) { \
+ print $tag, q/ /, $link, $/; \
+ } \
+ ' " tmpfile
+}
+
# get all <A HREF=..> tags from specified URL
function get_links(url,filename, errno,link,oneline,retval,odp,wholeodp,lowerodp,tmpfile,cmd) {
tmpfile = mktemp()
tmpfileerr = mktemp()
- if (url ~ /^http:\/\/(download|downloads|dl)\.(sf|sourceforge)\.net\//) {
+ if (url ~ /^http:\/\/(download|dl)\.(sf|sourceforge)\.net\//) {
# http://downloads.sourceforge.net/project/mediainfo/source/mediainfo/
gsub("^http://(download|dl)\.(sf|sourceforge)\.net/", "", url)
- # http://downloads.sourceforge.net/project/mediainfo/source/mediainfo/
- gsub("^http://downloads\.(sf|sourceforge)\.net/project/", "", url)
gsub("/.*", "", url)
url = "http://sourceforge.net/projects/" url "/files/"
d("cgit snapshot tarball url, mungled url to: " url)
}
+ if (url ~/^(http|https):\/\/www2\.aquamaniac\.de\/sites\/download\//) {
+ url = "http://www2.aquamaniac.de/sites/download/packages.php"
+ d("aquamaniac.de tarball url, mungled url to: " url)
+ }
+
d("Retrieving: " url)
- cmd = "wget --user-agent \"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2) Gecko/20100129 PLD/3.0 (Th) Iceweasel/3.6\" -nv -O - \"" url "\" -t 2 -T 45 --passive-ftp --no-check-certificate > " tmpfile " 2> " tmpfileerr
+ user_agent = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2) Gecko/20100129 PLD/3.0 (Th) Iceweasel/3.6"
+ cmd = "wget -t 2 -T 45 --user-agent \"" user_agent "\" -nv -O - \"" url "\" --passive-ftp --no-check-certificate > " tmpfile " 2> " tmpfileerr
d("Execute: " cmd)
errno = system(cmd)
d("Execute done")
retval = ("WGET ERROR: " errno ": " wholeerr)
return retval
}
+ system("rm -f " tmpfileerr)
+
+ urldir = url;
+ sub(/[^\/]+$/, "", urldir)
+
+if (USE_PERL) {
+ cmd = extract_links_cmd(tmpfile)
+ while (cmd | getline) {
+ tag = $1
+ link = substr($0, length(tag) + 2)
+
+ if (tag == "iframe") {
+ d("Frame: " link)
+ if (url !~ /\//) {
+ link = (urldir link)
+ d("Frame->: " link)
+ }
+
+ if (link_seen(link)) {
+ continue
+ }
+ retval = (retval " " get_links(link))
+ }
+
+ if (link_seen(link)) {
+ continue
+ }
+
+ retval = (retval " " link)
+ d("href(): " link)
+ }
+ close(cmd)
+ system("rm -f " tmpfile)
+
+ d("Returning: [" retval "]")
+ return retval
+}
wholeodp = ""
d("Reading success response...")
# d("Response: " wholeodp)
}
d("Reponse read done...")
-
system("rm -f " tmpfile)
- system("rm -f " tmpfileerr)
-
- urldir = url;
- sub(/[^\/]+$/, "", urldir)
while (match(wholeodp, /<([aA]|[fF][rR][aA][mM][eE])[ \t][^>]*>/) > 0) {
d("Processing links...")
}
}
- d("Returning: " retval)
+ d("Returning: [" retval "]")
return retval
}
return url
}
-function process_source(number,lurl,name,version) {
+function process_source(number, lurl, name, version) {
# fetches file list, and compares version numbers
d("Processing " lurl)
- if ( index(lurl,version)==0 ) {
- d("There is no version number.")
+ if (index(lurl, version) == 0) {
+ d("There is no version number ["version"] in ["lurl"]")
return 0
}
}
}
}
- if (finished==0)
+ if (finished == 0)
print name "(" number ") seems ok: " oldversion
else
print name "(" number ") [OLD] " oldversion " [NEW] " version
function rss_upgrade(name, ver, url, regex, cmd, nver) {
regex = "s/.*<title>" regex "<\/title>.*/\\1/p"
- cmd = "wget -q -O - " url " | sed -nre '" regex "' | head -n1"
+ cmd = "wget -t 2 -T 45 -q -O - " url " | sed -nre '" regex "' | head -n1"
d("rss_upgrade_cmd: " cmd)
cmd | getline nver
}
}
-function process_data(name,ver,rel,src) {
+function process_data(name, ver, rel, src) {
if (name ~ /^php-pear-/) {
return pear_upgrade(name, ver);
}
if (name == "vim") {
return vim_upgrade(name, ver);
}
+ if (name == "xulrunner") {
+ ver = subst_defines(DEFS["firefox_ver"], DEFS)
+ d("package xulrunner, change version to firefox ["ver"]")
+ }
# this function checks if substitutions were valid, and if true:
# processes each URL and tries to get current file list
for (i in src) {
- if ( src[i] ~ /%{nil}/ ) {
+ if (src[i] ~ /%{nil}/) {
gsub(/\%\{nil\}/, "", src[i])
}
if ( src[i] !~ /%{.*}/ && src[i] !~ /%[A-Za-z0-9_]/ ) {
d("Source: " src[i])
- process_source(i,src[i],name,ver)
+ process_source(i, src[i], name, ver)
} else {
print FNAME ":" i ": impossible substitution: " src[i]
}
# DEBUG = 1
errno=system("wget --help > /dev/null 2>&1")
- if (errno) {
+ if (errno && errno != 3) {
print "No wget installed!"
exit 1
}