# TODO:
# - "SourceXDownload" support (use given URLs if present instead of cut-down SourceX URLs)
# - "SourceXActiveFTP" support
+# - support debian/watch http://wiki.debian.org/debian/watch/
function d(s) {
if (!DEBUG) {
return
}
+
+# print strftime("%Y-%m-%d %H:%M:%S ") s >> "/dev/stderr"
print s >> "/dev/stderr"
}
}
# fix link to artificial one that will be recognized rest of this script
-function postfix_link(url, link) {
+# rewrites github /tarball/ links into direct .tar.gz filenames so the
+# version-matching code later in this script can handle them.
+# oldlink is a local variable (awk convention: declared as extra parameter)
+function postfix_link(url, link, oldlink) {
	oldlink = link
	if ((url ~/^(http|https):\/\/github.com\//) && (link ~ /.*\/tarball\//)) {
		gsub(".*\/tarball\/", "", link)
		link = link ".tar.gz"
	}
-	d("POST FIXING URL [ " oldlink " ] to [ " link " ]")
+	# only emit the debug line when the link was actually rewritten
+	if (oldlink != link) {
+		d("POST FIXED URL [ " oldlink " ] to [ " link " ]")
+	}
	return link
}
+# use perl HTML::TreeBuilder module to extract links from html.
+# the command prints "TAGNAME LINK" per unique link, which is straightforward
+# to parse in awk ($1 = tag, rest of line = url).
+# tmpfile: path of the downloaded HTML document to parse.
+# NOTE(review): requires the HTML::TreeBuilder perl module at runtime.
+function extract_links_cmd(tmpfile) {
+	return "perl -MHTML::TreeBuilder -e ' \
+		my $content = join q//, <>; \
+		my $root = new HTML::TreeBuilder; \
+		$root->parse($content); \
+		\
+		my %links = (); \
+		for (@{$root->extract_links(qw(a iframe))}) { \
+			my($link, $element, $attr, $tag) = @$_; \
+			$links{$link} = $tag; \
+		} \
+		\
+		while (my($link, $tag) = each %links) { \
+			print $tag, q/ /, $link, $/; \
+		} \
+	' " tmpfile
+}
+
# get all <A HREF=..> tags from specified URL
function get_links(url,filename, errno,link,oneline,retval,odp,wholeodp,lowerodp,tmpfile,cmd) {
tmpfile = mktemp()
tmpfileerr = mktemp()
- if (url ~ /^http:\/\/(download|dl).(sf|sourceforge).net\//) {
- gsub("^http://(download|dl).(sf|sourceforge).net/", "", url)
+ if (url ~ /^http:\/\/(download|dl)\.(sf|sourceforge)\.net\//) {
+ # http://downloads.sourceforge.net/project/mediainfo/source/mediainfo/
+ gsub("^http://(download|dl)\.(sf|sourceforge)\.net/", "", url)
+
gsub("/.*", "", url)
url = "http://sourceforge.net/projects/" url "/files/"
d("sf url, mungled url to: " url)
d("github tarball url, mungled url to: " url)
}
+ if (url ~/^(http|https):\/\/cgit\..*\/(.*)\/snapshot\//) {
+ gsub("\/snapshot\/.*", "/", url)
+ d("cgit snapshot tarball url, mungled url to: " url)
+ }
d("Retrieving: " url)
cmd = "wget --user-agent \"Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2) Gecko/20100129 PLD/3.0 (Th) Iceweasel/3.6\" -nv -O - \"" url "\" -t 2 -T 45 --passive-ftp --no-check-certificate > " tmpfile " 2> " tmpfileerr
errno = system(cmd)
d("Execute done")
- if (errno==0) {
- wholeodp = ""
- d("Reading succeess response...")
- while (getline oneline < tmpfile)
- wholeodp=(wholeodp " " oneline)
-# d("Response: " wholeodp)
- } else {
+ if (errno != 0) {
d("Reading failure response...")
wholeerr = ""
while (getline oneline < tmpfileerr)
- wholeerr=(wholeerr " " oneline)
+ wholeerr = (wholeerr " " oneline)
d("Error Response: " wholeerr)
+
+ system("rm -f " tmpfile)
+ system("rm -f " tmpfileerr)
+ retval = ("WGET ERROR: " errno ": " wholeerr)
+ return retval
}
+ system("rm -f " tmpfileerr)
+
+ urldir = url;
+ sub(/[^\/]+$/, "", urldir)
+
+if (USE_PERL) {
+ cmd = extract_links_cmd(tmpfile)
+ while (cmd | getline) {
+ tag = $1
+ link = substr($0, length(tag) + 2)
+
+ if (tag == "iframe") {
+ d("Frame: " link)
+ if (url !~ /\//) {
+ link = (urldir link)
+ d("Frame->: " link)
+ }
+
+ if (link_seen(link)) {
+ continue
+ }
+ retval = (retval " " get_links(link))
+ }
+
+ if (link_seen(link)) {
+ continue
+ }
+ retval = (retval " " link)
+ d("href(): " link)
+ }
+ close(cmd)
system("rm -f " tmpfile)
- system("rm -f " tmpfileerr)
- urldir=url;
- sub(/[^\/]+$/,"",urldir)
-
- if ( errno==0) {
- while (match(wholeodp, /<([aA]|[fF][rR][aA][mM][eE])[ \t][^>]*>/) > 0) {
- d("Processing links...")
- odp=substr(wholeodp,RSTART,RLENGTH);
- wholeodp=substr(wholeodp,RSTART+RLENGTH);
-
- lowerodp=tolower(odp);
- if (lowerodp ~ /<frame[ \t]/) {
- sub(/[sS][rR][cC]=[ \t]*/,"src=",odp);
- match(odp,/src="[^"]+"/)
- newurl=substr(odp,RSTART+5,RLENGTH-6)
- d("Frame: " newurl)
- if (newurl !~ /\//) {
- newurl=(urldir newurl)
- d("Frame->: " newurl)
- }
+ d("Returning: [" retval "]")
+ return retval
+}
- if (link_seen(newurl)) {
- newurl=""
- continue
- }
+ wholeodp = ""
+ d("Reading success response...")
+ while (getline oneline < tmpfile) {
+ wholeodp = (wholeodp " " oneline)
+# d("Response: " wholeodp)
+ }
+ d("Reponse read done...")
+ system("rm -f " tmpfile)
- retval=(retval " " get_links(newurl))
- } else if (lowerodp ~ /href=[ \t]*"[^"]*"/) {
- sub(/[hH][rR][eE][fF]=[ \t]*"/,"href=\"",odp)
- match(odp,/href="[^"]*"/)
- link=substr(odp,RSTART,RLENGTH)
- odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
- link=substr(link,7,length(link)-7)
- link=postfix_link(url, link)
-
- if (link_seen(link)) {
- link=""
- continue
- }
+ while (match(wholeodp, /<([aA]|[fF][rR][aA][mM][eE])[ \t][^>]*>/) > 0) {
+ d("Processing links...")
+ odp = substr(wholeodp,RSTART,RLENGTH);
+ wholeodp = substr(wholeodp,RSTART+RLENGTH);
+
+ lowerodp = tolower(odp);
+ if (lowerodp ~ /<frame[ \t]/) {
+ sub(/[sS][rR][cC]=[ \t]*/, "src=", odp);
+ match(odp, /src="[^"]+"/)
+ newurl = substr(odp, RSTART+5, RLENGTH-6)
+ d("Frame: " newurl)
+ if (newurl !~ /\//) {
+ newurl=(urldir newurl)
+ d("Frame->: " newurl)
+ }
- retval=(retval " " link)
- d("href(\"\"): " link)
- } else if (lowerodp ~ /href=[ \t]*'[^']*'/) {
- sub(/[hH][rR][eE][fF]=[ \t]*'/,"href='",odp)
- match(odp,/href='[^']*'/)
- link=substr(odp,RSTART,RLENGTH)
- odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
- link=substr(link,7,length(link)-7)
- link=postfix_link(url, link)
-
- if (link_seen(link)) {
- link=""
- continue
- }
+ if (link_seen(newurl)) {
+ newurl = ""
+ continue
+ }
- retval=(retval " " link)
- d("href(''): " link)
- } else if (lowerodp ~ /href=[ \t]*[^ \t>]*/) {
- sub(/[hH][rR][eE][fF]=[ \t]*/,"href=",odp)
- match(odp,/href=[^ \t>]*/)
- link=substr(odp,RSTART,RLENGTH)
- odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
- link=substr(link,6,length(link)-5)
-
- if (link_seen(link)) {
- link=""
- continue
- }
+ retval = (retval " " get_links(newurl))
+ } else if (lowerodp ~ /href=[ \t]*"[^"]*"/) {
+ sub(/[hH][rR][eE][fF]=[ \t]*"/,"href=\"",odp)
+ match(odp,/href="[^"]*"/)
+ link=substr(odp,RSTART,RLENGTH)
+ odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
+ link=substr(link,7,length(link)-7)
+ link=postfix_link(url, link)
+
+ if (link_seen(link)) {
+ link=""
+ continue
+ }
- retval=(retval " " link)
- d("href(): " link)
- } else {
- # <a ...> but not href - skip
- d("skipping <a > without href: " odp)
+ retval = (retval " " link)
+ d("href(\"\"): " link)
+ } else if (lowerodp ~ /href=[ \t]*'[^']*'/) {
+ sub(/[hH][rR][eE][fF]=[ \t]*'/,"href='",odp)
+ match(odp,/href='[^']*'/)
+ link=substr(odp,RSTART,RLENGTH)
+ odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
+ link=substr(link,7,length(link)-7)
+ link=postfix_link(url, link)
+
+ if (link_seen(link)) {
+ link=""
+ continue
}
+
+ retval = (retval " " link)
+ d("href(''): " link)
+ } else if (lowerodp ~ /href=[ \t]*[^ \t>]*/) {
+ sub(/[hH][rR][eE][fF]=[ \t]*/,"href=",odp)
+ match(odp,/href=[^ \t>]*/)
+ link=substr(odp,RSTART,RLENGTH)
+ odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
+ link=substr(link,6,length(link)-5)
+
+ if (link_seen(link)) {
+ link=""
+ continue
+ }
+
+ retval = (retval " " link)
+ d("href(): " link)
+ } else {
+ # <a ...> but not href - skip
+ d("skipping <a > without href: " odp)
}
- } else {
- retval=("WGET ERROR: " errno ": " wholeerr)
}
-
- d("Returning: " retval)
+ d("Returning: [" retval "]")
return retval
}
return url
}
-function process_source(number,lurl,name,version) {
+function process_source(number, lurl, name, version) {
# fetches file list, and compares version numbers
d("Processing " lurl)
- if ( index(lurl,version)==0 ) {
- d("There is no version number.")
+ if (index(lurl, version) == 0) {
+ d("There is no version number ["version"] in ["lurl"]")
return 0
}
d("and a file: " filename)
filenameexp=filename
- gsub("\+","\\+",filenameexp)
+ gsub("[+]","\\+",filenameexp)
sub(version,"[A-Za-z0-9.]+",filenameexp)
- gsub("\.","\\.",filenameexp)
+ gsub("[.]","\\.",filenameexp)
d("Expression: " filenameexp)
match(filename,version)
prever=substr(filename,1,RSTART-1)
references=0
finished=0
oldversion=version
- odp=get_links(newurl,filename)
+ odp = get_links(newurl, filename)
if( odp ~ "ERROR: ") {
print name "(" number ") " odp
} else {
}
}
}
- if (finished==0)
+ if (finished == 0)
print name "(" number ") seems ok: " oldversion
else
print name "(" number ") [OLD] " oldversion " [NEW] " version
}
}
+# generic RSS-based upgrade check: fetch url, extract the newest version from
+# the first matching <title> element (regex must contain one capture group
+# holding the version), then compare it against the current ver and report.
+# cmd and nver are local variables (awk convention: declared as extra parameters).
+function rss_upgrade(name, ver, url, regex, cmd, nver) {
+	regex = "s/.*<title>" regex "<\/title>.*/\\1/p"
+	cmd = "wget -q -O - " url " | sed -nre '" regex "' | head -n1"
+
+	d("rss_upgrade_cmd: " cmd)
+	cmd | getline nver
+	close(cmd)
+
+	# compare_ver() is defined elsewhere in this script;
+	# presumably truth means nver is newer than ver -- TODO confirm
+	if (compare_ver(ver, nver)) {
+		print name " [OLD] " ver " [NEW] " nver
+	} else {
+		print name " seems ok: " ver
+	}
+}
+
+# check for ZF upgrade from rss
+# ZendFramework has no plain listable download dir, so poll the devzone
+# news feed and scrape the release announcement titles instead.
+function zf_upgrade(name, ver) {
+	rss_upgrade(name, ver, \
+		"http://devzone.zend.com/tag/Zend_Framework_Management/format/rss2.0", \
+		"Zend Framework ([^\\s]+) Released" \
+	);
+}
+
+# check for hudson upgrade via the project's news RSS feed,
+# scraping "Hudson X.Y released" announcement titles.
+function hudson_upgrade(name, ver) {
+	rss_upgrade(name, ver, \
+		"https://hudson.dev.java.net/servlets/ProjectRSS?type=news", \
+		"Hudson ([0-9.]+) released" \
+	);
+}
+
# upgrade check for pear package using PEAR CLI
function pear_upgrade(name, ver, pname, pearcmd, nver) {
pname = name;
}
}
-function process_data(name,ver,rel,src) {
+function process_data(name, ver, rel, src) {
if (name ~ /^php-pear-/) {
return pear_upgrade(name, ver);
}
+ if (name == "ZendFramework") {
+ return zf_upgrade(name, ver);
+ }
+ if (name == "hudson") {
+ return hudson_upgrade(name, ver);
+ }
if (name == "vim") {
return vim_upgrade(name, ver);
}
+ if (name == "xulrunner") {
+ ver = subst_defines(DEFS["firefox_ver"], DEFS)
+ d("package xulrunner, change version to firefox ["ver"]")
+ }
# this function checks if substitutions were valid, and if true:
# processes each URL and tries to get current file list
for (i in src) {
- if ( src[i] ~ /%{nil}/ ) {
+ if (src[i] ~ /%{nil}/) {
gsub(/\%\{nil\}/, "", src[i])
}
if ( src[i] !~ /%{.*}/ && src[i] !~ /%[A-Za-z0-9_]/ ) {
d("Source: " src[i])
- process_source(i,src[i],name,ver)
+ process_source(i, src[i], name, ver)
} else {
print FNAME ":" i ": impossible substitution: " src[i]
}