+# Rewrite a link into an artificial form that will be recognized by the rest
+# of this script.
+#
+# Only GitHub tarball links are handled here: when url points at github.com
+# and link contains "/tarball/", everything up to and including the last
+# "/tarball/" is stripped from link and ".tar.gz" is appended, e.g.
+#   /user/repo/tarball/v1.2  ->  v1.2.tar.gz
+#
+# url     - URL that is checked against the http(s)://github.com/ prefix
+#           (presumably the page the link was found on; verify against caller)
+# link    - the link to examine
+# oldlink - awk-style local variable; any value passed in is ignored because
+#           it is overwritten on entry
+#
+# Returns the rewritten link, or link unchanged when the rule does not apply.
+function postfix_link(url, link,   oldlink) {
+	oldlink = link
+	# The dot in the host name is escaped so that only a literal "github.com"
+	# matches rather than any character in that position.
+	if ((url ~ /^https?:\/\/github\.com\//) && (link ~ /\/tarball\//)) {
+		# A regex constant is used on purpose: "\/" inside a string constant
+		# is not a defined awk escape sequence and makes gawk emit a warning.
+		# The greedy ".*" already consumes up to the last "/tarball/", so a
+		# single sub() is sufficient.
+		sub(/.*\/tarball\//, "", link)
+		link = link ".tar.gz"
+	}
+	if (oldlink != link) {
+		# d() is defined elsewhere in this script; it is only called when
+		# the link was actually rewritten.
+		d("POST FIXED URL [ " oldlink " ] to [ " link " ]")
+	}
+	return link
+}
+
+# Build the shell command that uses the perl HTML::TreeBuilder module to
+# extract links from an HTML file.
+#
+# When run, the command prints one "TAGNAME LINK" pair per line for the links
+# found in <a> and <iframe> elements, which is straightforward to parse in
+# awk. Links are collected in a hash keyed by the link itself, so each
+# distinct link is printed once and the order of the output lines is
+# unspecified.
+#
+# tmpfile - path of the HTML file to read; it is appended to the command
+#           line unquoted, so it must be safe for the shell as-is
+#
+# Returns the command string; nothing is executed here.
+function extract_links_cmd(tmpfile) {
+	# NOTE: the perl code sits inside a single-quoted shell word within a
+	# double-quoted awk string, so it must contain neither quote character.
+	# $root->eof is required after parse(): it tells HTML::TreeBuilder that
+	# the document is complete so that the tree is finished before
+	# extract_links() walks it.
+	return "perl -MHTML::TreeBuilder -e ' \
+	my $content = join q//, <>; \
+	my $root = HTML::TreeBuilder->new; \
+	$root->parse($content); \
+	$root->eof; \
+	\
+	my %links = (); \
+	for (@{$root->extract_links(qw(a iframe))}) { \
+		my($link, $element, $attr, $tag) = @$_; \
+		$links{$link} = $tag; \
+	} \
+	\
+	while (my($link, $tag) = each %links) { \
+		print $tag, q/ /, $link, $/; \
+	} \
+	' " tmpfile
+}
+