-function get_links(url,filename,errno,link,oneline,retval,odp,wholeodp,lowerodp,tmpfile) {
+# Run mktemp(1) with the template /tmp/XXXXXX and return the first line it
+# prints (the path of the newly created temporary file).
+# _maker and _path are awk-style local variables; callers pass no arguments.
+# If nothing can be read from the command, the returned value is empty.
+function mktemp(    _maker, _path) {
+    _maker = "mktemp /tmp/XXXXXX"
+    # only one line of output is expected, so a single getline is enough
+    _maker | getline _path
+    # close the pipe so that the next call spawns a fresh mktemp process
+    close(_maker)
+    return _path
+}
+
+# Rewrite a link into an artificial form that the rest of this script
+# recognizes.
+#
+#   url     - URL tested against github.com (presumably the page the link
+#             was found on; confirm against the caller)
+#   link    - the link to rewrite
+#   oldlink - awk-style local variable; callers do not pass it
+#
+# When url is an http(s) github.com URL and link contains "/tarball/",
+# everything up to and including the last "/tarball/" is stripped and
+# ".tar.gz" is appended. Any other link is returned unchanged. A debug
+# message is emitted through d() whenever the link was modified.
+function postfix_link(url, link,    oldlink) {
+    oldlink = link
+    # the dot is escaped so that only the literal host "github.com" matches
+    if ((url ~ /^https?:\/\/github\.com\//) && (link ~ /\/tarball\//)) {
+        # regex literal instead of a string: "\/" is not a defined string
+        # escape and makes gawk emit a warning. The greedy ".*" anchors at
+        # the start of the string, so one sub() removes the whole prefix.
+        sub(/.*\/tarball\//, "", link)
+        link = link ".tar.gz"
+    }
+    if (oldlink != link) {
+        d("POST FIXED URL [ " oldlink " ] to [ " link " ]")
+    }
+    return link
+}
+
+# Build the shell command that uses the perl HTML::TreeBuilder module to
+# extract links from the HTML file named by tmpfile.
+#
+# When run, the command prints one "TAGNAME LINK" line for every distinct
+# link found in <a> and <iframe> elements, which is straightforward to parse
+# in awk. Links are collected in a perl hash, so duplicates are dropped and
+# the output order is unspecified.
+#
+#   tmpfile - path of the HTML file; it is appended to the command unquoted,
+#             so it must not contain whitespace or shell metacharacters
+#
+# Returns the command string; nothing is executed here.
+# The perl code sits inside single quotes for the shell, so it must not
+# contain a single quote itself. $root->eof is called after parse() because
+# HTML::TreeBuilder requires it to finish building the tree.
+function extract_links_cmd(tmpfile) {
+ return "perl -MHTML::TreeBuilder -e ' \
+ my $content = join q//, <>; \
+ my $root = HTML::TreeBuilder->new; \
+ $root->parse($content); \
+ $root->eof; \
+ \
+ my %links = (); \
+ for (@{$root->extract_links(qw(a iframe))}) { \
+ my($link, $element, $attr, $tag) = @$_; \
+ $links{$link} = $tag; \
+ } \
+ \
+ while (my($link, $tag) = each %links) { \
+ print $tag, q/ /, $link, $/; \
+ } \
+ ' " tmpfile
+}
+