git.pld-linux.org Git - packages/rpm-build-tools.git/commitdiff
- get_links() optimization (even 6+ times faster on perl-*, but needs some testing)
author Jakub Bogusz <qboosh@pld-linux.org>
Tue, 20 Dec 2005 22:02:58 +0000 (22:02 +0000)
committer cvs2git <feedback@pld-linux.org>
Sun, 24 Jun 2012 12:13:13 +0000 (12:13 +0000)
Changed files:
    pldnotify.awk -> 1.53

pldnotify.awk

index 035440c5203962ac8a55112dc4b7b939691d25df..c618f87ca89eb977c1ac521e32870d771d4fc49d 100644 (file)
@@ -138,7 +138,7 @@ function compare_ver_dec(v1,v2) {
        return 0
 }
 
-function get_links(url,        errno,link,oneline,retval,odp,tmpfile) {
+function get_links(url,        errno,link,oneline,retval,odp,wholeodp,lowerodp,tmpfile) {
 # get all <A HREF=..> tags from specified URL
        "mktemp /tmp/XXXXXX" | getline tmpfile
        close("mktemp /tmp/XXXXXX")
@@ -154,30 +154,32 @@ function get_links(url,   errno,link,oneline,retval,odp,tmpfile) {
        
        if (errno==0) {
                while (getline oneline < tmpfile)
-                       odp=(odp " " oneline)
-               if ( DEBUG ) print "Response: " odp
+                       wholeodp=(wholeodp " " oneline)
+               if ( DEBUG ) print "Response: " wholeodp
        }
        
        close(tmpfile)
        system("rm -f " tmpfile)
        urldir=url;
        sub(/[^\/]+$/,"",urldir)
+
        if ( errno==0) {
-               while ((tolower(odp) ~ /<frame[ \t]/)||(tolower(odp) ~ /href=/)) {
-                       if (tolower(odp) ~ /<frame[ \t]/) {
-                               match(tolower(odp),/<frame[ \t][^>]*>/)
-                               ramka=substr(odp,RSTART,RLENGTH)
-                               odp=substr(odp,1,RSTART) substr(odp,RSTART+RLENGTH)
-                               sub(/[sS][rR][cC]=[ \t]*/,"src=",ramka);
-                               match(ramka,/src="[^"]+"/)
-                               newurl=substr(ramka,RSTART+5,RLENGTH-6)
+               while (match(wholeodp, /<([aA]|[fF][rR][aA][mM][eE])[ \t][^>]*>/) > 0) {
+                       odp=substr(wholeodp,RSTART,RLENGTH);
+                       wholeodp=substr(wholeodp,RSTART+RLENGTH);
+
+                       lowerodp=tolower(odp);
+                       if (lowerodp ~ /<frame[ \t]/) {
+                               sub(/[sS][rR][cC]=[ \t]*/,"src=",odp);
+                               match(odp,/src="[^"]+"/)
+                               newurl=substr(odp,RSTART+5,RLENGTH-6)
                                if (DEBUG) print "Frame: " newurl
                                if (newurl !~ /\//) {
                                        newurl=(urldir newurl)
                                        if (DEBUG) print "Frame->: " newurl
                                }
                                retval=(retval " " get_links(newurl))
-                       } else if (tolower(odp) ~ /href=[ \t]*"[^"]*"/) {
+                       } else if (lowerodp ~ /href=[ \t]*"[^"]*"/) {
                                sub(/[hH][rR][eE][fF]=[ \t]*"/,"href=\"",odp)
                                match(odp,/href="[^"]*"/)
                                link=substr(odp,RSTART,RLENGTH)
@@ -185,7 +187,7 @@ function get_links(url,     errno,link,oneline,retval,odp,tmpfile) {
                                link=substr(link,7,length(link)-7)
                                retval=(retval " " link)
                                if (DEBUG) print "href(\"\"): " link
-                       } else if (tolower(odp) ~ /href=[ \t]*'[^']*'/) {
+                       } else if (lowerodp ~ /href=[ \t]*'[^']*'/) {
                                sub(/[hH][rR][eE][fF]=[ \t]*'/,"href='",odp)
                                match(odp,/href='[^']*'/)
                                link=substr(odp,RSTART,RLENGTH)
@@ -193,7 +195,7 @@ function get_links(url,     errno,link,oneline,retval,odp,tmpfile) {
                                link=substr(link,7,length(link)-7)
                                retval=(retval " " link)
                                if (DEBUG) print "href(''): " link
-                       } else if (tolower(odp) ~ /href=[ \t]*[^ \t>]*/) {
+                       } else if (lowerodp ~ /href=[ \t]*[^ \t>]*/) {
                                sub(/[hH][rR][eE][fF]=[ \t]*/,"href=",odp)
                                match(odp,/href=[^ \t>]*/)
                                link=substr(odp,RSTART,RLENGTH)
@@ -202,8 +204,8 @@ function get_links(url,     errno,link,oneline,retval,odp,tmpfile) {
                                retval=(retval " " link)
                                if (DEBUG) print "href(): " link
                        } else {
-                               retval=(retval " INTERNAL_ERROR")
-                               break
+                               # <a ...> but not href - skip
+                               if (DEBUG) print "skipping <a > without href: " odp
                        }
                }
        } else {
This page took 0.054352 seconds and 4 git commands to generate.