From f223091c9cad3a30579e7bfdccfe4b1f8f8b30a6 Mon Sep 17 00:00:00 2001 From: Jakub Bogusz Date: Sun, 9 Feb 2014 17:54:34 +0100 Subject: [PATCH] - updated to 3.3.0 - removed obsolete add-handle_failures-option-to-make_links_absolute-to patch --- ...res-option-to-make_links_absolute-to.patch | 144 ------------------ python-lxml.spec | 6 +- 2 files changed, 2 insertions(+), 148 deletions(-) delete mode 100644 python-lxml-add-handle_failures-option-to-make_links_absolute-to.patch diff --git a/python-lxml-add-handle_failures-option-to-make_links_absolute-to.patch b/python-lxml-add-handle_failures-option-to-make_links_absolute-to.patch deleted file mode 100644 index 0c28250..0000000 --- a/python-lxml-add-handle_failures-option-to-make_links_absolute-to.patch +++ /dev/null @@ -1,144 +0,0 @@ -From ab497930d74c7bcf4b725809508a1fefef453faa Mon Sep 17 00:00:00 2001 -From: Stefan Behnel -Date: Fri, 15 Nov 2013 14:49:48 +0100 -Subject: [PATCH] add 'handle_failures' option to make_links_absolute() to - allow graceful handling of broken URLs - ---- - CHANGES.txt | 4 +++ - src/lxml/html/__init__.py | 49 +++++++++++++++++++++++++------ - src/lxml/html/tests/test_rewritelinks.txt | 21 ++++++++++--- - 3 files changed, 61 insertions(+), 13 deletions(-) - -diff --git a/src/lxml/html/__init__.py b/src/lxml/html/__init__.py -index ea88d2b..dd52611 100644 ---- a/src/lxml/html/__init__.py -+++ b/src/lxml/html/__init__.py -@@ -294,15 +294,21 @@ class HtmlMixin(object): - ## Link functions - ######################################## - -- def make_links_absolute(self, base_url=None, resolve_base_href=True): -+ def make_links_absolute(self, base_url=None, resolve_base_href=True, -+ handle_failures=None): - """ - Make all links in the document absolute, given the - ``base_url`` for the document (the full URL where the document -- came from), or if no ``base_url`` is given, then the ``.base_url`` of the document. -+ came from), or if no ``base_url`` is given, then the ``.base_url`` -+ of the document. - - If ``resolve_base_href`` is true, then any ```` - tags in the document are used *and* removed from the document. - If it is false then any such tag is ignored. -+ -+ If ``handle_failures`` is None (default), a failure to process -+ a URL will abort the processing. If set to 'ignore', errors -+ are ignored. If set to 'discard', failing URLs will be removed. - """ - if base_url is None: - base_url = self.base_url -@@ -311,24 +317,48 @@ class HtmlMixin(object): - "No base_url given, and the document has no base_url") - if resolve_base_href: - self.resolve_base_href() -- def link_repl(href): -- return urljoin(base_url, href) -+ -+ if handle_failures == 'ignore': -+ def link_repl(href): -+ try: -+ return urljoin(base_url, href) -+ except ValueError: -+ return href -+ elif handle_failures == 'discard': -+ def link_repl(href): -+ try: -+ return urljoin(base_url, href) -+ except ValueError: -+ return None -+ elif handle_failures is None: -+ def link_repl(href): -+ return urljoin(base_url, href) -+ else: -+ raise ValueError( -+ "unexpected value for handle_failures: %r" % handle_failures) -+ - self.rewrite_links(link_repl) - -- def resolve_base_href(self): -+ def resolve_base_href(self, handle_failures=None): - """ - Find any ```` tag in the document, and apply its - values to all links found in the document. Also remove the - tag once it has been applied. -+ -+ If ``handle_failures`` is None (default), a failure to process -+ a URL will abort the processing. If set to 'ignore', errors -+ are ignored. If set to 'discard', failing URLs will be removed. - """ - base_href = None -- basetags = self.xpath('//base[@href]|//x:base[@href]', namespaces={'x':XHTML_NAMESPACE}) -+ basetags = self.xpath('//base[@href]|//x:base[@href]', -+ namespaces={'x': XHTML_NAMESPACE}) - for b in basetags: - base_href = b.get('href') - b.drop_tree() - if not base_href: - return -- self.make_links_absolute(base_href, resolve_base_href=False) -+ self.make_links_absolute(base_href, resolve_base_href=False, -+ handle_failures=handle_failures) - - def iterlinks(self): - """ -@@ -434,6 +464,7 @@ class HtmlMixin(object): - base_href, resolve_base_href=resolve_base_href) - elif resolve_base_href: - self.resolve_base_href() -+ - for el, attrib, link, pos in self.iterlinks(): - new_link = link_repl_func(link.strip()) - if new_link == link: -diff --git a/src/lxml/html/tests/test_rewritelinks.txt b/src/lxml/html/tests/test_rewritelinks.txt -index 43dd99d..dd400b7 100644 ---- a/src/lxml/html/tests/test_rewritelinks.txt -+++ b/src/lxml/html/tests/test_rewritelinks.txt -@@ -185,6 +185,22 @@ An application of ``iterlinks()`` is ``make_links_absolute()``:: - - - -+If the document contains invalid links, you may choose to "discard" or "ignore" -+them by passing the respective option into the ``handle_failures`` argument:: -+ -+ >>> html = lxml.html.fromstring ('''\ -+ ...
-+ ... test2 -+ ...
''') -+ -+ >>> html.make_links_absolute(base_url="http://my.little.server/url/", -+ ... handle_failures="discard") -+ -+ >>> print(lxml.html.tostring (html, pretty_print=True, encoding='unicode')) -+
-+ test2 -+
-+ - Check if we can replace multiple links inside of the same text string:: - - >>> html = lxml.html.fromstring ("""\ -@@ -209,10 +225,7 @@ Check if we can replace multiple links inside of the same text string:: - - >>> html.make_links_absolute () - -- >>> try: _unicode = unicode -- ... except NameError: _unicode = str -- -- >>> print(lxml.html.tostring (html, pretty_print = True, encoding=_unicode)) -+ >>> print(lxml.html.tostring (html, pretty_print=True, encoding='unicode')) - - - Test --- -1.8.4.3 - diff --git a/python-lxml.spec b/python-lxml.spec index ef834c2..3b1a5c2 100644 --- a/python-lxml.spec +++ b/python-lxml.spec @@ -8,13 +8,12 @@ Summary: Python 2 binding for the libxml2 and libxslt libraries Summary(pl.UTF-8): Wiązanie Pythona 2 do bibliotek libxml2 i libxslt Name: python-%{module} -Version: 3.2.5 +Version: 3.3.0 Release: 1 License: BSD Group: Libraries/Python Source0: http://lxml.de/files/%{module}-%{version}.tgz -# Source0-md5: 6c4fb9b1840631cff09b8229a12a9ef7 -Patch0: %{name}-add-handle_failures-option-to-make_links_absolute-to.patch +# Source0-md5: ca2f02fd762f3614a1930c568847052b URL: http://lxml.de/ BuildRequires: libxml2-devel >= 1:2.7.8 BuildRequires: libxslt-devel >= 1.1.26 @@ -61,7 +60,6 @@ Dokumentacja API modułu lxml. %prep %setup -q -n %{module}-%{version} -%patch0 -p1 %build %if %{with python2} -- 2.43.0