Procházet zdrojové kódy

Fix unicode errors triggered by latest version of html5lib.

With the current version of html5lib, the simple_footnotes plugin breaks with
an assertion failure. The root cause is that some strings in simple_footnotes
are unicode and others are of type str. For html5lib to be happy, everything
has to be unicode.

This patch fixes the html5lib issue, and introduces no other issues I was able
to find.
John Mark Schofield před 8 roky
1 změnil soubory, kde provedl 33 přidání a 30 odebrání
  1. 33 30

+ 33 - 30

@@ -1,3 +1,6 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*- #
 from pelican import signals
 import html5lib
@@ -6,7 +9,7 @@ RAW_FOOTNOTE_CONTAINERS = ["code"]
 def getText(node, recursive = False):
     """Get all the text associated with this node.
        With recursive == True, all text from child nodes is retrieved."""
-    L = ['']
+    L = [u'']
     for n in node.childNodes:
         if n.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
@@ -14,7 +17,7 @@ def getText(node, recursive = False):
             if not recursive:
                 return None
         L.append(getText(n) )
-    return ''.join(L)
+    return u''.join(L)
 def sequence_gen(genlist):
     for gen in genlist:
@@ -25,16 +28,16 @@ def sequence_gen(genlist):
 def parse_for_footnotes(article_or_page_generator):
     all_content = [
       getattr(article_or_page_generator, attr, None) \
-      for attr in ['articles','drafts','pages'] ]
+      for attr in [u'articles',u'drafts',u'pages'] ]
     all_content = [ x for x in all_content if x is not None ]
     for article in sequence_gen(all_content):
-        if "[ref]" in article._content and "[/ref]" in article._content:
-            content = article._content.replace("[ref]", "<x-simple-footnote>").replace("[/ref]", "</x-simple-footnote>")
-            parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder("dom"))
+        if u"[ref]" in article._content and u"[/ref]" in article._content:
+            content = article._content.replace(u"[ref]", u"<x-simple-footnote>").replace(u"[/ref]", u"</x-simple-footnote>")
+            parser = html5lib.HTMLParser(tree=html5lib.getTreeBuilder(u"dom"))
             dom = parser.parse(content)
             endnotes = []
             count = 0
-            for footnote in dom.getElementsByTagName("x-simple-footnote"):
+            for footnote in dom.getElementsByTagName(u"x-simple-footnote"):
                 pn = footnote
                 leavealone = False
                 while pn:
@@ -45,41 +48,41 @@ def parse_for_footnotes(article_or_page_generator):
                 if leavealone:
                 count += 1
-                fnid = "sf-%s-%s" % (article.slug, count)
-                fnbackid = "%s-back" % (fnid,)
+                fnid = u"sf-%s-%s" % (article.slug, count)
+                fnbackid = u"%s-back" % (fnid,)
                 endnotes.append((footnote, fnid, fnbackid))
-                number = dom.createElement("sup")
-                number.setAttribute("id", fnbackid)
-                numbera = dom.createElement("a")
-                numbera.setAttribute("href", "#%s" % fnid)
-                numbera.setAttribute("class", "simple-footnote")
-                numbera.appendChild(dom.createTextNode(str(count)))
-                txt = getText(footnote, recursive=True).replace("\n", " ")
-                numbera.setAttribute("title", txt)
+                number = dom.createElement(u"sup")
+                number.setAttribute(u"id", fnbackid)
+                numbera = dom.createElement(u"a")
+                numbera.setAttribute(u"href", u"#%s" % fnid)
+                numbera.setAttribute(u"class", u"simple-footnote")
+                numbera.appendChild(dom.createTextNode(unicode(count)))
+                txt = getText(footnote, recursive=True).replace(u"\n", u" ")
+                numbera.setAttribute(u"title", txt)
                 footnote.parentNode.insertBefore(number, footnote)
             if endnotes:
-                ol = dom.createElement("ol")
-                ol.setAttribute("class", "simple-footnotes")
+                ol = dom.createElement(u"ol")
+                ol.setAttribute(u"class", u"simple-footnotes")
                 for e, fnid, fnbackid in endnotes:
-                    li = dom.createElement("li")
-                    li.setAttribute("id", fnid)
+                    li = dom.createElement(u"li")
+                    li.setAttribute(u"id", fnid)
                     while e.firstChild:
-                    backlink = dom.createElement("a")
-                    backlink.setAttribute("href", "#%s" % fnbackid)
-                    backlink.setAttribute("class", "simple-footnote-back")
+                    backlink = dom.createElement(u"a")
+                    backlink.setAttribute(u"href", u"#%s" % fnbackid)
+                    backlink.setAttribute(u"class", u"simple-footnote-back")
-                    li.appendChild(dom.createTextNode(" "))
+                    li.appendChild(dom.createTextNode(u" "))
-                dom.getElementsByTagName("body")[0].appendChild(ol)
+                dom.getElementsByTagName(u"body")[0].appendChild(ol)
                 s = html5lib.serializer.HTMLSerializer(omit_optional_tags=False, quote_attr_values='legacy')
-                output_generator = s.serialize(html5lib.treewalkers.getTreeWalker("dom")(dom.getElementsByTagName("body")[0]))
-                article._content =  "".join(list(output_generator)).replace(
-                    "<x-simple-footnote>", "[ref]").replace("</x-simple-footnote>", "[/ref]").replace(
-                    "<body>", "").replace("</body>", "")
+                output_generator = s.serialize(html5lib.treewalkers.getTreeWalker(u"dom")(dom.getElementsByTagName(u"body")[0]))
+                article._content =  u"".join(list(output_generator)).replace(
+                    u"<x-simple-footnote>", u"[ref]").replace(u"</x-simple-footnote>", u"[/ref]").replace(
+                    u"<body>", u"").replace(u"</body>", u"")
 def register():