Browse Source

Added fix for Unicode problems reported in issue #995.

jeffrimko 6 years ago
parent
commit
e0dc2271d5
1 changed file with 11 additions and 2 deletions
  1. 11 2
      asciidoc_reader/asciidoc_reader.py

+ 11 - 2
asciidoc_reader/asciidoc_reader.py

@@ -12,6 +12,7 @@ from pelican import signals
 import os
 import re
 import subprocess
+import sys
 
 def call(cmd):
     """Calls a CLI command and returns the stdout as string."""
@@ -23,6 +24,14 @@ def default():
         if len(call(cmd + " --help")):
             return cmd
 
def fix_unicode(val):
    """Return *val* as text, repairing UTF-8 that was mis-decoded as Latin-1.

    On Python 2 a byte string is decoded as UTF-8; a value that is already
    ``unicode`` is returned unchanged.

    On Python 3 the text is round-tripped through Latin-1 -> UTF-8, which
    undoes character substitutions such as 'Ã±' -> 'ñ'.  When that round
    trip is impossible (the text contains characters outside Latin-1, or
    the resulting bytes are not valid UTF-8) the text was not mis-decoded
    in the first place, so it is returned unchanged instead of raising.
    """
    if sys.version_info < (3, 0):
        # ``bytes`` is an alias of ``str`` on Python 2.  Calling ``.decode``
        # on a ``unicode`` object would first encode it implicitly as ASCII
        # and fail on any non-ASCII character, so only decode real bytes.
        if isinstance(val, bytes):
            return val.decode("utf-8")
        return val
    try:
        # This fixes an issue with character substitutions, e.g. 'Ã±' to 'ñ'.
        return str.encode(val, "latin-1").decode("utf-8")
    except (UnicodeEncodeError, UnicodeDecodeError):
        # Already correctly decoded text; nothing to repair.
        return val
+
 ALLOWED_CMDS = ["asciidoc", "asciidoctor"]
 
 ENABLED = None != default()
@@ -67,7 +76,7 @@ class AsciiDocReader(BaseReader):
                     elif line.count("=") == len(prev.strip()):
                         title = prev.strip()
                     if title:
-                        metadata['title'] = self.process_metadata('title', title)
+                        metadata['title'] = self.process_metadata('title', fix_unicode(title))
 
                 # Parse for other metadata.
                 regexp = re.compile(r"^:[A-z]+:\s*[A-z0-9]")
@@ -75,7 +84,7 @@ class AsciiDocReader(BaseReader):
                     toks = line.split(":", 2)
                     key = toks[1].strip().lower()
                     val = toks[2].strip()
-                    metadata[key] = self.process_metadata(key, val)
+                    metadata[key] = self.process_metadata(key, fix_unicode(val))
                 prev = line
         return metadata