Forráskód Böngészése

Merge pull request #19 from zemanel/w3c-validator-plugin

add w3c validator plugin
Justin Mayer 12 éve
szülő
commit
41732db062
3 módosított fájl, 97 hozzáadás és 0 törlés
  1. 26 0
      w3c_validate/README.md
  2. 2 0
      w3c_validate/__init__.py
  3. 69 0
      w3c_validate/wc3_validate.py

+ 26 - 0
w3c_validate/README.md

@@ -0,0 +1,26 @@
+# w3c_validate plugin
+
+W3C validator (http://validator.w3.org) plugin for generated HTML content.
+
+After all content is generated, the output folder is traversed for HTML files;
+each file is validated by the W3C service and the results are displayed, for example:
+
+    -> writing /tmp/_output/sitemap.xml
+    -> Validating: /tmp/_output/archives.html
+    ERROR: line: 2; col: 52; message: Bad value http://www.w3.org/1999/html for the attribute xmlns (only http://www.w3.org/1999/xhtml permitted here).
+    -> Validating: /tmp/_output/categories.html
+    ERROR: line: 2; col: 52; message: Bad value http://www.w3.org/1999/html for the attribute xmlns (only http://www.w3.org/1999/xhtml permitted here).
+
+## Dependencies
+
+* py_w3c, https://pypi.python.org/pypi/py_w3c/0.1.0 , which can be installed with pip:
+
+    $ pip install py_w3c
+
+## TODO
+
+- [ ] add tests
+
+
+
+

+ 2 - 0
w3c_validate/__init__.py

@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+from .wc3_validate import *

+ 69 - 0
w3c_validate/wc3_validate.py

@@ -0,0 +1,69 @@
+# -*- coding: utf-8 -*-
+"""
+W3C HTML Validator plugin for generated content.
+"""
+
+
+from pelican import signals
+import logging
+import os
+
+# Module-level logger, named after this module.
+LOG = logging.getLogger(__name__)
+
+# File-name suffixes that select a file for validation; used by should_validate().
+INCLUDE_TYPES = ['html']
+
+
+def validate_files(pelican):
+    """
+    Validate a generated HTML file
+    :param pelican: pelican object
+    """
+    for dirpath, _, filenames in os.walk(pelican.settings['OUTPUT_PATH']):
+        for name in filenames:
+            if should_validate(name):
+                filepath = os.path.join(dirpath, name)
+                validate(filepath)
+
+
+def validate(filename):
+    """
+    Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .
+    :param filename: the filename to validate
+    """
+    import HTMLParser
+    from py_w3c.validators.html.validator import HTMLValidator
+
+    h = HTMLParser.HTMLParser()  # for unescaping WC3 messages
+
+    vld = HTMLValidator()
+    LOG.info("Validating: {0}".format(filename))
+
+    # call w3c webservice
+    vld.validate_file(filename)
+
+    # display errors and warning
+    for err in vld.errors:
+        LOG.error(u'line: {0}; col: {1}; message: {2}'.
+                  format(err['line'], err['col'], h.unescape(err['message']))
+                  )
+    for err in vld.warnings:
+        LOG.warning(u'line: {0}; col: {1}; message: {2}'.
+                    format(err['line'], err['col'], h.unescape(err['message']))
+                    )
+
+
+def should_validate(filename):
+    """Check if the filename is a type of file that should be validated.
+    :param filename: A file name to check against
+    """
+    for extension in INCLUDE_TYPES:
+        if filename.endswith(extension):
+            return True
+    return False
+
+
+def register():
+    """
+    Register Pelican signal for validating content after it is generated.
+    """
+    signals.finalized.connect(validate_files)