12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- # -*- coding: utf-8 -*-
- """
- W3C HTML Validator plugin for generated content.
- """
- from pelican import signals
- import logging
- import os
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
# File-name suffixes that should_validate() accepts for validation.
INCLUDE_TYPES = ['html']
def validate_files(pelican):
    """
    Walk the generated output tree and validate every eligible file.

    The walk starts at ``pelican.settings['OUTPUT_PATH']``; each file name
    accepted by ``should_validate`` is passed, as a full path, to
    ``validate``.

    :param pelican: pelican object
    """
    output_root = pelican.settings['OUTPUT_PATH']
    for folder, _subdirs, files in os.walk(output_root):
        for candidate in files:
            # Guard clause: skip anything that is not a validated type.
            if not should_validate(candidate):
                continue
            validate(os.path.join(folder, candidate))
def validate(filename):
    """
    Validate one file with the W3C validator service and log the findings.

    Uses py_w3c: https://bitbucket.org/nmb10/py_w3c/ .
    Every entry in the validator's ``errors`` list is logged at ERROR level
    and every entry in its ``warnings`` list at WARNING level, with the
    message text HTML-unescaped.

    :param filename: the filename to validate
    """
    # The ``HTMLParser`` module exists only on Python 2, and the
    # ``HTMLParser.unescape()`` method was removed in Python 3.9, so prefer
    # ``html.unescape`` (Python 3.4+) and fall back for older interpreters.
    try:
        from html import unescape
    except ImportError:
        try:
            from HTMLParser import HTMLParser  # Python 2
        except ImportError:
            from html.parser import HTMLParser  # Python 3.0 - 3.3
        unescape = HTMLParser().unescape  # for unescaping W3C messages

    from py_w3c.validators.html.validator import HTMLValidator

    vld = HTMLValidator()
    LOG.info("Validating: {0}".format(filename))

    # call w3c webservice
    vld.validate_file(filename)

    # display errors and warnings
    for err in vld.errors:
        LOG.error(u'line: {0}; col: {1}; message: {2}'.format(
            err['line'], err['col'], unescape(err['message'])))
    for err in vld.warnings:
        LOG.warning(u'line: {0}; col: {1}; message: {2}'.format(
            err['line'], err['col'], unescape(err['message'])))
def should_validate(filename):
    """Tell whether a file name belongs to a type that gets validated.

    The name is accepted when it ends with any entry of ``INCLUDE_TYPES``.
    This is a plain suffix comparison: no leading dot is required.

    :param filename: A file name to check against
    :return: True if the name ends with a listed suffix, False otherwise
    """
    return any(filename.endswith(suffix) for suffix in INCLUDE_TYPES)
def register():
    """
    Plugin entry point: hook ``validate_files`` onto Pelican's
    ``finalized`` signal.
    """
    finalized_signal = signals.finalized
    finalized_signal.connect(validate_files)
|