# -*- coding: utf-8 -*-
"""
W3C HTML Validator plugin for genrated content.
"""
from pelican import signals
import logging
import os
LOG = logging.getLogger(__name__)
INCLUDE_TYPES = ['html']
def validate_files(pelican):
"""
Validate a generated HTML file
:param pelican: pelican object
"""
for dirpath, _, filenames in os.walk(pelican.settings['OUTPUT_PATH']):
for name in filenames:
if should_validate(name):
filepath = os.path.join(dirpath, name)
validate(filepath)
def validate(filename):
"""
Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .
:param filename: the filename to validate
"""
import HTMLParser
from py_w3c.validators.html.validator import HTMLValidator
h = HTMLParser.HTMLParser() # for unescaping WC3 messages
vld = HTMLValidator()
LOG.info("Validating: {0}".format(filename))
# call w3c webservice
vld.validate_file(filename)
# display errors and warning
for err in vld.errors:
LOG.error(u'line: {0}; col: {1}; message: {2}'.
format(err['line'], err['col'], h.unescape(err['message']))
)
for err in vld.warnings:
LOG.warning(u'line: {0}; col: {1}; message: {2}'.
format(err['line'], err['col'], h.unescape(err['message']))
)
def should_validate(filename):
"""Check if the filename is a type of file that should be validated.
:param filename: A file name to check against
"""
for extension in INCLUDE_TYPES:
if filename.endswith(extension):
return True
return False
def register():
"""
Register Pelican signal for validating content after it is generated.
"""
signals.finalized.connect(validate_files)