wc3_validate.py 1.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970
  1. # -*- coding: utf-8 -*-
  2. """
  3. W3C HTML Validator plugin for generated content.
  4. """
  5. from pelican import signals
  6. import logging
  7. import os
  8. LOG = logging.getLogger(__name__)
  9. INCLUDE_TYPES = ['html']
  10. def validate_files(pelican):
  11. """
  12. Validate a generated HTML file
  13. :param pelican: pelican object
  14. """
  15. for dirpath, _, filenames in os.walk(pelican.settings['OUTPUT_PATH']):
  16. for name in filenames:
  17. if should_validate(name):
  18. filepath = os.path.join(dirpath, name)
  19. validate(filepath)
  20. def validate(filename):
  21. """
  22. Use W3C validator service: https://bitbucket.org/nmb10/py_w3c/ .
  23. :param filename: the filename to validate
  24. """
  25. import HTMLParser
  26. from py_w3c.validators.html.validator import HTMLValidator
  27. h = HTMLParser.HTMLParser() # for unescaping WC3 messages
  28. vld = HTMLValidator()
  29. LOG.info("Validating: {0}".format(filename))
  30. # call w3c webservice
  31. vld.validate_file(filename)
  32. # display errors and warning
  33. for err in vld.errors:
  34. LOG.error(u'line: {0}; col: {1}; message: {2}'.
  35. format(err['line'], err['col'], h.unescape(err['message']))
  36. )
  37. for err in vld.warnings:
  38. LOG.warning(u'line: {0}; col: {1}; message: {2}'.
  39. format(err['line'], err['col'], h.unescape(err['message']))
  40. )
  41. def should_validate(filename):
  42. """Check if the filename is a type of file that should be validated.
  43. :param filename: A file name to check against
  44. """
  45. for extension in INCLUDE_TYPES:
  46. if filename.endswith(extension):
  47. return True
  48. return False
  49. def register():
  50. """
  51. Register Pelican signal for validating content after it is generated.
  52. """
  53. signals.finalized.connect(validate_files)