# linker.py
# -*- coding: utf-8 -*-
from __future__ import absolute_import

import logging
import re

from six.moves.urllib.parse import urlparse, urlunparse
from pelican import signals, contents

from linker import content_objects

  8. logger = logging.getLogger("linker")
  9. class Link(object):
  10. """Represents an HTML link including a linker command.
  11. Typically, the Link is constructed from an SRE_Match after applying the
  12. provided Link.regex pattern to the HTML content of a content object.
  13. """
  14. # regex based on the one used in contents.py from pelican version 3.6.3
  15. regex = re.compile(
  16. r""" # EXAMPLE: <a rel="nofollow" href="{mailto}webmaster"
  17. (?P<markup><\s*[^\>]* # <a rel="nofollow" href= --> markup
  18. (?:href|src|poster|data|cite|formaction|action)\s*=)
  19. (?P<quote>["\']) # " --> quote
  20. \{(?P<cmd>.*?)\} # {mailto} --> cmd
  21. (?P<url>.*?) # webmaster --> __url (see path)
  22. \2 # " <-- quote
  23. """, re.X)
  24. def __init__(self, context, content_object, match):
  25. """Construct a Link from an SRE_Match.
  26. :param context: The shared context between generators.
  27. :param content_object: The associated pelican.contents.Content.
  28. :param match: An SRE_Match obtained by applying the regex to my content.
  29. """
  30. self.context = context
  31. self.content_object = content_object
  32. self.markup = match.group('markup')
  33. self.quote = match.group('quote')
  34. self.cmd = match.group('cmd')
  35. self.__url = urlparse(match.group('url'))
  36. self.path = self.__url.path
  37. def href(self): # rebuild matched URL using (possibly updated) self.path
  38. return urlunparse( self.__url._replace(path=self.path) )
  39. def html_code(self): # rebuild matched pattern from (possibly updated) self
  40. return ''.join((self.markup, self.quote, self.href(), self.quote))
  41. class LinkerBase(object):
  42. """Base class for performing the linker command magic.
  43. In order to provide the linker command 'foo' as in '<a href="{foo}contact',
  44. a responsible Linker class (e.g., FooLinker) should derive from LinkerBase
  45. and set FooLinker.commands to ['foo']. The linker command is processed when
  46. the overridden Linker.link(Link) is called.
  47. """
  48. commands = [] # link commands handled by the Linker. EXAMPLE: ['mailto']
  49. builtins = ['filename', 'attach', 'category', 'tag', 'author', 'index']
  50. def __init__(self, settings):
  51. self.settings = settings
  52. def link(self, link):
  53. raise NotImplementedError
  54. class Linkers(object):
  55. """Interface for all Linkers.
  56. This class contains a mapping of {cmd1: linker1, cmd2: linker2} to apply any
  57. registered linker command by passing the Link to the responsible Linker.
  58. (Idea based on pelican.readers.Readers, but with less customization options.)
  59. """
  60. def __init__(self, settings):
  61. self.settings = settings
  62. self.linkers = {}
  63. for linker_class in [LinkerBase] + LinkerBase.__subclasses__():
  64. for cmd in linker_class.commands:
  65. self.register_linker(cmd, linker_class)
  66. def register_linker(self, cmd, linker_class):
  67. if cmd in self.linkers: # check for existing registration of that cmd
  68. current_linker_class = self.linkers[cmd].__class__
  69. logger.warning(
  70. "%s is stealing the linker command %s from %s.",
  71. linker_class.__name__, cmd, current_linker_class.__name__
  72. )
  73. self.linkers[cmd] = linker_class(self.settings)
  74. def handle_links_in_content_object(self, context, content_object):
  75. # replace Link matches (with side effects on content and content_object)
  76. def replace_link_match(match):
  77. link = Link(context, content_object, match)
  78. if link.cmd in LinkerBase.builtins:
  79. return match.group(0) # builtin commands not handled here
  80. elif link.cmd in self.linkers:
  81. self.linkers[link.cmd].link(link) # let Linker process the Link
  82. else:
  83. logger.warning("Ignoring unknown linker command %s", link.cmd)
  84. return link.html_code() # return HTML to replace the matched link
  85. content_object._content = Link.regex.sub( # match, process and replace
  86. replace_link_match, content_object._content)
  87. def feed_context_to_linkers(generators):
  88. settings = generators[0].settings
  89. linkers = Linkers(settings)
  90. context = generators[0].context
  91. for co in context['content_objects']: # provided by plugin 'content_objects'
  92. if isinstance(co, contents.Static): continue
  93. if not co._content: continue
  94. linkers.handle_links_in_content_object(context, co)
  95. def register():
  96. content_objects.register()
  97. signals.all_generators_finalized.connect(feed_context_to_linkers)