123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- # -*- coding: utf-8 -*-
- from __future__ import absolute_import
- import logging
- import re
- from six.moves.urllib.parse import urlparse, urlunparse
- from pelican import signals, contents
- from linker import content_objects
- logger = logging.getLogger("linker")
class Link(object):
    """An HTML link attribute whose value starts with a linker command.

    A Link is built from a match of ``Link.regex`` against the HTML of a
    content object. The matched pieces are stored as attributes; ``path`` may
    be rewritten afterwards and the attribute re-rendered via ``html_code``.
    """

    # regex based on the one used in contents.py from pelican version 3.6.3
    regex = re.compile(
        r""" # EXAMPLE: <a rel="nofollow" href="{mailto}webmaster"
        (?P<markup><\s*[^\>]* # <a rel="nofollow" href= --> markup
        (?:href|src|poster|data|cite|formaction|action)\s*=)
        (?P<quote>["\']) # " --> quote
        \{(?P<cmd>.*?)\} # {mailto} --> cmd
        (?P<url>.*?) # webmaster --> __url (see path)
        \2 # " <-- quote
        """, re.X)

    def __init__(self, context, content_object, match):
        """Build a Link out of a match produced by ``Link.regex``.

        :param context: The shared context between generators.
        :param content_object: The pelican.contents.Content the match was
            found in.
        :param match: Match object exposing the named groups ``markup``,
            ``quote``, ``cmd`` and ``url``.
        """
        self.context = context
        self.content_object = content_object
        # Pull the three plain-text groups out in a single call.
        self.markup, self.quote, self.cmd = match.group(
            'markup', 'quote', 'cmd')
        # The parsed URL stays private; only its path is exposed for editing.
        self.__url = urlparse(match.group('url'))
        self.path = self.__url.path

    def href(self):
        """Return the matched URL with its path replaced by ``self.path``."""
        updated_url = self.__url._replace(path=self.path)
        return urlunparse(updated_url)

    def html_code(self):
        """Return markup, opening quote, current href and closing quote."""
        return self.markup + self.quote + self.href() + self.quote
class LinkerBase(object):
    """Common parent of every Linker that implements a linker command.

    To offer the linker command 'foo' (as in '<a href="{foo}contact'), derive
    a class such as FooLinker from LinkerBase, set its ``commands`` to
    ['foo'] and override ``link``. The overridden ``link`` receives the Link
    to process.
    """

    # link commands handled by the Linker. EXAMPLE: ['mailto']
    commands = []

    # Commands that Linkers deliberately does not dispatch to any Linker
    # (presumably Pelican's own link commands; verify against Pelican).
    builtins = ['filename', 'attach', 'category', 'tag', 'author', 'index']

    def __init__(self, settings):
        """Keep a reference to the settings handed in by the caller.

        :param settings: Settings object stored as ``self.settings``.
        """
        self.settings = settings

    def link(self, link):
        """Process ``link``; must be overridden by concrete Linker classes.

        :param link: The Link to process.
        :raises NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError()
class Linkers(object):
    """Interface for all Linkers.

    Holds a mapping of {cmd1: linker1, cmd2: linker2} and applies a registered
    linker command by passing the Link to the responsible Linker.
    (Idea based on pelican.readers.Readers, but with less customization
    options.)
    """

    def __init__(self, settings):
        """Instantiate and register one Linker per declared command.

        :param settings: Settings object passed on to every Linker instance.
        """
        self.settings = settings
        self.linkers = {}
        # NOTE: __subclasses__() only lists *direct* subclasses of LinkerBase,
        # so a Linker deriving from another Linker is not discovered here.
        for linker_class in [LinkerBase] + LinkerBase.__subclasses__():
            for cmd in linker_class.commands:
                self.register_linker(cmd, linker_class)

    def register_linker(self, cmd, linker_class):
        """Make an instance of ``linker_class`` responsible for ``cmd``.

        A later registration for the same command replaces the earlier one;
        a warning is logged when that happens.

        :param cmd: The linker command (text between the curly braces).
        :param linker_class: Class instantiated with ``self.settings``.
        """
        if cmd in self.linkers:  # check for existing registration of that cmd
            current_linker_class = self.linkers[cmd].__class__
            logger.warning(
                "%s is stealing the linker command %s from %s.",
                linker_class.__name__, cmd, current_linker_class.__name__
            )
        self.linkers[cmd] = linker_class(self.settings)

    def handle_links_in_content_object(self, context, content_object):
        """Process every linker command found in ``content_object._content``.

        Links whose command has a registered Linker are passed to that Linker
        and replaced by the rebuilt HTML. Links with a builtin or unknown
        command are left exactly as they were found, including the ``{cmd}``
        prefix, so whatever is responsible for them can still recognise them.

        :param context: The shared context between generators.
        :param content_object: Object whose ``_content`` is rewritten in place.
        """
        # replace Link matches (with side effects on content and content_object)
        def replace_link_match(match):
            cmd = match.group('cmd')

            if cmd in LinkerBase.builtins:
                # builtin commands not handled here: keep the original text.
                # Rebuilding via Link.html_code() would drop the {cmd} marker.
                return match.group(0)

            linker = self.linkers.get(cmd)
            if linker is None:
                logger.warning("Ignoring unknown linker command %s", cmd)
                return match.group(0)  # "ignoring" means: do not alter it

            link = Link(context, content_object, match)
            linker.link(link)  # let Linker process the Link
            return link.html_code()  # return HTML to replace the matched link

        content_object._content = Link.regex.sub(  # match, process and replace
            replace_link_match, content_object._content)
def feed_context_to_linkers(generators):
    """Run the registered Linkers over all non-static content objects.

    Settings and context are both taken from the first generator.

    :param generators: Sequence of generators; only ``generators[0]`` is read.
    """
    first_generator = generators[0]
    linkers = Linkers(first_generator.settings)
    context = first_generator.context

    # 'content_objects' is provided by the plugin 'content_objects'
    for content_object in context['content_objects']:
        # Static objects and objects with empty content are skipped.
        if isinstance(content_object, contents.Static) \
                or not content_object._content:
            continue
        linkers.handle_links_in_content_object(context, content_object)
def register():
    """Register this plugin together with the plugin it depends on."""
    # The 'content_objects' plugin supplies context['content_objects'],
    # which feed_context_to_linkers reads.
    content_objects.register()

    # Process the links once all generators have been finalized.
    signals.all_generators_finalized.connect(feed_context_to_linkers)
|