pelican_mathjax_markdown_extension.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. # -*- coding: utf-8 -*-
  2. """
  3. Pelican Mathjax Markdown Extension
  4. ==================================
  5. An extension for the Python Markdown module that enables
  6. the Pelican python blog to process mathjax. This extension
  7. gives Pelican the ability to use Mathjax as a "first class
  8. citizen" of the blog
  9. """
  10. import markdown
  11. from markdown.util import etree
  12. from markdown.util import AtomicString
  13. class PelicanMathJaxPattern(markdown.inlinepatterns.Pattern):
  14. """Inline markdown processing that matches mathjax"""
  15. def __init__(self, pelican_mathjax_extension, tag, pattern):
  16. super(PelicanMathJaxPattern,self).__init__(pattern)
  17. self.math_tag_class = pelican_mathjax_extension.getConfig('math_tag_class')
  18. self.pelican_mathjax_extension = pelican_mathjax_extension
  19. self.tag = tag
  20. def handleMatch(self, m):
  21. node = markdown.util.etree.Element(self.tag)
  22. node.set('class', self.math_tag_class)
  23. prefix = '\\(' if m.group('prefix') == '$' else m.group('prefix')
  24. suffix = '\\)' if m.group('suffix') == '$' else m.group('suffix')
  25. node.text = markdown.util.AtomicString(prefix + m.group('math') + suffix)
  26. # If mathjax was successfully matched, then JavaScript needs to be added
  27. # for rendering. The boolean below indicates this
  28. self.pelican_mathjax_extension.mathjax_needed = True
  29. return node
  30. class PelicanMathJaxCorrectDisplayMath(markdown.treeprocessors.Treeprocessor):
  31. """Corrects invalid html that results from a <div> being put inside
  32. a <p> for displayed math"""
  33. def __init__(self, pelican_mathjax_extension):
  34. self.pelican_mathjax_extension = pelican_mathjax_extension
  35. def correct_html(self, root, children, div_math, insert_idx, text):
  36. """Separates out <div class="math"> from the parent tag <p>. Anything
  37. in between is put into its own parent tag of <p>"""
  38. current_idx = 0
  39. for idx in div_math:
  40. el = markdown.util.etree.Element('p')
  41. el.text = text
  42. el.extend(children[current_idx:idx])
  43. # Test to ensure that empty <p> is not inserted
  44. if len(el) != 0 or (el.text and not el.text.isspace()):
  45. root.insert(insert_idx, el)
  46. insert_idx += 1
  47. text = children[idx].tail
  48. children[idx].tail = None
  49. root.insert(insert_idx, children[idx])
  50. insert_idx += 1
  51. current_idx = idx+1
  52. el = markdown.util.etree.Element('p')
  53. el.text = text
  54. el.extend(children[current_idx:])
  55. if len(el) != 0 or (el.text and not el.text.isspace()):
  56. root.insert(insert_idx, el)
  57. def run(self, root):
  58. """Searches for <div class="math"> that are children in <p> tags and corrects
  59. the invalid HTML that results"""
  60. math_tag_class = self.pelican_mathjax_extension.getConfig('math_tag_class')
  61. for parent in root:
  62. div_math = []
  63. children = list(parent)
  64. for div in parent.findall('div'):
  65. if div.get('class') == math_tag_class:
  66. div_math.append(children.index(div))
  67. # Do not process further if no displayed math has been found
  68. if not div_math:
  69. continue
  70. insert_idx = list(root).index(parent)
  71. self.correct_html(root, children, div_math, insert_idx, parent.text)
  72. root.remove(parent) # Parent must be removed last for correct insertion index
  73. return root
  74. class PelicanMathJaxAddJavaScript(markdown.treeprocessors.Treeprocessor):
  75. """Tree Processor for adding Mathjax JavaScript to the blog"""
  76. def __init__(self, pelican_mathjax_extension):
  77. self.pelican_mathjax_extension = pelican_mathjax_extension
  78. def run(self, root):
  79. # If no mathjax was present, then exit
  80. if (not self.pelican_mathjax_extension.mathjax_needed):
  81. return root
  82. # Add the mathjax script to the html document
  83. mathjax_script = etree.Element('script')
  84. mathjax_script.set('type','text/javascript')
  85. mathjax_script.text = AtomicString(self.pelican_mathjax_extension.getConfig('mathjax_script'))
  86. root.append(mathjax_script)
  87. # Reset the boolean switch to false so that script is only added
  88. # to other pages if needed
  89. self.pelican_mathjax_extension.mathjax_needed = False
  90. return root
  91. class PelicanMathJaxExtension(markdown.Extension):
  92. """A markdown extension enabling mathjax processing in Markdown for Pelican"""
  93. def __init__(self, config):
  94. try:
  95. # Needed for markdown versions >= 2.5
  96. self.config['mathjax_script'] = ['', 'Mathjax JavaScript script']
  97. self.config['math_tag_class'] = ['math', 'The class of the tag in which mathematics is wrapped']
  98. self.config['auto_insert'] = [True, 'Determines if mathjax script is automatically inserted into content']
  99. super(PelicanMathJaxExtension,self).__init__(**config)
  100. except AttributeError:
  101. # Markdown versions < 2.5
  102. config['mathjax_script'] = [config['mathjax_script'], 'Mathjax JavaScript script']
  103. config['math_tag_class'] = [config['math_tag_class'], 'The class of the tag in which mathematic is wrapped']
  104. config['auto_insert'] = [config['auto_insert'], 'Determines if mathjax script is automatically inserted into content']
  105. super(PelicanMathJaxExtension,self).__init__(config)
  106. # Used as a flag to determine if javascript
  107. # needs to be injected into a document
  108. self.mathjax_needed = False
  109. def extendMarkdown(self, md, md_globals):
  110. # Regex to detect mathjax
  111. mathjax_inline_regex = r'(?P<prefix>\$)(?P<math>.+?)(?P<suffix>(?<!\s)\2)'
  112. mathjax_display_regex = r'(?P<prefix>\$\$|\\begin\{(.+?)\})(?P<math>.+?)(?P<suffix>\2|\\end\{\3\})'
  113. # Process mathjax before escapes are processed since escape processing will
  114. # intefer with mathjax. The order in which the displayed and inlined math
  115. # is registered below matters
  116. md.inlinePatterns.add('mathjax_displayed', PelicanMathJaxPattern(self, 'div', mathjax_display_regex), '<escape')
  117. md.inlinePatterns.add('mathjax_inlined', PelicanMathJaxPattern(self, 'span', mathjax_inline_regex), '<escape')
  118. # Correct the invalid HTML that results from teh displayed math (<div> tag within a <p> tag)
  119. md.treeprocessors.add('mathjax_correctdisplayedmath', PelicanMathJaxCorrectDisplayMath(self), '>inline')
  120. # If necessary, add the JavaScript Mathjax library to the document. This must
  121. # be last in the ordered dict (hence it is given the position '_end')
  122. if self.getConfig('auto_insert'):
  123. md.treeprocessors.add('mathjax_addjavascript', PelicanMathJaxAddJavaScript(self), '_end')