math.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381
  1. # -*- coding: utf-8 -*-
  2. """
  3. Math Render Plugin for Pelican
  4. ==============================
  5. This plugin allows your site to render Math. It supports both LaTeX and MathML
  6. using the MathJax JavaScript engine.
  7. Typogrify Compatibility
  8. -----------------------
  9. This plugin now plays nicely with Typogrify, but it requires
  10. Typogrify version 2.04 or above.
  11. User Settings
  12. -------------
  13. Users are also able to pass a dictionary of settings in the settings file which
  14. will control how the MathJax library renders things. This could be very useful
  15. for template builders that want to adjust the look and feel of the math.
  16. See README for more details.
  17. """
  18. from __future__ import print_function
  19. import os
  20. import re
  21. from pelican import signals
  22. from pelican import contents
  23. # Global Variables
  24. _TYPOGRIFY = None # if Typogrify is enabled, this is set to the typogrify.filter function
  25. _WRAP_LATEX = None # the tag to wrap LaTeX math in (needed to play nicely with Typogrify or for template designers)
  26. _MATH_REGEX = re.compile(r'(\$\$|\$|\\begin\{(.+?)\}|<(math)(?:\s.*?)?>).*?(\1|\\end\{\2\}|</\3>)', re.DOTALL | re.IGNORECASE) # used to detect math
  27. _MATH_SUMMARY_REGEX = None # used to match math in summary
  28. _MATH_INCOMPLETE_TAG_REGEX = None # used to match math that has been cut off in summary
  29. _MATHJAX_SETTINGS = {} # settings that can be specified by the user, used to control mathjax script settings
  30. with open (os.path.dirname(os.path.realpath(__file__))+'/mathjax_script.txt', 'r') as mathjax_script: # Read the mathjax javascript from file
  31. _MATHJAX_SCRIPT=mathjax_script.read()
  32. # Python standard library for binary search, namely bisect is cool but I need
  33. # specific business logic to evaluate my search predicate, so I am using my
  34. # own version
  35. def binary_search(match_tuple, ignore_within):
  36. """Determines if t is within tupleList. Using the fact that tupleList is
  37. ordered, binary search can be performed which is O(logn)
  38. """
  39. ignore = False
  40. if ignore_within == []:
  41. return False
  42. lo = 0
  43. hi = len(ignore_within)-1
  44. # Find first value in array where predicate is False
  45. # predicate function: tupleList[mid][0] < t[index]
  46. while lo < hi:
  47. mid = lo + (hi-lo+1)/2
  48. if ignore_within[mid][0] < match_tuple[0]:
  49. lo = mid
  50. else:
  51. hi = mid-1
  52. if lo >= 0 and lo <= len(ignore_within)-1:
  53. ignore = (ignore_within[lo][0] <= match_tuple[0] and ignore_within[lo][1] >= match_tuple[1])
  54. return ignore
  55. def ignore_content(content):
  56. """Creates a list of match span tuples for which content should be ignored
  57. e.g. <pre> and <code> tags
  58. """
  59. ignore_within = []
  60. # used to detect all <pre> and <code> tags. NOTE: Alter this regex should
  61. # additional tags need to be ignored
  62. ignore_regex = re.compile(r'<(pre|code)(?:\s.*?)?>.*?</(\1)>', re.DOTALL | re.IGNORECASE)
  63. for match in ignore_regex.finditer(content):
  64. ignore_within.append(match.span())
  65. return ignore_within
  66. def wrap_math(content, ignore_within):
  67. """Wraps math in user specified tags.
  68. This is needed for Typogrify to play nicely with math but it can also be
  69. styled by template providers
  70. """
  71. wrap_math.found_math = False
  72. def math_tag_wrap(match):
  73. """function for use in re.sub"""
  74. # determine if the tags are within <pre> and <code> blocks
  75. ignore = binary_search(match.span(1), ignore_within) or binary_search(match.span(4), ignore_within)
  76. if ignore or match.group(3) == 'math':
  77. if match.group(3) == 'math':
  78. # Will detect mml, but not wrap anything around it
  79. wrap_math.found_math = True
  80. return match.group(0)
  81. else:
  82. wrap_math.found_math = True
  83. return '<%s>%s</%s>' % (_WRAP_LATEX, match.group(0), _WRAP_LATEX)
  84. return (_MATH_REGEX.sub(math_tag_wrap, content), wrap_math.found_math)
  85. def process_summary(instance, ignore_within):
  86. """Summaries need special care. If Latex is cut off, it must be restored.
  87. In addition, the mathjax script must be included if necessary thereby
  88. making it independent to the template
  89. """
  90. process_summary.altered_summary = False
  91. insert_mathjax = False
  92. end_tag = '</%s>' % _WRAP_LATEX if _WRAP_LATEX is not None else ''
  93. # use content's _get_summary method to obtain summary
  94. summary = instance._get_summary()
  95. # Determine if there is any math in the summary which are not within the
  96. # ignore_within tags
  97. math_item = None
  98. for math_item in _MATH_SUMMARY_REGEX.finditer(summary):
  99. ignore = binary_search(math_item.span(2), ignore_within)
  100. if '...' not in math_item.group(5):
  101. ignore = ignore or binary_search(math_item.span(5), ignore_within)
  102. else:
  103. ignore = ignore or binary_search(math_item.span(6), ignore_within)
  104. if ignore:
  105. math_item = None # In <code> or <pre> tags, so ignore
  106. else:
  107. insert_mathjax = True
  108. # Repair the math if it was cut off math_item will be the final math
  109. # code matched that is not within <pre> or <code> tags
  110. if math_item and '...' in math_item.group(5):
  111. if math_item.group(3) is not None:
  112. end = r'\end{%s}' % math_item.group(3)
  113. elif math_item.group(4) is not None:
  114. end = r'</math>'
  115. elif math_item.group(2) is not None:
  116. end = math_item.group(2)
  117. search_regex = r'%s(%s.*?%s)' % (re.escape(instance._content[0:math_item.start(1)]), re.escape(math_item.group(1)), re.escape(end))
  118. math_match = re.search(search_regex, instance._content, re.DOTALL | re.IGNORECASE)
  119. if math_match:
  120. new_summary = summary.replace(math_item.group(0), math_match.group(1)+'%s ...' % end_tag)
  121. if new_summary != summary:
  122. if _MATHJAX_SETTINGS['auto_insert']:
  123. return new_summary+_MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
  124. else:
  125. instance.mathjax = True
  126. return new_summary
  127. def incomplete_end_latex_tag(match):
  128. """function for use in re.sub"""
  129. if binary_search(match.span(3), ignore_within):
  130. return match.group(0)
  131. process_summary.altered_summary = True
  132. return match.group(1) + match.group(4)
  133. # check for partial math tags at end. These must be removed
  134. summary = _MATH_INCOMPLETE_TAG_REGEX.sub(incomplete_end_latex_tag, summary)
  135. if process_summary.altered_summary or insert_mathjax:
  136. if insert_mathjax:
  137. if _MATHJAX_SETTINGS['auto_insert']:
  138. summary+= _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
  139. else:
  140. instance.mathjax = True
  141. return summary
  142. return None # Making it explicit that summary was not altered
  143. def process_settings(settings):
  144. """Sets user specified MathJax settings (see README for more details)"""
  145. global _MATHJAX_SETTINGS
  146. # NOTE TO FUTURE DEVELOPERS: Look at the README and what is happening in
  147. # this function if any additional changes to the mathjax settings need to
  148. # be incorporated. Also, please inline comment what the variables
  149. # will be used for
  150. # Default settings
  151. _MATHJAX_SETTINGS['align'] = 'center' # controls alignment of of displayed equations (values can be: left, right, center)
  152. _MATHJAX_SETTINGS['indent'] = '0em' # if above is not set to 'center', then this setting acts as an indent
  153. _MATHJAX_SETTINGS['show_menu'] = 'true' # controls whether to attach mathjax contextual menu
  154. _MATHJAX_SETTINGS['process_escapes'] = 'true' # controls whether escapes are processed
  155. _MATHJAX_SETTINGS['latex_preview'] = 'TeX' # controls what user sees while waiting for LaTex to render
  156. _MATHJAX_SETTINGS['color'] = 'black' # controls color math is rendered in
  157. # Source for MathJax: default (below) is to automatically determine what protocol to use
  158. _MATHJAX_SETTINGS['source'] = """'https:' == document.location.protocol
  159. ? 'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'
  160. : 'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'"""
  161. # This next setting controls whether the mathjax script should be automatically
  162. # inserted into the content. The mathjax script will not be inserted into
  163. # the content if no math is detected. For summaries that are present in the
  164. # index listings, mathjax script will also be automatically inserted.
  165. # Setting this value to false means the template must be altered if this
  166. # plugin is to work, and so it is only recommended for the template
  167. # designer who wants maximum control.
  168. _MATHJAX_SETTINGS['auto_insert'] = True # controls whether mathjax script is automatically inserted into the content
  169. if not isinstance(settings, dict):
  170. return
  171. # The following mathjax settings can be set via the settings dictionary
  172. # Iterate over dictionary in a way that is compatible with both version 2
  173. # and 3 of python
  174. for key, value in ((key, settings[key]) for key in settings):
  175. if key == 'auto_insert' and isinstance(value, bool):
  176. _MATHJAX_SETTINGS[key] = value
  177. if key == 'align' and isinstance(value, str):
  178. if value == 'left' or value == 'right' or value == 'center':
  179. _MATHJAX_SETTINGS[key] = value
  180. else:
  181. _MATHJAX_SETTINGS[key] = 'center'
  182. if key == 'indent':
  183. _MATHJAX_SETTINGS[key] = value
  184. if key == 'show_menu' and isinstance(value, bool):
  185. _MATHJAX_SETTINGS[key] = 'true' if value else 'false'
  186. if key == 'process_escapes' and isinstance(value, bool):
  187. _MATHJAX_SETTINGS[key] = 'true' if value else 'false'
  188. if key == 'latex_preview' and isinstance(value, str):
  189. _MATHJAX_SETTINGS[key] = value
  190. if key == 'color' and isinstance(value, str):
  191. _MATHJAX_SETTINGS[key] = value
  192. if key == 'ssl' and isinstance(value, str):
  193. if value == 'off':
  194. _MATHJAX_SETTINGS['source'] = "'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'"
  195. if value == 'force':
  196. _MATHJAX_SETTINGS['source'] = "'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML'"
  197. def process_content(instance):
  198. """Processes content, with logic to ensure that Typogrify does not clash
  199. with math.
  200. In addition, mathjax script is inserted at the end of the content thereby
  201. making it independent of the template
  202. """
  203. if not instance._content:
  204. return
  205. ignore_within = ignore_content(instance._content)
  206. if _WRAP_LATEX:
  207. instance._content, math = wrap_math(instance._content, ignore_within)
  208. else:
  209. math = True if _MATH_REGEX.search(instance._content) else False
  210. # The user initially set Typogrify to be True, but since it would clash
  211. # with math, we set it to False. This means that the default reader will
  212. # not call Typogrify, so it is called here, where we are able to control
  213. # logic for it ignore math if necessary
  214. if _TYPOGRIFY:
  215. # Tell Typogrify to ignore the tags that math has been wrapped in
  216. # also, Typogrify must always ignore mml (math) tags
  217. ignore_tags = [_WRAP_LATEX,'math'] if _WRAP_LATEX else ['math']
  218. # Exact copy of the logic as found in the default reader
  219. instance._content = _TYPOGRIFY(instance._content, ignore_tags)
  220. instance.metadata['title'] = _TYPOGRIFY(instance.metadata['title'], ignore_tags)
  221. if math:
  222. if _MATHJAX_SETTINGS['auto_insert']:
  223. # Mathjax script added to content automatically. Now it
  224. # does not need to be explicitly added to the template
  225. instance._content += _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
  226. else:
  227. # Place the burden on ensuring mathjax script is available to
  228. # browser on the template designer (see README for more details)
  229. instance.mathjax = True
  230. # The summary needs special care because math math cannot just be cut
  231. # off
  232. summary = process_summary(instance, ignore_within)
  233. if summary is not None:
  234. instance._summary = summary
  235. def pelican_init(pelicanobj):
  236. """Intialializes certain global variables and sets typogogrify setting to
  237. False should it be set to True.
  238. """
  239. global _TYPOGRIFY
  240. global _WRAP_LATEX
  241. global _MATH_SUMMARY_REGEX
  242. global _MATH_INCOMPLETE_TAG_REGEX
  243. try:
  244. settings = pelicanobj.settings['MATH']
  245. except:
  246. settings = None
  247. process_settings(settings)
  248. # Allows MathJax script to be accessed from template should it be needed
  249. pelicanobj.settings['MATHJAXSCRIPT'] = _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
  250. # If Typogrify set to True, then we need to handle it manually so it does
  251. # not conflict with LaTeX
  252. try:
  253. if pelicanobj.settings['TYPOGRIFY'] is True:
  254. pelicanobj.settings['TYPOGRIFY'] = False
  255. try:
  256. from typogrify.filters import typogrify
  257. # Determine if this is the correct version of Typogrify to use
  258. import inspect
  259. typogrify_args = inspect.getargspec(typogrify).args
  260. if len(typogrify_args) < 2 or 'ignore_tags' not in typogrify_args:
  261. raise TypeError('Incorrect version of Typogrify')
  262. # At this point, we are happy to use Typogrify, meaning
  263. # it is installed and it is a recent enough version
  264. # that can be used to ignore all math
  265. _TYPOGRIFY = typogrify
  266. _WRAP_LATEX = 'mathjax' # default to wrap mathjax content inside of
  267. except ImportError:
  268. print("\nTypogrify is not installed, so it is being ignored.\nIf you want to use it, please install via: pip install typogrify\n")
  269. except TypeError:
  270. print("\nA more recent version of Typogrify is needed for the render_math module.\nPlease upgrade Typogrify to the latest version (anything above version 2.04 is okay).\nTypogrify will be turned off due to this reason.\n")
  271. except KeyError:
  272. pass
  273. # Set _WRAP_LATEX to the settings tag if defined. The idea behind this is
  274. # to give template designers control over how math would be rendered
  275. try:
  276. if pelicanobj.settings['MATH']['wrap_latex']:
  277. _WRAP_LATEX = pelicanobj.settings['MATH']['wrap_latex']
  278. except (KeyError, TypeError):
  279. pass
  280. # regular expressions that depend on _WRAP_LATEX are set here
  281. tag_start= r'<%s>' % _WRAP_LATEX if not _WRAP_LATEX is None else ''
  282. tag_end = r'</%s>' % _WRAP_LATEX if not _WRAP_LATEX is None else ''
  283. math_summary_regex = r'((\$\$|\$|\\begin\{(.+?)\}|<(math)(?:\s.*?)?>).+?)(\2|\\end\{\3\}|</\4>|\s?\.\.\.)(%s|</\4>)?' % tag_end
  284. # NOTE: The logic in _get_summary will handle <math> correctly because it
  285. # is perceived as an html tag. Therefore we are only interested in handling
  286. # non mml (i.e. LaTex)
  287. incomplete_end_latex_tag = r'(.*)(%s)(\\\S*?|\$)\s*?(\s?\.\.\.)(%s)?$' % (tag_start, tag_end)
  288. _MATH_SUMMARY_REGEX = re.compile(math_summary_regex, re.DOTALL | re.IGNORECASE)
  289. _MATH_INCOMPLETE_TAG_REGEX = re.compile(incomplete_end_latex_tag, re.DOTALL | re.IGNORECASE)
  290. def register():
  291. """Plugin registration"""
  292. signals.initialized.connect(pelican_init)
  293. signals.content_object_init.connect(process_content)