latex.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. # -*- coding: utf-8 -*-
  2. """
  3. Latex Plugin For Pelican
  4. ========================
  5. This plugin allows you to write mathematical equations in your articles using Latex.
It uses the MathJax JavaScript library to render Latex that is embedded
between `$..$` for inline math and `$$..$$` for displayed math. It also allows
writing equations by using `\begin{equation}`...`\end{equation}`. No
  9. alteration to a template is required for this plugin to work, just install and
  10. use.
  11. Typogrify Compatibility
  12. -----------------------
  13. This plugin now plays nicely with typogrify, but it requires
  14. typogrify version 2.07 or above.
  15. User Settings
  16. -------------
Users are also able to pass a dictionary of settings in the settings file which
will control how the MathJax library renders things. This could be very useful
for template builders that want to adjust the look and feel of the math.
  20. See README for more details.
  21. """
  22. from pelican import signals
  23. from pelican import contents
  24. import re
  25. # Global Variables
  26. _TYPOGRIFY = False # used to determine if we should process typogrify
  27. _WRAP_TAG = None # the tag to wrap mathjax in (needed to play nicely with typogrify or for template designers)
  28. _LATEX_REGEX = re.compile(r'(\$\$|\$|\\begin\{(.+?)\}|<(math).*?>).*?(\1|\\end\{\2\}|</\3>)', re.DOTALL | re.IGNORECASE) # used to detect latex
  29. _LATEX_SUMMARY_REGEX = None # used to match latex in summary
  30. _LATEX_PARTIAL_REGEX = None # used to match latex that has been cut off in summary
  31. _MATHJAX_SETTINGS = {} # settings that can be specified by the user, used to control mathjax script settings
  32. _MATHJAX_SCRIPT="""
  33. <script type= "text/javascript">
  34. if (!document.getElementById('mathjaxscript_pelican')) {{
  35. var s = document.createElement('script');
  36. s.id = 'mathjaxscript_pelican';
  37. s.type = 'text/javascript'; s.src = 'https:' == document.location.protocol ? 'https://c328740.ssl.cf1.rackcdn.com/mathjax/latest/MathJax.js' : 'http://cdn.mathjax.org/mathjax/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML';
  38. s[(window.opera ? "innerHTML" : "text")] =
  39. "MathJax.Hub.Config({{" +
  40. " config: ['MMLorHTML.js']," +
  41. " TeX: {{ extensions: ['AMSmath.js','AMSsymbols.js','noErrors.js','noUndefined.js'], equationNumbers: {{ autoNumber: 'AMS' }} }}," +
  42. " jax: ['input/TeX','input/MathML','output/HTML-CSS']," +
  43. " extensions: ['tex2jax.js','mml2jax.js','MathMenu.js','MathZoom.js']," +
  44. " displayAlign: '{align}'," +
  45. " displayIndent: '{indent}'," +
  46. " showMathMenu: {show_menu}," +
  47. " tex2jax: {{ " +
  48. " inlineMath: [ [\'$\',\'$\'] ], " +
  49. " displayMath: [ [\'$$\',\'$$\'] ]," +
  50. " processEscapes: {process_escapes}," +
  51. " preview: '{preview}'," +
  52. " }}, " +
  53. " 'HTML-CSS': {{ " +
  54. " styles: {{ '.MathJax_Display, .MathJax .mo, .MathJax .mi, .MathJax .mn': {{color: '{color} ! important'}} }}" +
  55. " }} " +
  56. "}}); ";
  57. (document.body || document.getElementsByTagName('head')[0]).appendChild(s);
  58. }}
  59. </script>
  60. """
  61. # Python standard library for binary search, namely bisect is cool but I need
  62. # specific business logic to evaluate my search predicate, so I am using my
  63. # own version
  64. def binary_search(match_tuple, ignore_within):
  65. """Determines if t is within tupleList. Using the fact that tupleList is
  66. ordered, binary search can be performed which is O(logn)
  67. """
  68. ignore = False
  69. if ignore_within == []:
  70. return False
  71. lo = 0
  72. hi = len(ignore_within)-1
  73. # Find first value in array where predicate is False
  74. # predicate function: tupleList[mid][0] < t[index]
  75. while lo < hi:
  76. mid = lo + (hi-lo+1)/2
  77. if ignore_within[mid][0] < match_tuple[0]:
  78. lo = mid
  79. else:
  80. hi = mid-1
  81. if lo >= 0 and lo <= len(ignore_within)-1:
  82. ignore = (ignore_within[lo][0] <= match_tuple[0] and ignore_within[lo][1] >= match_tuple[1])
  83. return ignore
  84. def ignore_content(content):
  85. """Creates a list of match span tuples for which content should be ignored
  86. e.g. <pre> and <code> tags
  87. """
  88. ignore_within = []
  89. # used to detect all <pre> and <code> tags. NOTE: Alter this regex should
  90. # additional tags need to be ignored
  91. ignore_regex = re.compile(r'<(pre|code).*?>.*?</(\1)>', re.DOTALL | re.IGNORECASE)
  92. for match in ignore_regex.finditer(content):
  93. ignore_within.append(match.span())
  94. return ignore_within
  95. def wrap_latex(content, ignore_within):
  96. """Wraps latex in user specified tags.
  97. This is needed for typogrify to play nicely with latex but it can also be
  98. styled by template providers
  99. """
  100. wrap_latex.foundlatex = False
  101. def math_tag_wrap(match):
  102. """function for use in re.sub"""
  103. # determine if the tags are within <pre> and <code> blocks
  104. ignore = binary_search(match.span(1), ignore_within) and binary_search(match.span(2), ignore_within)
  105. if ignore or match.group(3) == 'math':
  106. if match.group(3) == 'math':
  107. # Will detect mml, but not wrap anything around it
  108. wrap_latex.foundlatex = True
  109. return match.group(0)
  110. else:
  111. wrap_latex.foundlatex = True
  112. return '<%s>%s</%s>' % (_WRAP_TAG, match.group(0), _WRAP_TAG)
  113. return (_LATEX_REGEX.sub(math_tag_wrap, content), wrap_latex.foundlatex)
  114. def process_summary(instance, ignore_within):
  115. """Summaries need special care. If Latex is cut off, it must be restored.
  116. In addition, the mathjax script must be included if necessary thereby
  117. making it independent to the template
  118. """
  119. process_summary.altered_summary = False
  120. insert_mathjax_script = False
  121. end_tag = '</%s>' % _WRAP_TAG if _WRAP_TAG != None else ''
  122. # use content's _get_summary method to obtain summary
  123. summary = instance._get_summary()
  124. # Determine if there is any math in the summary which are not within the
  125. # ignore_within tags
  126. mathitem = None
  127. for mathitem in _LATEX_SUMMARY_REGEX.finditer(summary):
  128. if binary_search(mathitem.span(), ignore_within):
  129. mathitem = None # In <code> or <pre> tags, so ignore
  130. else:
  131. insert_mathjax_script = True
  132. # Repair the latex if it was cut off mathitem will be the final latex
  133. # code matched that is not within <pre> or <code> tags
  134. if mathitem and '...' in mathitem.group(6):
  135. if mathitem.group(3) is not None:
  136. end = r'\end{%s}' % mathitem.group(3)
  137. elif mathitem.group(4) is not None:
  138. end = r'</math>'
  139. elif mathitem.group(2) is not None:
  140. end = mathitem.group(2)
  141. search_regex = r'%s(%s.*?%s)' % (re.escape(instance._content[0:mathitem.start(1)]), re.escape(mathitem.group(1)), re.escape(end))
  142. latex_match = re.search(search_regex, instance._content, re.DOTALL | re.IGNORECASE)
  143. if latex_match:
  144. new_summary = summary.replace(mathitem.group(0), latex_match.group(1)+'%s ...' % end_tag)
  145. if new_summary != summary:
  146. return new_summary+_MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
  147. def partial_regex(match):
  148. """function for use in re.sub"""
  149. if binary_search(match.span(3), ignore_within):
  150. return match.group(0)
  151. process_summary.altered_summary = True
  152. return match.group(1) + match.group(4)
  153. # check for partial latex tags at end. These must be removed
  154. summary = _LATEX_PARTIAL_REGEX.sub(partial_regex, summary)
  155. if process_summary.altered_summary:
  156. return summary+_MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS) if insert_mathjax_script else summary
  157. return summary+_MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS) if insert_mathjax_script else None
  158. def process_settings(settings):
  159. """Sets user specified MathJax settings (see README for more details)"""
  160. global _MATHJAX_SETTINGS
  161. # NOTE TO FUTURE DEVELOPERS: Look at the README and what is happening in
  162. # this function if any additional changes to the mathjax settings need to
  163. # be incorporated. Also, please inline comment what the variables
  164. # will be used for
  165. # Default settings
  166. _MATHJAX_SETTINGS['align'] = 'center' # controls alignment of of displayed equations (values can be: left, right, center)
  167. _MATHJAX_SETTINGS['indent'] = '0em' # if above is not set to 'center', then this setting acts as an indent
  168. _MATHJAX_SETTINGS['show_menu'] = 'true' # controls whether to attach mathjax contextual menu
  169. _MATHJAX_SETTINGS['process_escapes'] = 'true' # controls whether escapes are processed
  170. _MATHJAX_SETTINGS['preview'] = 'TeX' # controls what user sees as preview
  171. _MATHJAX_SETTINGS['color'] = 'black' # controls color math is rendered in
  172. if not isinstance(settings, dict):
  173. return
  174. # The following mathjax settings can be set via the settings dictionary
  175. # Iterate over dictionary in a way that is compatible with both version 2
  176. # and 3 of python
  177. for key, value in ((key, settings[key]) for key in settings):
  178. if key == 'align' and isinstance(value, str):
  179. if value == 'left' or value == 'right' or value == 'center':
  180. _MATHJAX_SETTINGS[key] = value
  181. else:
  182. _MATHJAX_SETTINGS[key] = 'center'
  183. if key == 'indent':
  184. _MATHJAX_SETTINGS[key] = value
  185. if key == 'show_menu' and isinstance(value, bool):
  186. _MATHJAX_SETTINGS[key] = 'true' if value else 'false'
  187. if key == 'process_escapes' and isinstance(value, bool):
  188. _MATHJAX_SETTINGS[key] = 'true' if value else 'false'
  189. if key == 'preview' and isinstance(value, str):
  190. _MATHJAX_SETTINGS[key] = value
  191. if key == 'color' and isinstance(value, str):
  192. _MATHJAX_SETTINGS[key] = value
  193. def process_content(instance):
  194. """Processes content, with logic to ensure that typogrify does not clash
  195. with latex.
  196. In addition, mathjax script is inserted at the end of the content thereby
  197. making it independent of the template
  198. """
  199. if not instance._content:
  200. return
  201. ignore_within = ignore_content(instance._content)
  202. if _WRAP_TAG:
  203. instance._content, latex = wrap_latex(instance._content, ignore_within)
  204. else:
  205. latex = True if _LATEX_REGEX.search(instance._content) else False
  206. # The user initially set typogrify to be True, but since it would clash
  207. # with latex, we set it to False. This means that the default reader will
  208. # not call typogrify, so it is called here, where we are able to control
  209. # logic for it ignore latex if necessary
  210. if _TYPOGRIFY:
  211. # Tell typogrify to ignore the tags that latex has been wrapped in
  212. # also, typogrify must always ignore mml (math) tags
  213. ignore_tags = [_WRAP_TAG,'math'] if _WRAP_TAG else ['math']
  214. # Exact copy of the logic as found in the default reader
  215. from typogrify.filters import typogrify
  216. instance._content = typogrify(instance._content, ignore_tags)
  217. instance.metadata['title'] = typogrify(instance.metadata['title'], ignore_tags)
  218. if latex:
  219. # Mathjax script added to the end of article. Now it does not need to
  220. # be explicitly added to the template
  221. instance._content += _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
  222. # The summary needs special care because latex math cannot just be cut
  223. # off
  224. summary = process_summary(instance, ignore_within)
  225. if summary != None:
  226. instance._summary = summary
  227. def pelican_init(pelicanobj):
  228. """Intialializes certain global variables and sets typogogrify setting to
  229. False should it be set to True.
  230. """
  231. global _TYPOGRIFY
  232. global _WRAP_TAG
  233. global _LATEX_SUMMARY_REGEX
  234. global _LATEX_PARTIAL_REGEX
  235. try:
  236. settings = pelicanobj.settings['LATEX']
  237. except:
  238. settings = None
  239. process_settings(settings)
  240. # Allows mathjax script to be accessed from template should it be needed
  241. pelicanobj.settings['MATHJAXSCRIPT'] = _MATHJAX_SCRIPT.format(**_MATHJAX_SETTINGS)
  242. # If typogrify set to True, then we need to handle it manually so it does
  243. # not conflict with Latex
  244. try:
  245. if pelicanobj.settings['TYPOGRIFY'] == True:
  246. pelicanobj.settings['TYPOGRIFY'] = False
  247. _WRAP_TAG = 'mathjax' # default to wrap mathjax content inside of
  248. _TYPOGRIFY = True
  249. except KeyError:
  250. pass
  251. # Set _WRAP_TAG to the settings tag if defined. The idea behind this is
  252. # to give template designers control over how math would be rendered
  253. try:
  254. if pelicanobj.settings['LATEX']['wrap']:
  255. _WRAP_TAG = pelicanobj.settings['LATEX']['wrap']
  256. except (KeyError, TypeError):
  257. pass
  258. # regular expressions that depend on _WRAP_TAG are set here
  259. tag_start= r'<%s>' % _WRAP_TAG if not _WRAP_TAG is None else ''
  260. tag_end = r'</%s>' % _WRAP_TAG if not _WRAP_TAG is None else ''
  261. latex_summary_regex = r'((\$\$|\$|\\begin\{(.+?)\}|<(math)(\s.*?)?>).+?)(\2|\\end\{\3\}|</\4>|\s?\.\.\.)(%s|</\4>)?' % tag_end
  262. # NOTE: The logic in _get_summary will handle <math> correctly because it
  263. # is perceived as an html tag. Therefore we are only interested in handling non mml
  264. latex_partial_regex = r'(.*)(%s)(\\\S*?|\$)\s*?(\s?\.\.\.)(%s)?$' % (tag_start, tag_end)
  265. _LATEX_SUMMARY_REGEX = re.compile(latex_summary_regex, re.DOTALL | re.IGNORECASE)
  266. _LATEX_PARTIAL_REGEX = re.compile(latex_partial_regex, re.DOTALL | re.IGNORECASE)
  267. def register():
  268. """Plugin registration"""
  269. signals.initialized.connect(pelican_init)
  270. signals.content_object_init.connect(process_content)