org_reader.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103
  1. # Copyright (C) 2017 Sébastien Gendre
  2. # This program is free software: you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License as published by
  4. # the Free Software Foundation, either version 3 of the License, or
  5. # (at your option) any later version.
  6. # This program is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. # You should have received a copy of the GNU General Public License
  11. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  12. import re
  13. from orgco import convert_html
  14. from pelican import signals
  15. from pelican.readers import BaseReader
  16. from pelican.utils import pelican_open
  17. class OrgReader(BaseReader):
  18. """Reader for Org files"""
  19. enabled = True
  20. file_extensions = ['org']
  21. def __init__(self, *args, **kargs):
  22. """Init object construct with this class"""
  23. super(OrgReader, self).__init__(*args, **kargs)
  24. settings = self.settings['ORGMODE']
  25. settings.setdefault('code_highlight', True)
  26. self.code_highlight = settings['code_highlight']
  27. def _separate_header_and_content(self, text_lines):
  28. """
  29. From a given Org text, return the header separate from the content.
  30. The given text must be separate line by line and be a list.
  31. The return is a list of two items: header and content.
  32. Theses two items are text separate line by line in format of a list
  33. Keyword Arguments:
  34. text_lines -- A list, each item is a line of the texte
  35. Return:
  36. [
  37. header -- A list, each item is a line of the texte
  38. content -- A list, each item is a line of the texte
  39. ]
  40. """
  41. no_more_header = False
  42. expr_metadata = re.compile(r'^#\+[a-zA-Z]+:.*')
  43. header = []
  44. content = []
  45. for line in text_lines:
  46. metadata = expr_metadata.match(line)
  47. if metadata and not no_more_header:
  48. header.append(line)
  49. else:
  50. no_more_header = True
  51. content.append(line)
  52. return header, content
  53. def _parse_metadatas(self, text_lines):
  54. """
  55. From a given Org text, return the metadatas
  56. Keyword Arguments:
  57. text_lines -- A list, each item is a line of the texte
  58. Return:
  59. A dict containing metadatas
  60. """
  61. if not text_lines:
  62. return {}
  63. expr_metadata = re.compile(r'^#\+([a-zA-Z]+):(.*)')
  64. return {
  65. expr_metadata.match(line).group(1).lower()
  66. : expr_metadata.match(line).group(2).strip()
  67. for line in text_lines
  68. }
  69. def read(self, source_path):
  70. """
  71. Parse content and metadata of Org files
  72. Keyword Arguments:
  73. source_path -- Path to the Org file to parse
  74. """
  75. with pelican_open(source_path) as text:
  76. text_lines = list(text.splitlines())
  77. header, content = self._separate_header_and_content(text_lines)
  78. metadatas = self._parse_metadatas(header)
  79. metadatas_processed = {
  80. key
  81. : self.process_metadata(key, value)
  82. for key, value in metadatas.items()
  83. }
  84. content_html = convert_html("\n".join(content),
  85. highlight=self.code_highlight)
  86. return content_html, metadatas_processed
  87. def add_reader(readers):
  88. for ext in OrgReader.file_extensions:
  89. readers.reader_classes[ext] = OrgReader
  90. def register():
  91. signals.readers_init.connect(add_reader)