org_reader.py 3.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. # Copyright (C) 2017 Sébastien Gendre
  2. # This program is free software: you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License as published by
  4. # the Free Software Foundation, either version 3 of the License, or
  5. # (at your option) any later version.
  6. # This program is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. # You should have received a copy of the GNU General Public License
  11. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  12. import re
  13. from orgpython import org_to_html
  14. from pelican import signals
  15. from pelican.readers import BaseReader
  16. from pelican.utils import pelican_open
  17. class OrgReader(BaseReader):
  18. """Reader for Org files"""
  19. enabled = True
  20. file_extensions = ['org']
  21. def _separate_header_and_content(self, text_lines):
  22. """
  23. From a given Org text, return the header separate from the content.
  24. The given text must be separate line by line and be a list.
  25. The return is a list of two items: header and content.
  26. Theses two items are text separate line by line in format of a list
  27. Keyword Arguments:
  28. text_lines -- A list, each item is a line of the texte
  29. Return:
  30. [
  31. header -- A list, each item is a line of the texte
  32. content -- A list, each item is a line of the texte
  33. ]
  34. """
  35. no_more_header = False
  36. expr_metadata = re.compile(r'^#\+[a-zA-Z]+:.*')
  37. header = []
  38. content = []
  39. for line in text_lines:
  40. metadata = expr_metadata.match(line)
  41. if metadata and not no_more_header:
  42. header.append(line)
  43. else:
  44. no_more_header = True
  45. content.append(line)
  46. return header, content
  47. def _parse_metadatas(self, text_lines):
  48. """
  49. From a given Org text, return the metadatas
  50. Keyword Arguments:
  51. text_lines -- A list, each item is a line of the texte
  52. Return:
  53. A dict containing metadatas
  54. """
  55. if not text_lines:
  56. return {}
  57. expr_metadata = re.compile(r'^#\+([a-zA-Z]+):(.*)')
  58. return {
  59. expr_metadata.match(line).group(1).lower()
  60. : expr_metadata.match(line).group(2).strip()
  61. for line in text_lines
  62. }
  63. def read(self, source_path):
  64. """
  65. Parse content and metadata of Org files
  66. Keyword Arguments:
  67. source_path -- Path to the Org file to parse
  68. """
  69. with pelican_open(source_path) as text:
  70. text_lines = list(text.splitlines())
  71. header, content = self._separate_header_and_content(text_lines)
  72. metadatas = self._parse_metadatas(header)
  73. metadatas_processed = {
  74. key
  75. : self.process_metadata(key, value)
  76. for key, value in metadatas.items()
  77. }
  78. content_html = org_to_html("\n".join(content))
  79. return content_html, metadatas_processed
  80. def add_reader(readers):
  81. for ext in OrgReader.file_extensions:
  82. readers.reader_classes[ext] = OrgReader
  83. def register():
  84. signals.readers_init.connect(add_reader)