org_reader.py 2.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. # Copyright (C) 2017 Sébastien Gendre
  2. # This program is free software: you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License as published by
  4. # the Free Software Foundation, either version 3 of the License, or
  5. # (at your option) any later version.
  6. # This program is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. # You should have received a copy of the GNU General Public License
  11. # along with this program. If not, see <http://www.gnu.org/licenses/>.
  12. import re
  13. from orgpython import org_to_html
  14. from pelican import signals
  15. from pelican.readers import BaseReader
  16. from pelican.utils import pelican_open
  17. class OrgReader(BaseReader):
  18. """Reader for Org files"""
  19. enabled = True
  20. file_extensions = ['org']
  21. def _separate_header_and_content(self, text_lines):
  22. """
  23. From a given Org text, return the header separate from the content.
  24. The given text must be separate line by line and be a list.
  25. The return is a list of two items: header and content.
  26. Theses two items are text separate line by line in format of a list
  27. Keyword Arguments:
  28. text_lines -- A list, each item is a line of the texte
  29. Return:
  30. [
  31. header -- A list, each item is a line of the texte
  32. content -- A list, each item is a line of the texte
  33. ]
  34. """
  35. no_more_header = False
  36. expr_metadata = re.compile(r'^#\+[a-zA-Z]+:.*')
  37. header = []
  38. content = []
  39. for line in text_lines:
  40. metadata = expr_metadata.match(line)
  41. if metadata and not no_more_header:
  42. header.append(line)
  43. else:
  44. no_more_header = True
  45. content.append(line)
  46. return header, content
  47. def _parse_metadatas(self, text_lines):
  48. """
  49. From a given Org text, return the metadatas
  50. Keyword Arguments:
  51. text_lines -- A list, each item is a line of the texte
  52. Return:
  53. A dict containing metadatas
  54. """
  55. if not text_lines:
  56. return {}
  57. expr_metadata = re.compile(r'^#\+([a-zA-Z]+):(.*)')
  58. return {
  59. expr_metadata.match(line).group(1).lower()
  60. : expr_metadata.match(line).group(2).strip()
  61. for line in text_lines
  62. }
  63. def read(self, source_path):
  64. """
  65. Parse content and metadata of Org files
  66. Keyword Arguments:
  67. source_path -- Path to the Org file to parse
  68. """
  69. pass