Browse Source

Merge pull request #949 from ghost/org-reader-new

New Org reader
Justin Mayer 6 years ago
parent
commit
8465497a4d

File diff suppressed because it is too large
+ 12 - 0
org_emacs_reader/LICENSE


+ 4 - 4
org_reader/README.md

@@ -1,19 +1,19 @@
-# Org Reader
+# Org Emacs Reader
 
 Publish Emacs Org files alongside the rest of your website or blog.
 
-- `ORG_READER_EMACS_LOCATION`: Required. Location of Emacs binary.
+- `ORG_EMACS_READER_EMACS_LOCATION`: Required. Location of Emacs binary.
   If you use `Emacs for Mac OS X`,
   the location should be `/Applications/Emacs.app/Contents/MacOS/Emacs`,
   rather than `/usr/bin/emacs`.
 
-- `ORG_READER_EMACS_SETTINGS`: Optional. An absolute path to an Elisp file, to
+- `ORG_EMACS_READER_EMACS_SETTINGS`: Optional. An absolute path to an Elisp file, to
   run per invocation. Useful for initializing the `package` Emacs library if
   that's where your Org mode comes from, or any modifications to Org Export-
   related variables. If you want to use your standard emacs init file, you 
   can ignore this variable.
 
-- `ORG_READER_BACKEND`: Optional. A custom backend to provide to Org. Defaults
+- `ORG_EMACS_READER_BACKEND`: Optional. A custom backend to provide to Org. Defaults
   to `'html`.
 
 To provide metadata to Pelican, the following properties can be defined in

+ 1 - 0
org_emacs_reader/__init__.py

@@ -0,0 +1 @@
+from .org_emacs_reader import *

org_reader/org_reader.el → org_emacs_reader/org_reader.el


+ 125 - 0
org_emacs_reader/org_reader.py

@@ -0,0 +1,125 @@
+"""
+Org Reader
+==========
+
+Version 1.1.
+
+Relevant Pelican settings:
+
+- ORG_EMACS_READER_EMACS_LOCATION: Required. Location of Emacs binary.
+
+- ORG_EMACS_READER_EMACS_SETTINGS: Optional. An absolute path to an Elisp file, to
+  run per invocation. Useful for initializing the `package` Emacs library if
+  that's where your Org mode comes from, or any modifications to Org Export-
+  related variables.
+
+- ORG_EMACS_READER_BACKEND: Optional. A custom backend to provide to Org. Defaults
+  to 'html.
+
+To provide metadata to Pelican, the following properties can be defined in
+the org file's header:
+
+#+TITLE: The Title Of This BlogPost
+#+DATE: 2001-01-01
+#+CATEGORY: blog-category
+#+AUTHOR: My Name
+#+PROPERTY: LANGUAGE en
+#+PROPERTY: SUMMARY hello, this is the description
+#+PROPERTY: SLUG test_slug
+#+PROPERTY: MODIFIED [2015-12-29 Di]
+#+PROPERTY: TAGS my, first, tags
+#+PROPERTY: SAVE_AS alternative_filename.html
+
+- The TITLE is the only mandatory header property
+- Timestamps (DATE and MODIFIED) are optional and can be either a string of
+  %Y-%m-%d or an org timestamp
+- The property names (SUMMARY, SLUG, MODIFIED, TAGS, SAVE_AS) can be either
+  lower-case or upper-case
+- The slug is automatically the filename of the Org file, if not explicitly
+  specified
+- It is not possible to pass an empty property to Pelican.  For this plugin,
+  it makes no difference if a property is present in the Org file and left
+  empty, or if it is not defined at all.
+
+"""
+import os
+import json
+import logging
+import subprocess
+from pelican import readers
+from pelican import signals
+
+
+ELISP = os.path.join(os.path.dirname(__file__), 'org_emacs_reader.el')
+LOG = logging.getLogger(__name__)
+
+
+class OrgEmacsReader(readers.BaseReader):
+    enabled = True
+
+    EMACS_ARGS = ["-Q", "--batch"]
+    ELISP_EXEC = "(org->pelican \"{0}\" {1})"
+
+    file_extensions = ['org']
+
+    def __init__(self, settings):
+        super(OrgEmacsReader, self).__init__(settings)
+        assert 'ORG_EMACS_READER_EMACS_LOCATION' in self.settings, \
+            "No ORG_EMACS_READER_EMACS_LOCATION specified in settings"
+
+    def read(self, filename):
+        LOG.info("Reading Org file {0}".format(filename))
+        cmd = [self.settings['ORG_EMACS_READER_EMACS_LOCATION']]
+        cmd.extend(self.EMACS_ARGS)
+
+        if 'ORG_EMACS_READER_EMACS_SETTINGS' in self.settings:
+            cmd.append('-l')
+            cmd.append(self.settings['ORG_EMACS_READER_EMACS_SETTINGS'])
+
+        backend = self.settings.get('ORG_EMACS_READER_BACKEND', "'html")
+
+        cmd.append('-l')
+        cmd.append(ELISP)
+
+        cmd.append('--eval')
+        cmd.append(self.ELISP_EXEC.format(filename, backend))
+
+        LOG.debug("OrgEmacsReader: running command `{0}`".format(cmd))
+
+        json_result = subprocess.check_output(cmd, universal_newlines=True)
+        json_output = json.loads(json_result)
+
+        # get default slug from .org filename
+        default_slug, _ = os.path.splitext(os.path.basename(filename))
+
+        metadata = {'title': json_output['title'] or '',
+                    'date': json_output['date'] or '',
+                    'author': json_output['author'] or '',
+                    'lang': json_output['language'] or '',
+                    'category': json_output['category'] or '',
+                    'slug': json_output['slug'] or default_slug,
+                    'modified': json_output['modified'] or '',
+                    'tags': json_output['tags'] or '',
+                    'save_as': json_output['save_as'] or '',
+                    'summary': json_output['summary'] or ''}
+
+        # remove empty strings when necessary
+        for key in ['save_as', 'modified', 'lang', 'summary']:
+            if not metadata[key]:
+                metadata.pop(key)
+
+        parsed = {}
+        for key, value in metadata.items():
+            parsed[key] = self.process_metadata(key, value)
+
+        content = json_output['post']
+
+        return content, parsed
+
+
+def add_reader(readers):
+    readers.reader_classes['org'] = OrgEmacsReader
+
+
+def register():
+    signals.readers_init.connect(add_reader)

File diff suppressed because it is too large
+ 669 - 7
org_reader/LICENSE


+ 74 - 0
org_reader/README.org

@@ -0,0 +1,74 @@
+* Org Reader plugin
+
+This plugin add the possibility to write your pages and blog posts in
+Org format from Emacs Org-mode.
+
+To generate your website, you don't need Emacs, Pandoc or any other
+software.
+
+
+** Dependency
+
+   This plugin only need =orgco= python package.
+   
+   You can install it with =pip=:
+   #+BEGIN_SRC sh
+   pip install orgco
+   #+END_SRC
+
+** Install
+
+   First, install this plugin in path know by pelican. Set pelican
+   variable =PLUGIN_PATHS= in =pelicanconf.py= if necessary.
+
+   Second, enable this plugin by adding his name to the pelican
+   variable =PLUGINS= in =pelicanconf.py=
+   
+   Exemple:
+   #+BEGIN_SRC python
+   PLUGINS = ['org_reader']
+   #+END_SRC
+
+** Settings
+
+   With this plugin, you define settings with the =ORGMODE= variable,
+   a dictionary.
+
+   Available settings:
+   - =code_highlight=: =True= or =False=, enable code highlight with
+     =orgco=. Default is =True=.
+
+   Exemple of settings:
+   #+BEGIN_SRC python
+     ORGMODE = {
+	 'code_highlight': False,
+     }
+   #+END_SRC
+
+** Usage
+
+   Simply write content in Org format.
+
+** More informations
+
+*** Org-mode
+    
+     Org-mode is an Emacs mode for keeping notes, maintaining TODO
+     lists, planning projects, and authoring documents with a fast and
+     effective plain-text system.
+
+     - [[http://orgmode.org][Org-mode website]]
+     - [[https://www.gnu.org/software/emacs/][Emacs website]]
+     - [[https://www.youtube.com/watch?v=SzA2YODtgK4][Getting Started With Org Mode]]
+
+*** Orgco
+
+    #+BEGIN_QUOTE
+    With orgco you can convert Emacs’ orgmode to other formats.
+    #+END_QUOTE
+
+    - [[https://github.com/paetzke/orgco][orgco website]]
+
+*** Author
+    Sébastien Gendre <seb@k-7.ch>
+    [[https://k-7.ch/][Website]]

+ 14 - 0
org_reader/__init__.py

@@ -1 +1,15 @@
+# Copyright (C) 2017  Sébastien Gendre
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
 from .org_reader import *

+ 93 - 116
org_reader/org_reader.py

@@ -1,125 +1,102 @@
-"""
-Org Reader
-==========
-
-Version 1.1.
-
-Relevant Pelican settings:
-
-- ORG_READER_EMACS_LOCATION: Required. Location of Emacs binary.
-
-- ORG_READER_EMACS_SETTINGS: Optional. An absolute path to an Elisp file, to
-  run per invocation. Useful for initializing the `package` Emacs library if
-  that's where your Org mode comes from, or any modifications to Org Export-
-  related variables.
-
-- ORG_READER_BACKEND: Optional. A custom backend to provide to Org. Defaults
-  to 'html.
-
-To provide metadata to Pelican, the following properties can be defined in
-the org file's header:
-
-#+TITLE: The Title Of This BlogPost
-#+DATE: 2001-01-01
-#+CATEGORY: blog-category
-#+AUTHOR: My Name
-#+PROPERTY: LANGUAGE en
-#+PROPERTY: SUMMARY hello, this is the description
-#+PROPERTY: SLUG test_slug
-#+PROPERTY: MODIFIED [2015-12-29 Di]
-#+PROPERTY: TAGS my, first, tags
-#+PROPERTY: SAVE_AS alternative_filename.html
-
-- The TITLE is the only mandatory header property
-- Timestamps (DATE and MODIFIED) are optional and can be either a string of
-  %Y-%m-%d or an org timestamp
-- The property names (SUMMARY, SLUG, MODIFIED, TAGS, SAVE_AS) can be either
-  lower-case or upper-case
-- The slug is automatically the filename of the Org file, if not explicitly
-  specified
-- It is not possible to pass an empty property to Pelican.  For this plugin,
-  it makes no difference if a property is present in the Org file and left
-  empty, or if it is not defined at all.
-
-"""
-import os
-import json
-import logging
-import subprocess
-from pelican import readers
+# Copyright (C) 2017  Sébastien Gendre
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+import re
+from orgco import convert_html
 from pelican import signals
+from pelican.readers import BaseReader
+from pelican.utils import pelican_open
 
-
-ELISP = os.path.join(os.path.dirname(__file__), 'org_reader.el')
-LOG = logging.getLogger(__name__)
-
-
-class OrgReader(readers.BaseReader):
+class OrgReader(BaseReader):
+    """Reader for Org files"""
     enabled = True
-
-    EMACS_ARGS = ["-Q", "--batch"]
-    ELISP_EXEC = "(org->pelican \"{0}\" {1})"
-
     file_extensions = ['org']
 
-    def __init__(self, settings):
-        super(OrgReader, self).__init__(settings)
-        assert 'ORG_READER_EMACS_LOCATION' in self.settings, \
-            "No ORG_READER_EMACS_LOCATION specified in settings"
-
-    def read(self, filename):
-        LOG.info("Reading Org file {0}".format(filename))
-        cmd = [self.settings['ORG_READER_EMACS_LOCATION']]
-        cmd.extend(self.EMACS_ARGS)
-
-        if 'ORG_READER_EMACS_SETTINGS' in self.settings:
-            cmd.append('-l')
-            cmd.append(self.settings['ORG_READER_EMACS_SETTINGS'])
-
-        backend = self.settings.get('ORG_READER_BACKEND', "'html")
-
-        cmd.append('-l')
-        cmd.append(ELISP)
-
-        cmd.append('--eval')
-        cmd.append(self.ELISP_EXEC.format(filename, backend))
-
-        LOG.debug("OrgReader: running command `{0}`".format(cmd))
-
-        json_result = subprocess.check_output(cmd, universal_newlines=True)
-        json_output = json.loads(json_result)
-
-        # get default slug from .org filename
-        default_slug, _ = os.path.splitext(os.path.basename(filename))
-
-        metadata = {'title': json_output['title'] or '',
-                    'date': json_output['date'] or '',
-                    'author': json_output['author'] or '',
-                    'lang': json_output['language'] or '',
-                    'category': json_output['category'] or '',
-                    'slug': json_output['slug'] or default_slug,
-                    'modified': json_output['modified'] or '',
-                    'tags': json_output['tags'] or '',
-                    'save_as': json_output['save_as'] or '',
-                    'summary': json_output['summary'] or ''}
-
-        # remove empty strings when necessary
-        for key in ['save_as', 'modified', 'lang', 'summary']:
-            if not metadata[key]:
-                metadata.pop(key)
-
-        parsed = {}
-        for key, value in metadata.items():
-            parsed[key] = self.process_metadata(key, value)
-
-        content = json_output['post']
-
-        return content, parsed
-
-
+    def __init__(self, *args, **kargs):
+        """Init object construct with this class"""
+        super(OrgReader, self).__init__(*args, **kargs)
+        settings = self.settings['ORGMODE']
+        settings.setdefault('code_highlight', True)
+        self.code_highlight = settings['code_highlight']
+    
+    def _separate_header_and_content(self, text_lines):
+        """
+        From a given Org text, return the header separate from the content.
+        The given text must be separate line by line and be a list.
+        The return is a list of two items: header and content.
+        Theses two items are text separate line by line in format of a list
+        Keyword Arguments:
+        text_lines -- A list, each item is a line of the texte
+        Return:
+        [
+          header   -- A list, each item is a line of the texte
+          content  -- A list, each item is a line of the texte
+        ]
+        """
+        no_more_header = False
+        expr_metadata = re.compile(r'^#\+[a-zA-Z]+:.*')
+        header = []
+        content = []
+        for line in text_lines:
+            metadata = expr_metadata.match(line)
+            if metadata and not no_more_header:
+                header.append(line)
+            else:
+                no_more_header = True
+                content.append(line)
+        return header, content 
+
+    def _parse_metadatas(self, text_lines):
+        """
+        From a given Org text, return the metadatas 
+        Keyword Arguments:
+        text_lines -- A list, each item is a line of the texte
+        Return:
+        A dict containing metadatas
+        """
+        if not text_lines:
+            return {}
+        expr_metadata = re.compile(r'^#\+([a-zA-Z]+):(.*)')
+        return {
+            expr_metadata.match(line).group(1).lower()
+            : expr_metadata.match(line).group(2).strip()
+            for line in text_lines
+        }
+
+    def read(self, source_path):
+        """
+        Parse content and metadata of Org files
+        Keyword Arguments:
+        source_path -- Path to the Org file to parse
+        """
+        with pelican_open(source_path) as text:
+            text_lines = list(text.splitlines())
+
+        header, content = self._separate_header_and_content(text_lines)
+        metadatas = self._parse_metadatas(header)
+        metadatas_processed = {
+            key
+            : self.process_metadata(key, value)
+            for key, value in metadatas.items()
+        }
+        content_html = convert_html("\n".join(content),
+                                    highlight=self.code_highlight)
+        return content_html, metadatas_processed
+    
 def add_reader(readers):
-    readers.reader_classes['org'] = OrgReader
-
+    for ext in OrgReader.file_extensions:
+        readers.reader_classes[ext] = OrgReader
 
 def register():
     signals.readers_init.connect(add_reader)