Browse Source

Merge pull request #66 from talha131/json-serializer

Add JSON Serializer plugin to enable search
Alexis Metaireau 10 years ago
parent
commit
0d30333281
3 changed files with 142 additions and 0 deletions
  1. 65 0
      json_serializer/README.md
  2. 1 0
      json_serializer/__init__.py
  3. 76 0
      json_serializer/json_serializer.py

+ 65 - 0
json_serializer/README.md

@@ -0,0 +1,65 @@
+JSON Serializer
+===============
+
+A Pelican plugin to serialize HTML output to JSON
+
+Copyright (c) Talha Mansoor
+
+Author          | Talha Mansoor
+----------------|-----
+Author Email    | talha131@gmail.com 
+Author Homepage | http://onCrashReboot.com 
+Github Account  | https://github.com/talha131 
+
+Why do you need it?
+===================
+
+Static sites do not offer a search feature out of the box. [Tipue Search](http://www.tipue.com/search/)
+is a jQuery plugin that searches a static site without using any third-party service, like DuckDuckGo or Google.
+
+Tipue Search offers 4 search modes. Its [JSON search mode](http://www.tipue.com/search/docs/json/) is the best search mode,
+especially for large sites.
+
+Tipue's JSON search mode requires the textual content of the site in JSON format.
+
+Requirements
+============
+
+JSON Serializer requires BeautifulSoup.
+
+```bash
+pip install beautifulsoup4
+```
+
+How JSON Serializer works
+=========================
+
+JSON Serializer serializes the generated HTML into JSON. The format of the JSON is as follows:
+
+```json
+{
+    "pages": [
+        { 
+            "text": "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Integer nec odio. Praesent libero. Sed cursus ante dapibus diam. Sed nisi. Nulla quis sem at nibh elementum imperdiet. Duis sagittis ipsum. Praesent mauris. Fusce nec tellus sed augue semper porta. Mauris massa. Vestibulum lacinia arcu eget nulla. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per inceptos himenaeos. Curabitur sodales ligula in libero.",
+            "tags": "Example Category",
+            "loc" : "http://oncrashreboot.com/plugin-example.html",
+            "title": "Everything you want to know about Lorem Ipsum"
+        },
+        { 
+            "text": "Sed dignissim lacinia nunc. Curabitur tortor. Pellentesque nibh. Aenean quam. In scelerisque sem at dolor. Maecenas mattis. Sed convallis tristique sem. Proin ut ligula vel nunc egestas porttitor. Morbi lectus risus, iaculis vel, suscipit quis, luctus non, massa. Fusce ac turpis quis ligula lacinia aliquet. Mauris ipsum. Nulla metus metus, ullamcorper vel, tincidunt sed, euismod in, nibh.",
+            "tags": "Example Category",
+            "loc" : "http://oncrashreboot.com/plugin-example-2.html",
+            "title": "Review of the book Lorem Ipsum"
+        }
+    ]
+}
+```
+
+The JSON is written to the file `tipuesearch_content.json`, which is created in the root of the `output` directory.
+
+How to use
+==========
+
+To use the JSON search mode, your theme needs to have Tipue Search properly configured. The [official documentation](http://www.tipue.com/search/docs/json/) has the required details.
+
+Pelican [Elegant Theme](https://github.com/talha131/pelican-elegant) has Tipue Search configured. You can view its code to understand the configuration. 

+ 1 - 0
json_serializer/__init__.py

@@ -0,0 +1 @@
+from .json_serializer import *

+ 76 - 0
json_serializer/json_serializer.py

@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+JSON Serializer
+===============
+
+A Pelican plugin to serialize generated HTML to JSON
+that can be used by jQuery plugin - Tipue Search.
+
+Copyright (c) Talha Mansoor
+"""
+
+from __future__ import unicode_literals
+
+import os.path
+import json
+from bs4 import BeautifulSoup
+from codecs import open
+
+from pelican import signals
+
+
class JSON_Generator(object):
    """Collect site content and write it out as Tipue Search JSON.

    One node is built per published page/article (``title``, ``text``,
    ``tags`` and ``loc`` keys) and the list of nodes is dumped under the
    ``pages`` key of ``tipuesearch_content.json`` inside ``output_path``.

    The ``path``, ``theme`` and any extra positional arguments are accepted
    but not used by this class.
    """

    def __init__(self, context, settings, path, theme, output_path, *null):
        """Store the pieces of state needed to build and write the JSON.

        :param context: mapping that provides the ``pages`` and ``articles``
            lists read by :meth:`generate_output`.
        :param settings: mapping from which ``SITEURL`` is read.
        :param output_path: directory the JSON file is written into.
        """
        self.output_path = output_path
        self.context = context
        # A missing or ``None`` SITEURL would break the string concatenation
        # used to build each node's URL, so fall back to an empty prefix.
        self.siteurl = settings.get('SITEURL') or ''
        self.json_nodes = []

    @staticmethod
    def _category_name(page):
        """Return the name of ``page``'s category, or ``''`` if it has none.

        Content objects are not guaranteed to carry a ``category``
        attribute, and the attribute may be ``None`` or compare equal to
        the string ``'None'``; all three cases yield an empty string.
        """
        category = getattr(page, 'category', None)
        if category is None or category == 'None':
            return ''
        return category.name

    def create_json_node(self, page):
        """Append a search node for ``page`` to ``self.json_nodes``.

        Content whose ``status`` is anything other than ``'published'`` is
        skipped; content without a ``status`` attribute is treated as
        published.
        """
        if getattr(page, 'status', 'published') != 'published':
            return

        # Strip the markup so only the text content ends up in the index.
        soup = BeautifulSoup(page.content, 'html.parser')

        node = {'title': page.title,
                'text': soup.get_text(),
                'tags': self._category_name(page),
                'loc': self.siteurl + '/' + page.url}

        self.json_nodes.append(node)

    def generate_output(self, writer):
        """Build nodes for all content and write the JSON file.

        :param writer: accepted but not used; the file is written directly.
        """
        path = os.path.join(self.output_path, 'tipuesearch_content.json')

        # Concatenation creates a new list, so extending it below does not
        # modify the lists held in the context.
        pages = self.context['pages'] + self.context['articles']

        for article in self.context['articles']:
            pages += article.translations

        for page in pages:
            self.create_json_node(page)
        root_node = {'pages': self.json_nodes}

        with open(path, 'w', encoding='utf-8') as fd:
            json.dump(root_node, fd, indent=4)
+
+
def get_generators(generators):
    """Return the generator class contributed by this plugin.

    The ``generators`` argument is accepted but not used.
    """
    generator_class = JSON_Generator
    return generator_class
+
+
def register():
    """Connect ``get_generators`` to the ``signals.get_generators`` signal."""
    hook = signals.get_generators
    hook.connect(get_generators)