|
@@ -15,6 +15,7 @@ import os.path
|
|
import json
|
|
import json
|
|
from bs4 import BeautifulSoup
|
|
from bs4 import BeautifulSoup
|
|
from codecs import open
|
|
from codecs import open
|
|
|
|
+from urlparse import urljoin
|
|
|
|
|
|
from pelican import signals
|
|
from pelican import signals
|
|
|
|
|
|
@@ -26,8 +27,11 @@ class Tipue_Search_JSON_Generator(object):
|
|
self.output_path = output_path
|
|
self.output_path = output_path
|
|
self.context = context
|
|
self.context = context
|
|
self.siteurl = settings.get('SITEURL')
|
|
self.siteurl = settings.get('SITEURL')
|
|
|
|
+ self.tpages = settings.get('TEMPLATE_PAGES')
|
|
|
|
+ self.output_path = output_path
|
|
self.json_nodes = []
|
|
self.json_nodes = []
|
|
|
|
|
|
|
|
+
|
|
def create_json_node(self, page):
|
|
def create_json_node(self, page):
|
|
|
|
|
|
if getattr(page, 'status', 'published') != 'published':
|
|
if getattr(page, 'status', 'published') != 'published':
|
|
@@ -54,6 +58,32 @@ class Tipue_Search_JSON_Generator(object):
|
|
|
|
|
|
self.json_nodes.append(node)
|
|
self.json_nodes.append(node)
|
|
|
|
|
|
|
|
+
|
|
|
|
def create_tpage_node(self, srclink):
    """Index one rendered template page as a search node.

    Reads the HTML file that ``self.tpages[srclink]`` names under
    ``self.output_path``, extracts its title and text with BeautifulSoup,
    and appends the resulting node dict to ``self.json_nodes``.

    Args:
        srclink: Key into ``self.tpages``. The mapped value is used both
            as the file path relative to ``self.output_path`` and as the
            URL joined onto ``self.siteurl``.
    """
    page_path = self.tpages[srclink]

    # Release the handle as soon as parsing is done; one file is opened
    # per template page.
    # NOTE(review): no encoding is passed here -- confirm whether the
    # rendered output should be read explicitly as UTF-8.
    with open(os.path.join(self.output_path, page_path)) as srcfile:
        soup = BeautifulSoup(srcfile, 'html.parser')

    page_text = soup.get_text()

    # A page may have no <title> at all, and ``.string`` is None when the
    # tag is empty or has more than one child; use an empty title in both
    # cases so the node never carries a null title.
    page_title = ''
    if soup.title is not None and soup.title.string is not None:
        page_title = soup.title.string

    # Should set default category?
    page_category = ''

    page_url = urljoin(self.siteurl, page_path)

    node = {
        'title': page_title,
        'text': page_text,
        'tags': page_category,
        'loc': page_url,
    }

    self.json_nodes.append(node)
|
|
|
|
+
|
|
|
|
+
|
|
def generate_output(self, writer):
|
|
def generate_output(self, writer):
|
|
path = os.path.join(self.output_path, 'tipuesearch_content.json')
|
|
path = os.path.join(self.output_path, 'tipuesearch_content.json')
|
|
|
|
|
|
@@ -62,6 +92,9 @@ class Tipue_Search_JSON_Generator(object):
|
|
for article in self.context['articles']:
|
|
for article in self.context['articles']:
|
|
pages += article.translations
|
|
pages += article.translations
|
|
|
|
|
|
|
|
+ for srclink in self.tpages:
|
|
|
|
+ self.create_tpage_node(srclink)
|
|
|
|
+
|
|
for page in pages:
|
|
for page in pages:
|
|
self.create_json_node(page)
|
|
self.create_json_node(page)
|
|
root_node = {'pages': self.json_nodes}
|
|
root_node = {'pages': self.json_nodes}
|