tipue_search.py 2.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. # -*- coding: utf-8 -*-
  """
  Tipue Search
  ============

  A Pelican plugin that serializes the generated HTML to JSON
  so that it can be used by the jQuery plugin Tipue Search.

  Copyright (c) Talha Mansoor
  """
  9. from __future__ import unicode_literals
  10. import os.path
  11. import json
  12. from bs4 import BeautifulSoup
  13. from codecs import open
  14. from pelican import signals
  class Tipue_Search_JSON_Generator(object):
      """Serialize site content to the JSON file read by Tipue Search.

      Every published page, article and article translation found in the
      context becomes one node with the keys ``title``, ``text``, ``tags``
      and ``loc``.  The nodes are written to ``tipuesearch_content.json``
      inside the output directory.
      """

      # NOTE(review): the listing this code was recovered from showed the
      # no-op calls ``replace(' ', ' ')`` and ``replace('^', '^')``.  These
      # are presumably HTML entities that were decoded when the code was
      # published, so they are restored here as '&nbsp;' and '&#94;' --
      # confirm against the upstream plugin.

      # (needle, replacement) pairs applied to the extracted title text.
      _TITLE_REPLACEMENTS = (
          ('“', '"'),
          ('”', '"'),
          ('’', "'"),
          ('^', '&#94;'),
      )

      # Same as the title pairs, plus removal of the pilcrow sign.
      _TEXT_REPLACEMENTS = (
          ('“', '"'),
          ('”', '"'),
          ('’', "'"),
          ('¶', ' '),
          ('^', '&#94;'),
      )

      def __init__(self, context, settings, path, theme, output_path, *null):
          """Store what is needed to build the search index.

          Only ``context``, ``settings`` and ``output_path`` are used;
          ``path``, ``theme`` and any extra positional arguments are
          accepted and ignored.
          """
          self.output_path = output_path
          self.context = context
          # Fall back to '' so building ``loc`` cannot fail on ``None + '/'``.
          self.siteurl = settings.get('SITEURL') or ''
          self.json_nodes = []

      @staticmethod
      def _plain_text(markup, replacements):
          """Strip the tags from *markup* and apply *replacements* in order."""
          # Name the parser explicitly so the result does not depend on
          # which optional parsers happen to be installed.
          soup = BeautifulSoup(markup, 'html.parser')
          text = soup.get_text(' ', strip=True)
          for needle, substitute in replacements:
              text = text.replace(needle, substitute)
          return text

      def create_json_node(self, page):
          """Append a search node for *page* to ``self.json_nodes``.

          Nothing is appended when the page has a ``status`` attribute
          whose value is not ``'published'``.
          """
          if getattr(page, 'status', 'published') != 'published':
              return

          # Turn non-breaking-space entities into plain spaces before
          # parsing the title.
          page_title = self._plain_text(
              page.title.replace('&nbsp;', ' '), self._TITLE_REPLACEMENTS)

          # split()/join collapses every run of whitespace to one space.
          page_text = ' '.join(
              self._plain_text(page.content, self._TEXT_REPLACEMENTS).split())

          # A missing category, ``None`` or the string 'None' all yield an
          # empty ``tags`` value.
          category = getattr(page, 'category', None)
          if category is None or category == 'None':
              page_category = ''
          else:
              page_category = category.name

          self.json_nodes.append({
              'title': page_title,
              'text': page_text,
              'tags': page_category,
              'loc': self.siteurl + '/' + page.url,
          })

      def generate_output(self, writer):
          """Write ``tipuesearch_content.json`` into the output directory.

          ``writer`` is accepted but not used; the file is written directly.
          """
          path = os.path.join(self.output_path, 'tipuesearch_content.json')

          # Build a new list so the lists stored in the context are not
          # modified when the translations are added.
          pages = list(self.context['pages']) + list(self.context['articles'])
          for article in self.context['articles']:
              pages.extend(article.translations)

          for page in pages:
              self.create_json_node(page)

          root_node = {'pages': self.json_nodes}
          with open(path, 'w', encoding='utf-8') as fd:
              # Compact separators keep the generated file small.
              json.dump(root_node, fd, separators=(',', ':'))
  def get_generators(generators):
      """Return the generator class contributed by this plugin.

      Connected to the ``get_generators`` signal by ``register``.  The
      ``generators`` argument delivered with the signal is not used.
      """
      return Tipue_Search_JSON_Generator
  def register():
      """Connect ``get_generators`` to Pelican's ``get_generators`` signal."""
      signals.get_generators.connect(get_generators)