|
@@ -33,10 +33,12 @@ class Tipue_Search_JSON_Generator(object):
|
|
if getattr(page, 'status', 'published') != 'published':
|
|
if getattr(page, 'status', 'published') != 'published':
|
|
return
|
|
return
|
|
|
|
|
|
- page_title = page.title
|
|
|
|
|
|
+ soup_title = BeautifulSoup(page.title.replace(' ', ' '))
|
|
|
|
+ page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'")
|
|
|
|
|
|
- soup = BeautifulSoup(page.content, 'html.parser')
|
|
|
|
- page_text = soup.get_text()
|
|
|
|
|
|
+ soup_text = BeautifulSoup(page.content)
|
|
|
|
+ page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ')
|
|
|
|
+ page_text = ' '.join(page_text.split())
|
|
|
|
|
|
if getattr(page, 'category') == 'None':
|
|
if getattr(page, 'category') == 'None':
|
|
page_category = ''
|
|
page_category = ''
|
|
@@ -65,7 +67,7 @@ class Tipue_Search_JSON_Generator(object):
|
|
root_node = {'pages': self.json_nodes}
|
|
root_node = {'pages': self.json_nodes}
|
|
|
|
|
|
with open(path, 'w', encoding='utf-8') as fd:
|
|
with open(path, 'w', encoding='utf-8') as fd:
|
|
- json.dump(root_node, fd, indent=4)
|
|
|
|
|
|
+ json.dump(root_node, fd, separators=(',', ':'))
|
|
|
|
|
|
|
|
|
|
def get_generators(generators):
|
|
def get_generators(generators):
|