|
@@ -47,12 +47,9 @@ class Tipue_Search_JSON_Generator(object):
|
|
page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '^')
|
|
page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '^')
|
|
page_text = ' '.join(page_text.split())
|
|
page_text = ' '.join(page_text.split())
|
|
|
|
|
|
- if getattr(page, 'category', 'None') == 'None':
|
|
|
|
- page_category = ''
|
|
|
|
- else:
|
|
|
|
- page_category = page.category.name
|
|
|
|
|
|
+ page_category = page.category.name if getattr(page, 'category', 'None') != 'None' else ''
|
|
|
|
|
|
- page_url = self.siteurl + '/' + page.url
|
|
|
|
|
|
+ page_url = page.url if page.url else '.'
|
|
|
|
|
|
node = {'title': page_title,
|
|
node = {'title': page_title,
|
|
'text': page_text,
|
|
'text': page_text,
|
|
@@ -66,17 +63,11 @@ class Tipue_Search_JSON_Generator(object):
|
|
|
|
|
|
srcfile = open(os.path.join(self.output_path, self.tpages[srclink]), encoding='utf-8')
|
|
srcfile = open(os.path.join(self.output_path, self.tpages[srclink]), encoding='utf-8')
|
|
soup = BeautifulSoup(srcfile, 'html.parser')
|
|
soup = BeautifulSoup(srcfile, 'html.parser')
|
|
|
|
+ page_title = soup.title.string if soup.title is not None else ''
|
|
page_text = soup.get_text()
|
|
page_text = soup.get_text()
|
|
|
|
|
|
- # What happens if there is not a title.
|
|
|
|
- if soup.title is not None:
|
|
|
|
- page_title = soup.title.string
|
|
|
|
- else:
|
|
|
|
- page_title = ''
|
|
|
|
-
|
|
|
|
# Should set default category?
|
|
# Should set default category?
|
|
page_category = ''
|
|
page_category = ''
|
|
-
|
|
|
|
page_url = urljoin(self.siteurl, self.tpages[srclink])
|
|
page_url = urljoin(self.siteurl, self.tpages[srclink])
|
|
|
|
|
|
node = {'title': page_title,
|
|
node = {'title': page_title,
|