1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- """
- Extract Table of Content
- ========================
- A Pelican plugin to extract table of contents (ToC) from `article.content` and
- place it in its own `article.toc` variable for use in templates.
- """
- from os import path
- from bs4 import BeautifulSoup
- from pelican import signals, readers, contents
- import logging
- logger = logging.getLogger(__name__)
def extract_toc(content):
    """Move the table of contents out of the rendered HTML into ``content.toc``.

    The rendered HTML in ``content._content`` is searched for the ToC markup
    matching the reader that handles the source file's extension:

    * Markdown reader: ``<div class="toc">``
    * reStructuredText reader: ``<div class="contents topic">`` (re-labelled
      to ``class="toc"`` with its title paragraph removed)
    * Pandoc reader (only if ``pandoc_reader`` is listed in the ``PLUGINS``
      setting and importable): ``<nav id="TOC">``

    When a ToC is found it is removed from ``content._content`` and its HTML
    is stored in ``content.toc``. When none is found, ``content`` is left
    untouched and no ``toc`` attribute is set by this function.

    Args:
        content: The content object being initialised; ``contents.Static``
            instances are ignored.
    """
    if isinstance(content, contents.Static):
        return

    soup = BeautifulSoup(content._content, 'html.parser')
    # Extension without the leading dot; tolerate a missing source path.
    extension = path.splitext(content.source_path or '')[1][1:]
    toc = None

    # Default Markdown reader.
    markdown_reader = readers.MarkdownReader
    if markdown_reader.enabled and extension in markdown_reader.file_extensions:
        toc = soup.find('div', class_='toc')
        if toc is not None:
            toc.extract()

    # Default reStructuredText reader.
    rst_reader = readers.RstReader
    if toc is None and rst_reader.enabled and extension in rst_reader.file_extensions:
        toc = soup.find('div', class_='contents topic')
        if toc is not None:
            toc.extract()
            # Re-parse the fragment so it can be normalised independently of
            # the article soup: same wrapper class as Markdown, no id, and no
            # title paragraph.
            tag = BeautifulSoup(str(toc), 'html.parser')
            tag.div['class'] = 'toc'
            tag.div['id'] = ''
            title = tag.find('p', class_='topic-title first')
            if title is not None:
                title.extract()
            toc = tag

    # Pandoc reader (optional plugin). PLUGINS may be absent or None, so a
    # plain ``settings['PLUGINS']`` lookup / ``in`` test could raise.
    plugins = content.settings.get('PLUGINS') or ()
    if 'pandoc_reader' in plugins:
        try:
            from pandoc_reader import PandocReader
        except ImportError:
            PandocReader = None
        if (toc is None
                and PandocReader is not None
                and PandocReader.enabled
                and extension in PandocReader.file_extensions):
            toc = soup.find('nav', id='TOC')

    if toc is None:
        # No ToC in this document: leave the content exactly as it was.
        return

    toc.extract()
    content._content = soup.decode()
    toc_html = toc.decode()
    if toc_html.startswith('<html>'):
        # Strip a '<html><body>' (12 chars) / '</body></html>' (14 chars)
        # wrapper that some parsers add around a re-parsed fragment.
        toc_html = toc_html[12:-14]
    content.toc = toc_html
def register():
    """Connect ``extract_toc`` to Pelican's ``content_object_init`` signal."""
    content_initialised = signals.content_object_init
    content_initialised.connect(extract_toc)
|