# -*- coding: utf-8 -*-
"""
Extract Table of Content
========================

A Pelican plugin to extract table of contents (ToC) from `article.content` and
place it in its own `article.toc` variable for use in templates.
"""
# Provenance: commit c6d0c67bc072f260eed2e333fd2515c1298bb9a2 by Jesús,
# Mon, 11 Nov 2019 17:51:26 -0500, "Added support 'Table of Content' [Toc]",
# which created plugins/extract_toc/extract_toc.py.

import logging
from os import path

from bs4 import BeautifulSoup
from pelican import signals, readers, contents

logger = logging.getLogger(__name__)

# Document wrapper that is stripped from the serialized ToC when present.
# The lengths (12 and 14 characters) are exactly the slice bounds the plugin
# has always used for this purpose.
_HTML_PREFIX = '<html><body>'
_HTML_SUFFIX = '</body></html>'


def _markdown_toc(soup):
    """Detach and return the Markdown ``<div class="toc">``, or ``None``.

    The div is removed from *soup* as soon as it is found. A ToC without a
    ``<ul>``, or whose ``<ul>`` holds no ``<li>`` entries, is reported as
    ``None``.
    """
    toc = soup.find('div', class_='toc')
    if toc is None:
        return None
    toc.extract()
    listing = toc.find_next('ul')
    # A missing <ul> used to raise AttributeError; treat it as an empty ToC.
    if listing is None or not listing.find_all('li'):
        return None
    return toc


def _rst_toc(soup):
    """Detach and return the reStructuredText contents block, or ``None``.

    The ``<div class="contents topic">`` is removed from *soup* and re-parsed
    into a standalone document whose div gets ``class="toc"`` and an empty
    ``id``; its ``<p class="topic-title first">`` heading, if any, is dropped.
    """
    found = soup.find('div', class_='contents topic')
    if found is None:
        return None
    found.extract()
    tag = BeautifulSoup(str(found), 'html.parser')
    tag.div['class'] = 'toc'
    tag.div['id'] = ''
    title = tag.find('p', class_='topic-title first')
    if title is not None:
        title.extract()
    return tag


def _pandoc_toc(soup, extension):
    """Return the Pandoc ``<nav id="TOC">`` element from *soup*, or ``None``.

    ``None`` is also returned when ``pandoc_reader`` cannot be imported, when
    its reader is disabled, or when *extension* is not one it handles.
    """
    try:
        from pandoc_reader import PandocReader
    except ImportError:
        return None
    if PandocReader.enabled and extension in PandocReader.file_extensions:
        return soup.find('nav', id='TOC')
    return None


def extract_toc(content):
    """Move the table of contents out of ``content._content`` into ``content.toc``.

    The HTML in ``content._content`` is parsed and searched for a ToC in this
    order: the Markdown reader's ``div.toc``, the reStructuredText reader's
    ``div.contents.topic``, then (only when ``'pandoc_reader'`` is listed in
    the ``PLUGINS`` setting) Pandoc's ``nav#TOC``. Each lookup only runs when
    the matching reader is enabled and handles the source file's extension.

    When a ToC is found, ``content._content`` is rewritten without it and the
    serialized ToC is stored in ``content.toc``. Otherwise *content* is left
    untouched.

    :param content: a Pelican content object; ``contents.Static`` instances
        are skipped.
    :returns: ``None``; *content* is modified in place.
    """
    if isinstance(content, contents.Static):
        return

    soup = BeautifulSoup(content._content, 'html.parser')
    # File extension of the source without the leading dot, e.g. "md".
    extension = path.splitext(content.source_path)[1][1:]
    toc = None

    # default Markdown reader
    markdown = readers.MarkdownReader
    if markdown.enabled and extension in markdown.file_extensions:
        toc = _markdown_toc(soup)

    # default reStructuredText reader
    rst = readers.RstReader
    if toc is None and rst.enabled and extension in rst.file_extensions:
        toc = _rst_toc(soup)

    # Pandoc reader (markdown and other formats)
    # PLUGINS may be missing or None; both mean "pandoc_reader is not listed".
    plugins = content.settings.get('PLUGINS') or ()
    if toc is None and 'pandoc_reader' in plugins:
        toc = _pandoc_toc(soup, extension)

    if toc is None:
        return

    toc.extract()
    content._content = soup.decode()
    toc_html = toc.decode()
    # Only trim when the wrapper is really there: an unconditional slice
    # would cut 12 leading and 14 trailing characters off a bare ToC.
    if toc_html.startswith(_HTML_PREFIX) and toc_html.endswith(_HTML_SUFFIX):
        toc_html = toc_html[len(_HTML_PREFIX):-len(_HTML_SUFFIX)]
    content.toc = toc_html


def register():
    """Connect :func:`extract_toc` to Pelican's ``content_object_init`` signal."""
    signals.content_object_init.connect(extract_toc)