diff options
author | Jesús <heckyel@hyperbola.info> | 2019-11-11 17:51:26 -0500 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2019-11-11 17:51:26 -0500 |
commit | c6d0c67bc072f260eed2e333fd2515c1298bb9a2 (patch) | |
tree | c271015da3005b02ab6e427a2e31a28086870ee1 /plugins/extract_toc/extract_toc.py | |
parent | e4a8f433b6d67cb09fa38e2326d8d62f599c2a61 (diff) | |
download | cl-c6d0c67bc072f260eed2e333fd2515c1298bb9a2.tar.lz cl-c6d0c67bc072f260eed2e333fd2515c1298bb9a2.tar.xz cl-c6d0c67bc072f260eed2e333fd2515c1298bb9a2.zip |
Added support 'Table of Content' [Toc]
Diffstat (limited to 'plugins/extract_toc/extract_toc.py')
-rw-r--r-- | plugins/extract_toc/extract_toc.py | 66 |
1 files changed, 66 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
Extract Table of Contents
=========================

A Pelican plugin to extract the table of contents (ToC) from `article.content`
and place it in its own `article.toc` variable for use in templates.
"""

import logging
from os import path

from bs4 import BeautifulSoup
from pelican import signals, readers, contents

logger = logging.getLogger(__name__)

# Document wrapper stripped from the serialized ToC when present.  These are
# the 12 and 14 characters the slice `[12:-14]` was meant to remove.
_HTML_WRAPPER_OPEN = '<html><body>'
_HTML_WRAPPER_CLOSE = '</body></html>'


def extract_toc(content):
    """Move the table of contents out of ``content._content`` into ``content.toc``.

    The ToC element is looked up according to the reader that matches the
    source file extension:

    * Markdown reader: ``<div class="toc">`` (ignored when it has no ``<li>``),
    * reStructuredText reader: ``<div class="contents topic">``, which is
      re-tagged as ``<div class="toc" id="">`` with its title paragraph
      removed,
    * Pandoc reader (only when ``pandoc_reader`` is listed in the ``PLUGINS``
      setting and importable): ``<nav id="TOC">``.

    When a ToC is found, ``content._content`` is rewritten without it and the
    serialized ToC is stored in ``content.toc``.  Otherwise ``content`` is
    left untouched.

    :param content: the content object passed by the
        ``content_object_init`` signal; ``contents.Static`` instances are
        skipped.
    :return: ``None``; the function works by mutating ``content``.
    """
    if isinstance(content, contents.Static):
        return

    soup = BeautifulSoup(content._content, 'html.parser')
    # File extension without the leading dot, e.g. 'md' or 'rst'.
    extension = path.splitext(content.source_path)[1][1:]
    toc = None

    # default Markdown reader
    if (readers.MarkdownReader.enabled
            and extension in readers.MarkdownReader.file_extensions):
        toc = soup.find('div', class_='toc')
        if toc is not None:
            toc.extract()
            # A ToC without any entry is treated as "no ToC".  The <ul> may be
            # missing altogether, so guard against None before counting items.
            toc_list = toc.find('ul')
            if toc_list is None or not toc_list.find_all('li'):
                toc = None

    # default reStructuredText reader
    if (toc is None and readers.RstReader.enabled
            and extension in readers.RstReader.file_extensions):
        toc = soup.find('div', class_='contents topic')
        if toc is not None:
            toc.extract()
            # Re-parse the fragment so it can be edited as a standalone
            # document, then normalise it to the same markup as the Markdown
            # ToC (class "toc", empty id, no title paragraph).
            tag = BeautifulSoup(str(toc), 'html.parser')
            tag.div['class'] = 'toc'
            tag.div['id'] = ''
            title = tag.find('p', class_='topic-title first')
            if title is not None:
                title.extract()
            toc = tag

    # Pandoc reader (markdown and other formats)
    # The PLUGINS setting may be missing or None, so fall back to an empty
    # tuple instead of failing on the membership test.
    plugins = content.settings.get('PLUGINS') or ()
    if toc is None and 'pandoc_reader' in plugins:
        try:
            from pandoc_reader import PandocReader
        except ImportError:
            PandocReader = None
        if (PandocReader is not None and PandocReader.enabled
                and extension in PandocReader.file_extensions):
            toc = soup.find('nav', id='TOC')

    if toc is None:
        return

    # Detach the ToC (a no-op if a branch above already did) before the
    # remaining document is serialized back into the content.
    toc.extract()
    content._content = soup.decode()

    toc_html = toc.decode()
    # Strip the document wrapper only when both ends are actually present, so
    # a ToC that merely starts with '<html>' is never truncated blindly.
    if (toc_html.startswith(_HTML_WRAPPER_OPEN)
            and toc_html.endswith(_HTML_WRAPPER_CLOSE)):
        toc_html = toc_html[len(_HTML_WRAPPER_OPEN):-len(_HTML_WRAPPER_CLOSE)]
    content.toc = toc_html


def register():
    """Connect :func:`extract_toc` to Pelican's ``content_object_init`` signal."""
    signals.content_object_init.connect(extract_toc)