Diffstat (limited to 'plugins/tipue-search/tipue_search.py')
-rw-r--r--  plugins/tipue-search/tipue_search.py  108
1 file changed, 108 insertions, 0 deletions
diff --git a/plugins/tipue-search/tipue_search.py b/plugins/tipue-search/tipue_search.py
new file mode 100644
index 0000000..c5ad06d
--- /dev/null
+++ b/plugins/tipue-search/tipue_search.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+"""
+Tipue Search
+============
+
+A Pelican plugin that serializes the generated HTML to JSON
+so it can be consumed by the Tipue Search jQuery plugin.
+
+Copyright (c) Talha Mansoor
+"""
+
+from __future__ import unicode_literals
+
+import os.path
+import json
+from bs4 import BeautifulSoup
+from codecs import open
+try:
+    from urlparse import urljoin
+except ImportError:
+    from urllib.parse import urljoin
+
+from pelican import signals
+
+
+class Tipue_Search_JSON_Generator(object):
+
+    def __init__(self, context, settings, path, theme, output_path, *null):
+
+        self.output_path = output_path
+        self.context = context
+        self.siteurl = settings.get('SITEURL')
+        self.relative_urls = settings.get('RELATIVE_URLS')
+        self.tpages = settings.get('TEMPLATE_PAGES')
+        self.json_nodes = []
+
+
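+    # Build one search-index node for a published article or page: strip the
+    # HTML with BeautifulSoup, normalise curly quotes, and record the title,
+    # body text, category and URL.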
+    def create_json_node(self, page):
+
+        if getattr(page, 'status', 'published') != 'published':
+            return
+
+        soup_title = BeautifulSoup(page.title.replace('&nbsp;', ' '), 'html.parser')
+        page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('^', '&#94;')
+
+        soup_text = BeautifulSoup(page.content, 'html.parser')
+        page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '&#94;')
+        page_text = ' '.join(page_text.split())
+
+        page_category = page.category.name if getattr(page, 'category', None) is not None else ''
+
+        page_url = '.'
+        if page.url:
+            page_url = page.url if self.relative_urls else (self.siteurl + '/' + page.url)
+
+        node = {'title': page_title,
+                'text': page_text,
+                'tags': page_category,
+                'url': page_url}
+
+        self.json_nodes.append(node)
+
+
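+    # Index a template page (TEMPLATE_PAGES maps source paths to output
+    # paths): read the rendered HTML back from the output directory and
+    # extract its <title> and visible text.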
+    def create_tpage_node(self, srclink):
+
+        with open(os.path.join(self.output_path, self.tpages[srclink]), encoding='utf-8') as srcfile:
+            soup = BeautifulSoup(srcfile, 'html.parser')
+        page_title = soup.title.string if soup.title is not None else ''
+        page_text = soup.get_text()
+
+        # Should set default category?
+        page_category = ''
+        page_url = urljoin(self.siteurl, self.tpages[srclink])
+
+        node = {'title': page_title,
+                'text': page_text,
+                'tags': page_category,
+                'url': page_url}
+
+        self.json_nodes.append(node)
+
+
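+    # Collect nodes for template pages, articles, pages and article
+    # translations, then dump everything as a single JSON file in the
+    # output root.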
+    def generate_output(self, writer):
+        path = os.path.join(self.output_path, 'tipuesearch_content.json')
+
+        pages = self.context['pages'] + self.context['articles']
+
+        for article in self.context['articles']:
+            pages += article.translations
+
+        for srclink in self.tpages:
+            self.create_tpage_node(srclink)
+
+        for page in pages:
+            self.create_json_node(page)
+        root_node = {'pages': self.json_nodes}
+
+        with open(path, 'w', encoding='utf-8') as fd:
+            json.dump(root_node, fd, separators=(',', ':'), ensure_ascii=False)
+
+
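+# Pelican instantiates each generator class returned from the get_generators
+# signal, so the class itself (not an instance) is returned here.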
+def get_generators(generators):
+    return Tipue_Search_JSON_Generator
+
+
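+# Plugin entry point called by Pelican's plugin loader.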
+def register():
+    signals.get_generators.connect(get_generators)