author     Jesús <heckyel@hyperbola.info>    2020-06-10 16:11:18 -0500
committer  Jesús <heckyel@hyperbola.info>    2020-06-10 16:11:18 -0500
commit     9d22ec021fa2cf461e6f62fef78103493e21b2ad (patch)
tree       22a3fe19ed075e2debee75ec6ac8ea1e13827246 /plugins/tipue-search/tipue_search.py
parent     ed201535a3f08450238ef79c6d407e0db052e785 (diff)
download   libretube-9d22ec021fa2cf461e6f62fef78103493e21b2ad.tar.lz
           libretube-9d22ec021fa2cf461e6f62fef78103493e21b2ad.tar.xz
           libretube-9d22ec021fa2cf461e6f62fef78103493e21b2ad.zip
tipuesearch_content.json: filter only articles and improve the JSON API
Diffstat (limited to 'plugins/tipue-search/tipue_search.py')
-rw-r--r--   plugins/tipue-search/tipue_search.py   107
1 file changed, 74 insertions(+), 33 deletions(-)
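
For orientation before the diff: the patch changes the shape of each node written to tipuesearch_content.json. A minimal sketch of one entry before and after, reconstructed from the hunk below (all values are hypothetical placeholders):

    # Old schema: one generic page entry.
    old_node = {
        'title': 'Example article',
        'text': 'Full extracted article text ...',
        'tags': 'Example Category',
        'url': 'https://example.org/example-article.html',
    }

    # New schema: a video-oriented entry; 'formatStreams' wraps article.og_video,
    # and 'videoThumbnail' is built from article.image and THEME_STATIC_DIR.
    new_node = {
        'title': 'Example article',
        'description': 'Full extracted article text ...',
        'videoThumbnail': 'https://example.org/theme/../wp-content/uploads/article/poster/example.jpg',
        'formatStreams': {'url': 'https://example.org/example.webm'},
        'author': 'Example Author',
        'publishedText': 'Wed, 10 June, 2020',
        'time': '10:25',
        'tags': 'Example Category',
        'url': 'https://example.org/example-article.html',
    }

The root key of the file also changes from 'pages' to 'videos'; these renamed fields are what the commit message means by the improved JSON API.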
diff --git a/plugins/tipue-search/tipue_search.py b/plugins/tipue-search/tipue_search.py
index 2880850..8a62a68 100644
--- a/plugins/tipue-search/tipue_search.py
+++ b/plugins/tipue-search/tipue_search.py
@@ -32,66 +32,107 @@ class Tipue_Search_JSON_Generator(object):
         self.siteurl = settings.get('SITEURL')
         self.relative_urls = settings.get('RELATIVE_URLS')
         self.tpages = settings.get('TEMPLATE_PAGES')
+        self.tstatic = settings.get('THEME_STATIC_DIR')
         self.output_path = output_path
         self.json_nodes = []
 
-    def create_json_node(self, page):
+    def create_json_node(self, article):
 
-        if getattr(page, 'status', 'published') != 'published':
+        if getattr(article, 'status', 'published') != 'published':
             return
 
-        soup_title = BeautifulSoup(page.title.replace('&nbsp;', ' '), 'html.parser')
-        page_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('^', '&#94;')
-
-        soup_text = BeautifulSoup(page.content, 'html.parser')
-        page_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '&#94;')
-        page_text = ' '.join(page_text.split())
-
-        page_category = page.category.name if getattr(page, 'category', 'None') != 'None' else ''
-
-        page_url = '.'
-        if page.url:
-            page_url = page.url if self.relative_urls else (self.siteurl + '/' + page.url)
-
-        node = {'title': page_title,
-                'text': page_text,
-                'tags': page_category,
-                'url': page_url}
+        soup_title = BeautifulSoup(article.title.replace('&nbsp;', ' '), 'html.parser')
+        video_title = soup_title.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('^', '&#94;')
+
+        soup_text = BeautifulSoup(article.content, 'html.parser')
+        video_text = soup_text.get_text(' ', strip=True).replace('“', '"').replace('”', '"').replace('’', "'").replace('¶', ' ').replace('^', '&#94;')
+        video_text = ' '.join(video_text.split())
+
+        if self.relative_urls:
+            image_url = '.'
+        else:
+            image_url = self.siteurl
+
+        # thumbnail
+        video_image = article.image if getattr(
+            article, 'image', 'None') != 'None' else ''
+
+        url_image = "%s/%s/../wp-content/uploads/article/poster/%s" % (
+            image_url, self.tstatic, video_image
+        )
+
+        # publish
+        video_publish = article.date.strftime("%a, %d %B, %Y") if getattr(
+            article, 'date', 'None') != 'None' else ''
+
+        # author
+        video_author = str(article.author) if getattr(
+            article, 'author', 'None') != 'None' else ''
+
+        # time
+        video_time = article.time if getattr(
+            article, 'time', 'None') != 'None' else ''
+
+        video_url = '.'
+        if article.url:
+            video_url = article.url if self.relative_urls else (
+                self.siteurl + '/' + article.url)
+
+        video_src = article.og_video if getattr(
+            article, 'og_video', 'None') != 'None' else ''
+
+        video_category = article.category.name if getattr(
+            article, 'category', 'None') != 'None' else ''
+
+        node = {'title': video_title,
+                'description': video_text,
+                'videoThumbnail': url_image,
+                'formatStreams': {
+                    'url': video_src,
+                },
+                'author': video_author,
+                'publishedText': video_publish,
+                'time': video_time,
+                'tags': video_category,
+                'url': video_url}
 
         self.json_nodes.append(node)
 
     def create_tpage_node(self, srclink):
 
-        srcfile = open(os.path.join(self.output_path, self.tpages[srclink]), encoding='utf-8')
+        srcfile = open(os.path.join(self.output_path,
+                                    self.tpages[srclink]),
+                       encoding='utf-8')
         soup = BeautifulSoup(srcfile, 'html.parser')
-        page_title = soup.title.string if soup.title is not None else ''
-        page_text = soup.get_text()
+        video_title = soup.title.string if soup.title is not None else ''
+        video_text = soup.get_text()
 
         # Should set default category?
-        page_category = ''
-        page_url = urljoin(self.siteurl, self.tpages[srclink])
+        video_category = ''
+        video_url = urljoin(self.siteurl, self.tpages[srclink])
 
-        node = {'title': page_title,
-                'text': page_text,
-                'tags': page_category,
-                'url': page_url}
+        node = {'title': video_title,
+                'text': video_text,
+                'tags': video_category,
+                'url': video_url}
 
         self.json_nodes.append(node)
 
     def generate_output(self, writer):
         path = os.path.join(self.output_path, 'tipuesearch_content.json')
 
-        pages = self.context['pages'] + self.context['articles']
+        articles = self.context['articles']
 
         for article in self.context['articles']:
-            pages += article.translations
+            articles += article.translations
 
         for srclink in self.tpages:
             self.create_tpage_node(srclink)
 
-        for page in pages:
-            self.create_json_node(page)
-        root_node = {'pages': self.json_nodes}
+        for article in articles:
+            self.create_json_node(article)
+
+        root_node = {'videos': self.json_nodes}
 
         with open(path, 'w', encoding='utf-8') as fd:
             json.dump(root_node, fd, separators=(',', ':'), ensure_ascii=False)
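
Anything that consumed the old index must switch from the 'pages' root key to 'videos'. A minimal, hypothetical consumer sketch (the file name comes from generate_output above; the lookup logic is illustrative only):

    import json

    # Load the index written by generate_output(); the root key is now 'videos'.
    with open('tipuesearch_content.json', encoding='utf-8') as fd:
        index = json.load(fd)

    # Hypothetical lookup: case-insensitive title match. Only 'title' and 'url'
    # are used because they exist in both article nodes and template-page nodes.
    query = 'example'
    for entry in index['videos']:
        if query.lower() in entry['title'].lower():
            print(entry['title'], entry['url'])

Note that create_tpage_node still emits the old 'text'/'tags' node shape, so entries under 'videos' are not uniform: a consumer that relies on 'description' or 'formatStreams' will miss template pages.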