From 1156b0998758ee803c7e8ae0cc2beb5181c232a3 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 21 Jun 2019 21:41:41 -0700 Subject: Refactor search page --- server.py | 2 +- youtube/search.py | 119 +++++++++++--------------- youtube/static/shared.css | 6 ++ youtube/templates/base.html | 4 +- youtube/templates/common_elements.html | 152 +++++++++++++++++++++++++++++++++ youtube/templates/search.html | 54 ++++++++++++ youtube/yt_data_extract.py | 113 ++++++++++++++++++++---- 7 files changed, 364 insertions(+), 86 deletions(-) create mode 100644 youtube/templates/common_elements.html create mode 100644 youtube/templates/search.html diff --git a/server.py b/server.py index 917be79..559166b 100644 --- a/server.py +++ b/server.py @@ -6,7 +6,7 @@ from youtube import yt_app from youtube import util # these are just so the files get run - they import yt_app and add routes to it -from youtube import watch +from youtube import watch, search import settings diff --git a/youtube/search.py b/youtube/search.py index 0cef0f3..fcc352f 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -1,16 +1,12 @@ -from youtube import util, html_common, yt_data_extract, proto +from youtube import util, yt_data_extract, proto, local_playlist +from youtube import yt_app import json import urllib -import html -from string import Template import base64 from math import ceil - - -with open("yt_search_results_template.html", "r") as file: - yt_search_results_template = file.read() - +from flask import request +import flask # Sort: 1 # Upload date: 2 @@ -58,41 +54,32 @@ def get_search_json(query, page, autocorrect, sort, filters): content = util.fetch_url(url, headers=headers, report_text="Got search results") info = json.loads(content) return info - -showing_results_for = Template(''' -
Showing results for $corrected_query
-
Search instead for $original_query
-''') -did_you_mean = Template(''' -
Did you mean $corrected_query
-''') -def get_search_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),]) - parameters = env['parameters'] - if len(parameters) == 0: - return html_common.yt_basic_template.substitute( - page_title = "Search", - header = html_common.get_header(), - style = '', - page = '', - ).encode('utf-8') - query = parameters["query"][0] - page = parameters.get("page", "1")[0] - autocorrect = int(parameters.get("autocorrect", "1")[0]) - sort = int(parameters.get("sort", "0")[0]) + +@yt_app.route('/search') +def get_search_page(): + if len(request.args) == 0: + return flask.render_template('base.html', title="Search") + + if 'query' not in request.args: + abort(400) + + query = request.args.get("query") + page = request.args.get("page", "1") + autocorrect = int(request.args.get("autocorrect", "1")) + sort = int(request.args.get("sort", "0")) filters = {} - filters['time'] = int(parameters.get("time", "0")[0]) - filters['type'] = int(parameters.get("type", "0")[0]) - filters['duration'] = int(parameters.get("duration", "0")[0]) + filters['time'] = int(request.args.get("time", "0")) + filters['type'] = int(request.args.get("type", "0")) + filters['duration'] = int(request.args.get("duration", "0")) info = get_search_json(query, page, autocorrect, sort, filters) estimated_results = int(info[1]['response']['estimatedResults']) estimated_pages = ceil(estimated_results/20) results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'] - - corrections = '' - result_list_html = "" + + parsed_results = [] + corrections = {'type': None} for renderer in results: type = list(renderer.keys())[0] if type == 'shelfRenderer': @@ -102,41 +89,39 @@ def get_search_page(env, start_response): corrected_query_string = parameters.copy() corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) - corrections = did_you_mean.substitute( - corrected_query_url = corrected_query_url, - corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - ) + + corrections = { + 'type': 'did_you_mean', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'corrected_query_url': corrected_query_url, + } continue if type == 'showingResultsForRenderer': renderer = renderer[type] no_autocorrect_query_string = parameters.copy() no_autocorrect_query_string['autocorrect'] = ['0'] no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) - corrections = showing_results_for.substitute( - corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - original_query_url = no_autocorrect_query_url, - original_query = html.escape(renderer['originalQuery']['simpleText']), - ) + + corrections = { + 'type': 'showing_results_for', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'original_query_url': no_autocorrect_query_url, + 'original_query': renderer['originalQuery']['simpleText'], + } continue - result_list_html += html_common.renderer_html(renderer, current_query_string=env['QUERY_STRING']) - - page = int(page) - if page <= 5: - page_start = 1 - page_end = min(9, estimated_pages) - else: - page_start = page - 4 - page_end = min(page + 4, estimated_pages) - - - result = Template(yt_search_results_template).substitute( - header = html_common.get_header(query), - results = result_list_html, - page_title = query + " - Search", - search_box_value = html.escape(query), - number_of_results = '{:,}'.format(estimated_results), - number_of_pages = '{:,}'.format(estimated_pages), - page_buttons = html_common.page_buttons_html(page, estimated_pages, util.URL_ORIGIN + "/search", env['QUERY_STRING']), - corrections = corrections - ) - return result.encode('utf-8') + + info = yt_data_extract.parse_info_prepare_for_html(renderer) + if info['type'] != 'unsupported': + parsed_results.append(info) + + return flask.render_template('search.html', + header_playlist_names = local_playlist.get_playlist_names(), + query = query, + estimated_results = estimated_results, + estimated_pages = estimated_pages, + corrections = corrections, + results = parsed_results, + parameters_dictionary = request.args, + ) + + diff --git a/youtube/static/shared.css b/youtube/static/shared.css index 1b25d7f..a360972 100644 --- a/youtube/static/shared.css +++ b/youtube/static/shared.css @@ -219,6 +219,12 @@ address{ max-height:2.4em; overflow:hidden; } + .medium-item .stats > *::after{ + content: " | "; + } + .medium-item .stats > *:last-child::after{ + content: ""; + } .medium-item .description{ grid-column: 2 / span 2; diff --git a/youtube/templates/base.html b/youtube/templates/base.html index e98f972..eafd369 100644 --- a/youtube/templates/base.html +++ b/youtube/templates/base.html @@ -2,13 +2,14 @@ - {% block page_title %}{% endblock %} + {% block page_title %}{{ title }}{% endblock %} @@ -105,6 +106,7 @@
{% block main %} +{{ main }} {% endblock %}
diff --git a/youtube/templates/common_elements.html b/youtube/templates/common_elements.html new file mode 100644 index 0000000..9f2aa3f --- /dev/null +++ b/youtube/templates/common_elements.html @@ -0,0 +1,152 @@ +{% macro text_runs(runs) %} + {%- if runs[0] is mapping -%} + {%- for text_run in runs -%} + {%- if text_run.get("bold", false) -%} + {{ text_run["text"] }} + {%- elif text_run.get('italics', false) -%} + {{ text_run["text"] }} + {%- else -%} + {{ text_run["text"] }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ runs }} + {%- endif -%} +{% endmacro %} + +{% macro small_item(info) %} +
+
+ {% if info['type'] == 'video' %} + + + {{ info['duration'] }} + + {{ info['title'] }} + +
{{ info['author'] }}
+ {{ info['views'] }} + + {% elif info['type'] == 'playlist' %} + + +
+ {{ info['size'] }} +
+
+ {{ info['title'] }} + +
{{ info['author'] }}
+ {% else %} + Error: unsupported item type + {% endif %} +
+ {% if info['type'] == 'video' %} + + {% endif %} +
+{% endmacro %} + +{% macro get_stats(info) %} + {% if 'author_url' is in(info) %} +
By {{ info['author'] }}
+ {% else %} +
{{ info['author'] }}
+ {% endif %} + {% if 'views' is in(info) %} + {{ info['views'] }} + {% endif %} + {% if 'published' is in(info) %} + + {% endif %} +{% endmacro %} + + + +{% macro medium_item(info) %} +
+
+ {% if info['type'] == 'video' %} + + + {{ info['duration'] }} + + + {{ info['title'] }} + +
+ {{ get_stats(info) }} +
+ + {{ text_runs(info['description']) }} + {{ info['badges']|join(' | ') }} + {% elif info['type'] == 'playlist' %} + + +
+ {{ info['size'] }} +
+
+ + {{ info['title'] }} + +
+ {{ get_stats(info) }} +
+ {% elif info['type'] == 'channel' %} + + + + + {{ info['title'] }} + + {{ info['subscriber_count'] }} + {{ info['size'] }} + + {{ text_runs(info['description']) }} + {% else %} + Error: unsupported item type + {% endif %} +
+ {% if info['type'] == 'video' %} + + {% endif %} +
+{% endmacro %} + + +{% macro item(info) %} + {% if info['item_size'] == 'small' %} + {{ small_item(info) }} + {% elif info['item_size'] == 'medium' %} + {{ medium_item(info) }} + {% else %} + Error: Unknown item size + {% endif %} +{% endmacro %} + + + +{% macro page_buttons(estimated_pages, url, parameters_dictionary) %} + {% set current_page = parameters_dictionary.get('page', 1)|int %} + {% set parameters_dictionary = parameters_dictionary.to_dict() %} + {% if current_page is le(5) %} + {% set page_start = 1 %} + {% set page_end = [9, estimated_pages]|min %} + {% else %} + {% set page_start = current_page - 4 %} + {% set page_end = [current_page + 4, estimated_pages]|min %} + {% endif %} + + {% for page in range(page_start, page_end+1) %} + {% if page == current_page %} +
{{ page }}
+ {% else %} + {# IMPORTANT: Jinja SUCKS #} + {# https://stackoverflow.com/questions/36886650/how-to-add-a-new-entry-into-a-dictionary-object-while-using-jinja2 #} + {% set _ = parameters_dictionary.__setitem__('page', page) %} + {{ page }} + {% endif %} + {% endfor %} + +{% endmacro %} diff --git a/youtube/templates/search.html b/youtube/templates/search.html new file mode 100644 index 0000000..1086cfd --- /dev/null +++ b/youtube/templates/search.html @@ -0,0 +1,54 @@ +{% set search_box_value = query %} +{% extends "base.html" %} +{% block page_title %}{{ query + ' - Search' }}{% endblock %} +{% import "common_elements.html" as common_elements %} +{% block style %} + main{ + display:grid; + grid-template-columns: minmax(0px, 1fr) 800px minmax(0px,2fr); + max-width:100vw; + } + + + #number-of-results{ + font-weight:bold; + } + #result-info{ + grid-row: 1; + grid-column:2; + align-self:center; + } + .page-button-row{ + grid-column: 2; + justify-self: center; + } + + + .item-list{ + grid-row: 2; + grid-column: 2; + } + .badge{ + background-color:#cccccc; + } +{% endblock style %} + +{% block main %} +
+
Approximately {{ '{:,}'.format(estimated_results) }} results ({{ '{:,}'.format(estimated_pages) }} pages)
+{% if corrections['type'] == 'showing_results_for' %} +
Showing results for {{ corrections['corrected_query']|safe }}
+
Search instead for {{ corrections['original_query'] }}
+{% elif corrections['type'] == 'did_you_mean' %} +
Did you mean {{ corrections['corrected_query']|safe }}
+{% endif %} +
+
+ {% for info in results %} + {{ common_elements.item(info) }} + {% endfor %} +
+ +{% endblock main %} diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 5483911..a487c57 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -1,4 +1,7 @@ +from youtube import util + import html +import json # videos (all of type str): @@ -138,9 +141,83 @@ dispatch = { } -def renderer_info(renderer): +def ajax_info(item_json): + try: + info = {} + for key, node in item_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(item_json) + raise + + + +def prefix_urls(item): + try: + item['thumbnail'] = '/' + item['thumbnail'].lstrip('/') + except KeyError: + pass + + try: + item['author_url'] = util.URL_ORIGIN + item['author_url'] + except KeyError: + pass + +def add_extra_html_info(item): + if item['type'] == 'video': + item['url'] = util.URL_ORIGIN + '/watch?v=' + item['id'] + + video_info = {} + for key in ('id', 'title', 'author', 'duration'): + try: + video_info[key] = item[key] + except KeyError: + video_info[key] = '' + + item['video_info'] = json.dumps(video_info) + + elif item['type'] == 'playlist': + item['url'] = util.URL_ORIGIN + '/playlist?list=' + item['id'] + elif item['type'] == 'channel': + item['url'] = util.URL_ORIGIN + "/channel/" + item['id'] + + +def renderer_info(renderer, additional_info={}): + type = list(renderer.keys())[0] + renderer = renderer[type] + info = {} + if type == 'itemSectionRenderer': + return renderer_info(renderer['contents'][0], additional_info) + + if type in ('movieRenderer', 'clarificationRenderer'): + info['type'] = 'unsupported' + return info + + info.update(additional_info) + + if type.startswith('compact'): + info['item_size'] = 'small' + else: + info['item_size'] = 'medium' + + if type in ('compactVideoRenderer', 'videoRenderer', 'gridVideoRenderer'): + info['type'] = 'video' + elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer', + 'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer', + 'showRenderer', 'compactShowRenderer', 'gridShowRenderer'): + info['type'] = 'playlist' + elif type == 'channelRenderer': + info['type'] = 'channel' + else: + info['type'] = 'unsupported' + return info + try: - info = {} if 'viewCountText' in renderer: # prefer this one as it contains all the digits info['views'] = get_text(renderer['viewCountText']) elif 'shortViewCountText' in renderer: @@ -183,23 +260,25 @@ def renderer_info(renderer): except KeyError: continue info[simple_key] = function(node) + if info['type'] == 'video' and 'duration' not in info: + info['duration'] = 'Live' + return info except KeyError: print(renderer) raise - -def ajax_info(item_json): - try: - info = {} - for key, node in item_json.items(): - try: - simple_key, function = dispatch[key] - except KeyError: - continue - info[simple_key] = function(node) - return info - except KeyError: - print(item_json) - raise - + + + + #print(renderer) + #raise NotImplementedError('Unknown renderer type: ' + type) + return '' + +def parse_info_prepare_for_html(renderer): + item = renderer_info(renderer) + prefix_urls(item) + add_extra_html_info(item) + + return item + -- cgit v1.2.3