diff options
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/search.py | 119 | ||||
-rw-r--r-- | youtube/static/shared.css | 6 | ||||
-rw-r--r-- | youtube/templates/base.html | 4 | ||||
-rw-r--r-- | youtube/templates/common_elements.html | 152 | ||||
-rw-r--r-- | youtube/templates/search.html | 54 | ||||
-rw-r--r-- | youtube/yt_data_extract.py | 113 |
6 files changed, 363 insertions, 85 deletions
diff --git a/youtube/search.py b/youtube/search.py index 0cef0f3..fcc352f 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -1,16 +1,12 @@ -from youtube import util, html_common, yt_data_extract, proto +from youtube import util, yt_data_extract, proto, local_playlist +from youtube import yt_app import json import urllib -import html -from string import Template import base64 from math import ceil - - -with open("yt_search_results_template.html", "r") as file: - yt_search_results_template = file.read() - +from flask import request +import flask # Sort: 1 # Upload date: 2 @@ -58,41 +54,32 @@ def get_search_json(query, page, autocorrect, sort, filters): content = util.fetch_url(url, headers=headers, report_text="Got search results") info = json.loads(content) return info - -showing_results_for = Template(''' - <div>Showing results for <a>$corrected_query</a></div> - <div>Search instead for <a href="$original_query_url">$original_query</a></div> -''') -did_you_mean = Template(''' - <div>Did you mean <a href="$corrected_query_url">$corrected_query</a></div> -''') -def get_search_page(env, start_response): - start_response('200 OK', [('Content-type','text/html'),]) - parameters = env['parameters'] - if len(parameters) == 0: - return html_common.yt_basic_template.substitute( - page_title = "Search", - header = html_common.get_header(), - style = '', - page = '', - ).encode('utf-8') - query = parameters["query"][0] - page = parameters.get("page", "1")[0] - autocorrect = int(parameters.get("autocorrect", "1")[0]) - sort = int(parameters.get("sort", "0")[0]) + +@yt_app.route('/search') +def get_search_page(): + if len(request.args) == 0: + return flask.render_template('base.html', title="Search") + + if 'query' not in request.args: + abort(400) + + query = request.args.get("query") + page = request.args.get("page", "1") + autocorrect = int(request.args.get("autocorrect", "1")) + sort = int(request.args.get("sort", "0")) filters = {} - filters['time'] = int(parameters.get("time", "0")[0]) - filters['type'] = int(parameters.get("type", "0")[0]) - filters['duration'] = int(parameters.get("duration", "0")[0]) + filters['time'] = int(request.args.get("time", "0")) + filters['type'] = int(request.args.get("type", "0")) + filters['duration'] = int(request.args.get("duration", "0")) info = get_search_json(query, page, autocorrect, sort, filters) estimated_results = int(info[1]['response']['estimatedResults']) estimated_pages = ceil(estimated_results/20) results = info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'] - - corrections = '' - result_list_html = "" + + parsed_results = [] + corrections = {'type': None} for renderer in results: type = list(renderer.keys())[0] if type == 'shelfRenderer': @@ -102,41 +89,39 @@ def get_search_page(env, start_response): corrected_query_string = parameters.copy() corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) - corrections = did_you_mean.substitute( - corrected_query_url = corrected_query_url, - corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - ) + + corrections = { + 'type': 'did_you_mean', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'corrected_query_url': corrected_query_url, + } continue if type == 'showingResultsForRenderer': renderer = renderer[type] no_autocorrect_query_string = parameters.copy() no_autocorrect_query_string['autocorrect'] = ['0'] no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) - corrections = showing_results_for.substitute( - corrected_query = yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - original_query_url = no_autocorrect_query_url, - original_query = html.escape(renderer['originalQuery']['simpleText']), - ) + + corrections = { + 'type': 'showing_results_for', + 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), + 'original_query_url': no_autocorrect_query_url, + 'original_query': renderer['originalQuery']['simpleText'], + } continue - result_list_html += html_common.renderer_html(renderer, current_query_string=env['QUERY_STRING']) - - page = int(page) - if page <= 5: - page_start = 1 - page_end = min(9, estimated_pages) - else: - page_start = page - 4 - page_end = min(page + 4, estimated_pages) - - - result = Template(yt_search_results_template).substitute( - header = html_common.get_header(query), - results = result_list_html, - page_title = query + " - Search", - search_box_value = html.escape(query), - number_of_results = '{:,}'.format(estimated_results), - number_of_pages = '{:,}'.format(estimated_pages), - page_buttons = html_common.page_buttons_html(page, estimated_pages, util.URL_ORIGIN + "/search", env['QUERY_STRING']), - corrections = corrections - ) - return result.encode('utf-8') + + info = yt_data_extract.parse_info_prepare_for_html(renderer) + if info['type'] != 'unsupported': + parsed_results.append(info) + + return flask.render_template('search.html', + header_playlist_names = local_playlist.get_playlist_names(), + query = query, + estimated_results = estimated_results, + estimated_pages = estimated_pages, + corrections = corrections, + results = parsed_results, + parameters_dictionary = request.args, + ) + + diff --git a/youtube/static/shared.css b/youtube/static/shared.css index 1b25d7f..a360972 100644 --- a/youtube/static/shared.css +++ b/youtube/static/shared.css @@ -219,6 +219,12 @@ address{ max-height:2.4em; overflow:hidden; } + .medium-item .stats > *::after{ + content: " | "; + } + .medium-item .stats > *:last-child::after{ + content: ""; + } .medium-item .description{ grid-column: 2 / span 2; diff --git a/youtube/templates/base.html b/youtube/templates/base.html index e98f972..eafd369 100644 --- a/youtube/templates/base.html +++ b/youtube/templates/base.html @@ -2,13 +2,14 @@ <html> <head> <meta charset="utf-8"> - <title>{% block page_title %}{% endblock %}</title> + <title>{% block page_title %}{{ title }}{% endblock %}</title> <link href="/youtube.com/static/shared.css" type="text/css" rel="stylesheet"> <link href="/youtube.com/static/comments.css" type="text/css" rel="stylesheet"> <link href="/youtube.com/static/favicon.ico" type="image/x-icon" rel="icon"> <link title="Youtube local" href="/youtube.com/opensearch.xml" rel="search" type="application/opensearchdescription+xml"> <style type="text/css"> {% block style %} +{{ style }} {% endblock %} </style> </head> @@ -105,6 +106,7 @@ </header> <main> {% block main %} +{{ main }} {% endblock %} </main> </body> diff --git a/youtube/templates/common_elements.html b/youtube/templates/common_elements.html new file mode 100644 index 0000000..9f2aa3f --- /dev/null +++ b/youtube/templates/common_elements.html @@ -0,0 +1,152 @@ +{% macro text_runs(runs) %} + {%- if runs[0] is mapping -%} + {%- for text_run in runs -%} + {%- if text_run.get("bold", false) -%} + <b>{{ text_run["text"] }}</b> + {%- elif text_run.get('italics', false) -%} + <i>{{ text_run["text"] }}</i> + {%- else -%} + {{ text_run["text"] }} + {%- endif -%} + {%- endfor -%} + {%- else -%} + {{ runs }} + {%- endif -%} +{% endmacro %} + +{% macro small_item(info) %} + <div class="small-item-box"> + <div class="small-item"> + {% if info['type'] == 'video' %} + <a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="video-thumbnail-img" src="{{ info['thumbnail'] }}"> + <span class="video-duration">{{ info['duration'] }}</span> + </a> + <a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <address>{{ info['author'] }}</address> + <span class="views">{{ info['views'] }}</span> + + {% elif info['type'] == 'playlist' %} + <a class="playlist-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="playlist-thumbnail-img" src="{{ info['thumbnail'] }}"> + <div class="playlist-thumbnail-info"> + <span>{{ info['size'] }}</span> + </div> + </a> + <a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <address>{{ info['author'] }}</address> + {% else %} + Error: unsupported item type + {% endif %} + </div> + {% if info['type'] == 'video' %} + <input class="item-checkbox" type="checkbox" name="video_info_list" value="{{ info['video_info'] }}" form="playlist-edit"> + {% endif %} + </div> +{% endmacro %} + +{% macro get_stats(info) %} + {% if 'author_url' is in(info) %} + <address>By <a href="{{ info['author_url'] }}">{{ info['author'] }}</a></address> + {% else %} + <address><b>{{ info['author'] }}</b></address> + {% endif %} + {% if 'views' is in(info) %} + <span class="views">{{ info['views'] }}</span> + {% endif %} + {% if 'published' is in(info) %} + <time>{{ info['published'] }}</time> + {% endif %} +{% endmacro %} + + + +{% macro medium_item(info) %} + <div class="medium-item-box"> + <div class="medium-item"> + {% if info['type'] == 'video' %} + <a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="video-thumbnail-img" src="{{ info['thumbnail'] }}"> + <span class="video-duration">{{ info['duration'] }}</span> + </a> + + <a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <div class="stats"> + {{ get_stats(info) }} + </div> + + <span class="description">{{ text_runs(info['description']) }}</span> + <span class="badges">{{ info['badges']|join(' | ') }}</span> + {% elif info['type'] == 'playlist' %} + <a class="playlist-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="playlist-thumbnail-img" src="{{ info['thumbnail'] }}"> + <div class="playlist-thumbnail-info"> + <span>{{ info['size'] }}</span> + </div> + </a> + + <a class="title" href="{{ info['url'] }}" title="{{ info['title'] }}">{{ info['title'] }}</a> + + <div class="stats"> + {{ get_stats(info) }} + </div> + {% elif info['type'] == 'channel' %} + <a class="video-thumbnail-box" href="{{ info['url'] }}" title="{{ info['title'] }}"> + <img class="video-thumbnail-img" src="{{ info['thumbnail'] }}"> + </a> + + <a class="title" href="{{ info['url'] }}">{{ info['title'] }}</a> + + <span>{{ info['subscriber_count'] }}</span> + <span>{{ info['size'] }}</span> + + <span class="description">{{ text_runs(info['description']) }}</span> + {% else %} + Error: unsupported item type + {% endif %} + </div> + {% if info['type'] == 'video' %} + <input class="item-checkbox" type="checkbox" name="video_info_list" value="{{ info['video_info'] }}" form="playlist-edit"> + {% endif %} + </div> +{% endmacro %} + + +{% macro item(info) %} + {% if info['item_size'] == 'small' %} + {{ small_item(info) }} + {% elif info['item_size'] == 'medium' %} + {{ medium_item(info) }} + {% else %} + Error: Unknown item size + {% endif %} +{% endmacro %} + + + +{% macro page_buttons(estimated_pages, url, parameters_dictionary) %} + {% set current_page = parameters_dictionary.get('page', 1)|int %} + {% set parameters_dictionary = parameters_dictionary.to_dict() %} + {% if current_page is le(5) %} + {% set page_start = 1 %} + {% set page_end = [9, estimated_pages]|min %} + {% else %} + {% set page_start = current_page - 4 %} + {% set page_end = [current_page + 4, estimated_pages]|min %} + {% endif %} + + {% for page in range(page_start, page_end+1) %} + {% if page == current_page %} + <div class="page-button">{{ page }}</div> + {% else %} + {# IMPORTANT: Jinja SUCKS #} + {# https://stackoverflow.com/questions/36886650/how-to-add-a-new-entry-into-a-dictionary-object-while-using-jinja2 #} + {% set _ = parameters_dictionary.__setitem__('page', page) %} + <a class="page-button" href="{{ url + '?' + parameters_dictionary|urlencode }}">{{ page }}</a> + {% endif %} + {% endfor %} + +{% endmacro %} diff --git a/youtube/templates/search.html b/youtube/templates/search.html new file mode 100644 index 0000000..1086cfd --- /dev/null +++ b/youtube/templates/search.html @@ -0,0 +1,54 @@ +{% set search_box_value = query %} +{% extends "base.html" %} +{% block page_title %}{{ query + ' - Search' }}{% endblock %} +{% import "common_elements.html" as common_elements %} +{% block style %} + main{ + display:grid; + grid-template-columns: minmax(0px, 1fr) 800px minmax(0px,2fr); + max-width:100vw; + } + + + #number-of-results{ + font-weight:bold; + } + #result-info{ + grid-row: 1; + grid-column:2; + align-self:center; + } + .page-button-row{ + grid-column: 2; + justify-self: center; + } + + + .item-list{ + grid-row: 2; + grid-column: 2; + } + .badge{ + background-color:#cccccc; + } +{% endblock style %} + +{% block main %} + <div id="result-info"> + <div id="number-of-results">Approximately {{ '{:,}'.format(estimated_results) }} results ({{ '{:,}'.format(estimated_pages) }} pages)</div> +{% if corrections['type'] == 'showing_results_for' %} + <div>Showing results for <a>{{ corrections['corrected_query']|safe }}</a></div> + <div>Search instead for <a href="{{ corrections['original_query_url'] }}">{{ corrections['original_query'] }}</a></div> +{% elif corrections['type'] == 'did_you_mean' %} + <div>Did you mean <a href="{{ corrections['corrected_query_url'] }}">{{ corrections['corrected_query']|safe }}</a></div> +{% endif %} + </div> + <div class="item-list"> + {% for info in results %} + {{ common_elements.item(info) }} + {% endfor %} + </div> + <nav class="page-button-row"> + {{ common_elements.page_buttons(estimated_pages, '/https://www.youtube.com/search', parameters_dictionary) }} + </nav> +{% endblock main %} diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 5483911..a487c57 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -1,4 +1,7 @@ +from youtube import util + import html +import json # videos (all of type str): @@ -138,9 +141,83 @@ dispatch = { } -def renderer_info(renderer): +def ajax_info(item_json): + try: + info = {} + for key, node in item_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(item_json) + raise + + + +def prefix_urls(item): + try: + item['thumbnail'] = '/' + item['thumbnail'].lstrip('/') + except KeyError: + pass + + try: + item['author_url'] = util.URL_ORIGIN + item['author_url'] + except KeyError: + pass + +def add_extra_html_info(item): + if item['type'] == 'video': + item['url'] = util.URL_ORIGIN + '/watch?v=' + item['id'] + + video_info = {} + for key in ('id', 'title', 'author', 'duration'): + try: + video_info[key] = item[key] + except KeyError: + video_info[key] = '' + + item['video_info'] = json.dumps(video_info) + + elif item['type'] == 'playlist': + item['url'] = util.URL_ORIGIN + '/playlist?list=' + item['id'] + elif item['type'] == 'channel': + item['url'] = util.URL_ORIGIN + "/channel/" + item['id'] + + +def renderer_info(renderer, additional_info={}): + type = list(renderer.keys())[0] + renderer = renderer[type] + info = {} + if type == 'itemSectionRenderer': + return renderer_info(renderer['contents'][0], additional_info) + + if type in ('movieRenderer', 'clarificationRenderer'): + info['type'] = 'unsupported' + return info + + info.update(additional_info) + + if type.startswith('compact'): + info['item_size'] = 'small' + else: + info['item_size'] = 'medium' + + if type in ('compactVideoRenderer', 'videoRenderer', 'gridVideoRenderer'): + info['type'] = 'video' + elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer', + 'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer', + 'showRenderer', 'compactShowRenderer', 'gridShowRenderer'): + info['type'] = 'playlist' + elif type == 'channelRenderer': + info['type'] = 'channel' + else: + info['type'] = 'unsupported' + return info + try: - info = {} if 'viewCountText' in renderer: # prefer this one as it contains all the digits info['views'] = get_text(renderer['viewCountText']) elif 'shortViewCountText' in renderer: @@ -183,23 +260,25 @@ def renderer_info(renderer): except KeyError: continue info[simple_key] = function(node) + if info['type'] == 'video' and 'duration' not in info: + info['duration'] = 'Live' + return info except KeyError: print(renderer) raise - -def ajax_info(item_json): - try: - info = {} - for key, node in item_json.items(): - try: - simple_key, function = dispatch[key] - except KeyError: - continue - info[simple_key] = function(node) - return info - except KeyError: - print(item_json) - raise - + + + + #print(renderer) + #raise NotImplementedError('Unknown renderer type: ' + type) + return '' + +def parse_info_prepare_for_html(renderer): + item = renderer_info(renderer) + prefix_urls(item) + add_extra_html_info(item) + + return item + |