From c362a5e834d88524c154cb010be9dc909dcbe25d Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 18:06:30 -0700 Subject: Extraction: Move search extraction to yt_data_extract --- youtube/search.py | 58 +++++++++---------------------------------------------- 1 file changed, 9 insertions(+), 49 deletions(-) (limited to 'youtube/search.py') diff --git a/youtube/search.py b/youtube/search.py index e167279..81a69f2 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -5,7 +5,6 @@ import settings import json import urllib import base64 -from math import ceil import mimetypes from flask import request import flask @@ -74,59 +73,20 @@ def get_search_page(): filters['time'] = int(request.args.get("time", "0")) filters['type'] = int(request.args.get("type", "0")) filters['duration'] = int(request.args.get("duration", "0")) - info = get_search_json(query, page, autocorrect, sort, filters) - - estimated_results = int(info[1]['response']['estimatedResults']) - estimated_pages = ceil(estimated_results/20) + polymer_json = get_search_json(query, page, autocorrect, sort, filters) - # almost always is the first "section", but if there's an advertisement for a google product like Stadia or Home in the search results, then that becomes the first "section" and the search results are in the second. 
So just join all of them for resiliency - results = [] - for section in info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']: - results += section['itemSectionRenderer']['contents'] - - parsed_results = [] - corrections = {'type': None} - for renderer in results: - type = list(renderer.keys())[0] - if type == 'shelfRenderer': - continue - if type == 'didYouMeanRenderer': - renderer = renderer[type] - corrected_query_string = request.args.to_dict(flat=False) - corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']] - corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) - - corrections = { - 'type': 'did_you_mean', - 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - 'corrected_query_url': corrected_query_url, - } - continue - if type == 'showingResultsForRenderer': - renderer = renderer[type] - no_autocorrect_query_string = request.args.to_dict(flat=False) - no_autocorrect_query_string['autocorrect'] = ['0'] - no_autocorrect_query_url = util.URL_ORIGIN + '/search?' 
+ urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) - - corrections = { - 'type': 'showing_results_for', - 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']), - 'original_query_url': no_autocorrect_query_url, - 'original_query': renderer['originalQuery']['simpleText'], - } - continue - - info = yt_data_extract.parse_info_prepare_for_html(renderer) - if info['type'] != 'unsupported': - parsed_results.append(info) + search_info = yt_data_extract.extract_search_info(polymer_json) + for item_info in search_info['items']: + yt_data_extract.prefix_urls(item_info) + yt_data_extract.add_extra_html_info(item_info) return flask.render_template('search.html', header_playlist_names = local_playlist.get_playlist_names(), query = query, - estimated_results = estimated_results, - estimated_pages = estimated_pages, - corrections = corrections, - results = parsed_results, + estimated_results = search_info['estimated_results'], + estimated_pages = search_info['estimated_pages'], + corrections = search_info['corrections'], + results = search_info['items'], parameters_dictionary = request.args, ) -- cgit v1.2.3 From dc6c370152d063ad4198c747fc12eb06fc1ec0e4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Sep 2019 21:39:53 -0700 Subject: Extraction: refactor response extraction to work with both mobile & desktop responses, also improve errors --- youtube/search.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'youtube/search.py') diff --git a/youtube/search.py b/youtube/search.py index 81a69f2..ba40f0b 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -76,6 +76,9 @@ def get_search_page(): polymer_json = get_search_json(query, page, autocorrect, sort, filters) search_info = yt_data_extract.extract_search_info(polymer_json) + if search_info['error']: + return flask.render_template('error.html', error_message = search_info['error']) + for item_info in search_info['items']: yt_data_extract.prefix_urls(item_info) 
yt_data_extract.add_extra_html_info(item_info) -- cgit v1.2.3 From 9abb83fdbc05294f186daeefff8c85cfda06b7d2 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 27 Sep 2019 19:27:19 -0700 Subject: Extraction: Fix did_you_mean and showing_results_for --- youtube/search.py | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'youtube/search.py') diff --git a/youtube/search.py b/youtube/search.py index ba40f0b..cb66744 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -83,6 +83,17 @@ def get_search_page(): yt_data_extract.prefix_urls(item_info) yt_data_extract.add_extra_html_info(item_info) + corrections = search_info['corrections'] + if corrections['type'] == 'did_you_mean': + corrected_query_string = request.args.to_dict(flat=False) + corrected_query_string['query'] = [corrections['corrected_query']] + corrections['corrected_query_url'] = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True) + elif corrections['type'] == 'showing_results_for': + no_autocorrect_query_string = request.args.to_dict(flat=False) + no_autocorrect_query_string['autocorrect'] = ['0'] + no_autocorrect_query_url = util.URL_ORIGIN + '/search?' 
+ urllib.parse.urlencode(no_autocorrect_query_string, doseq=True) + corrections['original_query_url'] = no_autocorrect_query_url + return flask.render_template('search.html', header_playlist_names = local_playlist.get_playlist_names(), query = query, -- cgit v1.2.3 From 98777ee82561ae205f156a7f8497728aecfa080c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Dec 2019 19:39:16 -0800 Subject: Extraction: Rewrite item_extraction for better error handling and readability, rename extracted names for more consistency --- youtube/search.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube/search.py') diff --git a/youtube/search.py b/youtube/search.py index cb66744..a881557 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -79,9 +79,9 @@ def get_search_page(): if search_info['error']: return flask.render_template('error.html', error_message = search_info['error']) - for item_info in search_info['items']: - yt_data_extract.prefix_urls(item_info) - yt_data_extract.add_extra_html_info(item_info) + for extract_item_info in search_info['items']: + yt_data_extract.prefix_urls(extract_item_info) + yt_data_extract.add_extra_html_info(extract_item_info) corrections = search_info['corrections'] if corrections['type'] == 'did_you_mean': -- cgit v1.2.3 From d1d908d5b1aadb0dc75b25df1a47789c021f89e2 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 19 Dec 2019 19:48:53 -0800 Subject: Extraction: Move html post processing stuff from yt_data_extract to util --- youtube/search.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'youtube/search.py') diff --git a/youtube/search.py b/youtube/search.py index a881557..0f6bbc4 100644 --- a/youtube/search.py +++ b/youtube/search.py @@ -80,8 +80,8 @@ def get_search_page(): return flask.render_template('error.html', error_message = search_info['error']) for extract_item_info in search_info['items']: - yt_data_extract.prefix_urls(extract_item_info) - 
yt_data_extract.add_extra_html_info(extract_item_info) + util.prefix_urls(extract_item_info) + util.add_extra_html_info(extract_item_info) corrections = search_info['corrections'] if corrections['type'] == 'did_you_mean': -- cgit v1.2.3