author     James Taylor <user234683@users.noreply.github.com>    2019-12-19 21:33:54 -0800
committer  James Taylor <user234683@users.noreply.github.com>    2019-12-19 21:33:54 -0800
commit     b4406df9cf33c53b6e942e6a5c72d955f57c4b5f (patch)
tree       4de0082ac9eb26a05188dd424835ea50b1483113 /youtube/search.py
parent     b614fcdb8579ba29fccfa47eab1e2965cfb0beaa (diff)
parent     6b7a1212e30b713453aa7d2b3a7122e97689dad0 (diff)
Merge branch 'modular-data-extract'
Commits in this branch are prefixed with "Extraction:".

This branch refactors data extraction. All such functionality has been moved to the yt_data_extract module. Responses from requests are given to the module, which parses them into a consistent, more useful format.

The dependency on youtube-dl has also been dropped, and this functionality has been rebuilt from scratch, for these reasons:

(1) I've noticed youtube-dl breaks more often than invidious (whose watch-page extraction is also built from scratch) in response to changes from YouTube, so I'm hoping what I wrote will also be less brittle.

(2) Such breakage is inconvenient because I have to manually merge the fixes, since I had to modify youtube-dl to make it do things such as extracting related videos.

(3) I have no control over error handling and request pooling with youtube-dl, since it performs all the requests itself (changing that would require intrusive modifications I don't want to maintain).

(4) I will now finally be able to display the number of comments, and whether comments are disabled, without making additional requests.
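For orientation before the diff below, a rough sketch of the shape search.py now expects back from the extraction module. The keys are taken from how the new code in this diff consumes the result; the example values are illustrative, not output from the module itself:

    # Illustrative only: the dict that yt_data_extract.extract_search_info()
    # returns, as read by youtube/search.py in the diff below.
    search_info = {
        'error': None,                 # or a human-readable error message string
        'estimated_results': 1000000,  # made-up number
        'estimated_pages': 50000,      # made-up number
        'items': [
            # one dict per search result; util.prefix_urls() and
            # util.add_extra_html_info() are applied to each before rendering
        ],
        'corrections': {
            'type': None,              # None, 'did_you_mean', or 'showing_results_for'
            # depending on the type, 'corrected_query', 'original_query' and the
            # corresponding *_url keys are present or filled in by search.py
        },
    }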
Diffstat (limited to 'youtube/search.py')
-rw-r--r--    youtube/search.py    74
1 file changed, 24 insertions, 50 deletions
diff --git a/youtube/search.py b/youtube/search.py
index e167279..0f6bbc4 100644
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -5,7 +5,6 @@ import settings
import json
import urllib
import base64
-from math import ceil
import mimetypes
from flask import request
import flask
@@ -74,59 +73,34 @@ def get_search_page():
filters['time'] = int(request.args.get("time", "0"))
filters['type'] = int(request.args.get("type", "0"))
filters['duration'] = int(request.args.get("duration", "0"))
- info = get_search_json(query, page, autocorrect, sort, filters)
-
- estimated_results = int(info[1]['response']['estimatedResults'])
- estimated_pages = ceil(estimated_results/20)
-
- # almost always is the first "section", but if there's an advertisement for a google product like Stadia or Home in the search results, then that becomes the first "section" and the search results are in the second. So just join all of them for resiliency
- results = []
- for section in info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']:
- results += section['itemSectionRenderer']['contents']
-
- parsed_results = []
- corrections = {'type': None}
- for renderer in results:
- type = list(renderer.keys())[0]
- if type == 'shelfRenderer':
- continue
- if type == 'didYouMeanRenderer':
- renderer = renderer[type]
- corrected_query_string = request.args.to_dict(flat=False)
- corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
- corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
-
- corrections = {
- 'type': 'did_you_mean',
- 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
- 'corrected_query_url': corrected_query_url,
- }
- continue
- if type == 'showingResultsForRenderer':
- renderer = renderer[type]
- no_autocorrect_query_string = request.args.to_dict(flat=False)
- no_autocorrect_query_string['autocorrect'] = ['0']
- no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
-
- corrections = {
- 'type': 'showing_results_for',
- 'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
- 'original_query_url': no_autocorrect_query_url,
- 'original_query': renderer['originalQuery']['simpleText'],
- }
- continue
-
- info = yt_data_extract.parse_info_prepare_for_html(renderer)
- if info['type'] != 'unsupported':
- parsed_results.append(info)
+ polymer_json = get_search_json(query, page, autocorrect, sort, filters)
+
+ search_info = yt_data_extract.extract_search_info(polymer_json)
+ if search_info['error']:
+ return flask.render_template('error.html', error_message = search_info['error'])
+
+ for extract_item_info in search_info['items']:
+ util.prefix_urls(extract_item_info)
+ util.add_extra_html_info(extract_item_info)
+
+ corrections = search_info['corrections']
+ if corrections['type'] == 'did_you_mean':
+ corrected_query_string = request.args.to_dict(flat=False)
+ corrected_query_string['query'] = [corrections['corrected_query']]
+ corrections['corrected_query_url'] = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
+ elif corrections['type'] == 'showing_results_for':
+ no_autocorrect_query_string = request.args.to_dict(flat=False)
+ no_autocorrect_query_string['autocorrect'] = ['0']
+ no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
+ corrections['original_query_url'] = no_autocorrect_query_url
return flask.render_template('search.html',
header_playlist_names = local_playlist.get_playlist_names(),
query = query,
- estimated_results = estimated_results,
- estimated_pages = estimated_pages,
- corrections = corrections,
- results = parsed_results,
+ estimated_results = search_info['estimated_results'],
+ estimated_pages = search_info['estimated_pages'],
+ corrections = search_info['corrections'],
+ results = search_info['items'],
parameters_dictionary = request.args,
)
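The corrected-query and original-query links above are rebuilt from the incoming query string with request.args.to_dict(flat=False) plus urllib.parse.urlencode(..., doseq=True). A standalone illustration of that pattern (the argument values here are made up):

    from urllib.parse import urlencode

    # Flask's request.args.to_dict(flat=False) returns a dict of lists,
    # preserving repeated parameters, e.g.:
    args = {'query': ['speedrun'], 'page': ['2'], 'autocorrect': ['1']}

    # swap in the corrected query, keeping everything else intact
    args['query'] = ['speedrun world record']

    # doseq=True encodes each list element as its own key=value pair
    print(urlencode(args, doseq=True))
    # -> query=speedrun+world+record&page=2&autocorrect=1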