author     James Taylor <user234683@users.noreply.github.com>  2019-12-19 21:33:54 -0800
committer  James Taylor <user234683@users.noreply.github.com>  2019-12-19 21:33:54 -0800
commit     b4406df9cf33c53b6e942e6a5c72d955f57c4b5f
tree       4de0082ac9eb26a05188dd424835ea50b1483113 /youtube/search.py
parent     b614fcdb8579ba29fccfa47eab1e2965cfb0beaa
parent     6b7a1212e30b713453aa7d2b3a7122e97689dad0
Merge branch 'modular-data-extract'
Commits in this branch are prefixed with "Extraction:"
This branch refactors data extraction. All such functionality has been moved to the yt_data_extract module.
Responses from requests are handed to the module, which parses them into a consistent, more useful format.
The dependency on youtube-dl has also been dropped, and its functionality rebuilt from scratch, for these reasons:
(1) I've noticed youtube-dl breaks more often than invidious (which uses watch-page extraction built from scratch) in response to changes from YouTube, so I'm hoping what I wrote will also be less brittle.
(2) Such breakage is inconvenient: I had to modify youtube-dl to do things such as extracting related videos, so I have to merge upstream fixes manually.
(3) I have no control over error handling and request pooling with youtube-dl, since it performs all the requests itself (changing this would require intrusive modifications I don't want to maintain).
(4) I will finally be able to display the number of comments, and whether comments are disabled, without making additional requests.
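For illustration, here is a rough sketch of the "consistent format" the module produces. The field names are inferred from how youtube/search.py consumes the result in the diff below; this is not a complete spec of yt_data_extract.

# Hypothetical sketch of the dict returned by
# yt_data_extract.extract_search_info, inferred from the call
# sites in youtube/search.py below. Values are illustrative.
search_info = {
    'error': None,                 # error message string if extraction failed
    'estimated_results': 5400000,
    'estimated_pages': 270000,
    'items': [],                   # one dict per video/channel/playlist result
    'corrections': {
        'type': 'did_you_mean',    # or 'showing_results_for', or None
        'corrected_query': 'example corrected query',
    },
}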
Diffstat (limited to 'youtube/search.py')
-rw-r--r--  youtube/search.py  74
1 file changed, 24 insertions(+), 50 deletions(-)
diff --git a/youtube/search.py b/youtube/search.py
index e167279..0f6bbc4 100644
--- a/youtube/search.py
+++ b/youtube/search.py
@@ -5,7 +5,6 @@ import settings
 import json
 import urllib
 import base64
-from math import ceil
 import mimetypes
 from flask import request
 import flask
@@ -74,59 +73,34 @@ def get_search_page():
     filters['time'] = int(request.args.get("time", "0"))
     filters['type'] = int(request.args.get("type", "0"))
     filters['duration'] = int(request.args.get("duration", "0"))
-    info = get_search_json(query, page, autocorrect, sort, filters)
-
-    estimated_results = int(info[1]['response']['estimatedResults'])
-    estimated_pages = ceil(estimated_results/20)
-
-    # almost always is the first "section", but if there's an advertisement for a google product like Stadia or Home in the search results, then that becomes the first "section" and the search results are in the second. So just join all of them for resiliency
-    results = []
-    for section in info[1]['response']['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents']:
-        results += section['itemSectionRenderer']['contents']
-
-    parsed_results = []
-    corrections = {'type': None}
-    for renderer in results:
-        type = list(renderer.keys())[0]
-        if type == 'shelfRenderer':
-            continue
-        if type == 'didYouMeanRenderer':
-            renderer = renderer[type]
-            corrected_query_string = request.args.to_dict(flat=False)
-            corrected_query_string['query'] = [renderer['correctedQueryEndpoint']['searchEndpoint']['query']]
-            corrected_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
-
-            corrections = {
-                'type': 'did_you_mean',
-                'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
-                'corrected_query_url': corrected_query_url,
-            }
-            continue
-        if type == 'showingResultsForRenderer':
-            renderer = renderer[type]
-            no_autocorrect_query_string = request.args.to_dict(flat=False)
-            no_autocorrect_query_string['autocorrect'] = ['0']
-            no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
-
-            corrections = {
-                'type': 'showing_results_for',
-                'corrected_query': yt_data_extract.format_text_runs(renderer['correctedQuery']['runs']),
-                'original_query_url': no_autocorrect_query_url,
-                'original_query': renderer['originalQuery']['simpleText'],
-            }
-            continue
-
-        info = yt_data_extract.parse_info_prepare_for_html(renderer)
-        if info['type'] != 'unsupported':
-            parsed_results.append(info)
+    polymer_json = get_search_json(query, page, autocorrect, sort, filters)
+
+    search_info = yt_data_extract.extract_search_info(polymer_json)
+    if search_info['error']:
+        return flask.render_template('error.html', error_message = search_info['error'])
+
+    for extract_item_info in search_info['items']:
+        util.prefix_urls(extract_item_info)
+        util.add_extra_html_info(extract_item_info)
+
+    corrections = search_info['corrections']
+    if corrections['type'] == 'did_you_mean':
+        corrected_query_string = request.args.to_dict(flat=False)
+        corrected_query_string['query'] = [corrections['corrected_query']]
+        corrections['corrected_query_url'] = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
+    elif corrections['type'] == 'showing_results_for':
+        no_autocorrect_query_string = request.args.to_dict(flat=False)
+        no_autocorrect_query_string['autocorrect'] = ['0']
+        no_autocorrect_query_url = util.URL_ORIGIN + '/search?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
+        corrections['original_query_url'] = no_autocorrect_query_url
 
     return flask.render_template('search.html',
         header_playlist_names = local_playlist.get_playlist_names(),
         query = query,
-        estimated_results = estimated_results,
-        estimated_pages = estimated_pages,
-        corrections = corrections,
-        results = parsed_results,
+        estimated_results = search_info['estimated_results'],
+        estimated_pages = search_info['estimated_pages'],
+        corrections = search_info['corrections'],
+        results = search_info['items'],
         parameters_dictionary = request.args,
     )
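A note on the query-string rebuilding in both correction branches: Flask's request.args.to_dict(flat=False) returns a dict mapping each parameter name to a list of values, and urllib.parse.urlencode(..., doseq=True) encodes each list element as a repeated key=value pair. A minimal standalone sketch (a plain dict stands in for request.args here):

import urllib.parse

# Plain dict standing in for request.args.to_dict(flat=False),
# which maps each query parameter to a list of values.
args = {'query': ['cat videos'], 'autocorrect': ['1']}
args['autocorrect'] = ['0']  # ask for the original, uncorrected results

# doseq=True expands each list into repeated key=value pairs.
print('/search?' + urllib.parse.urlencode(args, doseq=True))
# -> /search?query=cat+videos&autocorrect=0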