From 89e5761f8d9ae4221c4a97eca3c0fce3405a5bc4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sun, 8 Sep 2019 18:42:08 -0700 Subject: Extraction: Move playlist extraction to yt_data_extract --- youtube/playlist.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) (limited to 'youtube/playlist.py') diff --git a/youtube/playlist.py b/youtube/playlist.py index 3e5b0d2..2f7abdc 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -89,28 +89,20 @@ def get_playlist_page(): ) gevent.joinall(tasks) first_page_json, this_page_json = tasks[0].value, tasks[1].value - - try: # first page - video_list = this_page_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] - except KeyError: # other pages - video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] - - parsed_video_list = [yt_data_extract.parse_info_prepare_for_html(video_json) for video_json in video_list] - - - metadata = yt_data_extract.renderer_info(first_page_json['response']['header']) - yt_data_extract.prefix_urls(metadata) - if 'description' not in metadata: - metadata['description'] = '' + info = yt_data_extract.extract_playlist_info(this_page_json) + if page != '1': + info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) - video_count = int(metadata['size'].replace(',', '')) - metadata['size'] += ' videos' + yt_data_extract.prefix_urls(info['metadata']) + for item in info['items']: + yt_data_extract.prefix_urls(item) + yt_data_extract.add_extra_html_info(item) return flask.render_template('playlist.html', - video_list = parsed_video_list, - num_pages = math.ceil(video_count/20), + video_list = info['items'], + num_pages = math.ceil(info['metadata']['size']/20), parameters_dictionary = request.args, - **metadata + 
**info['metadata'] ).encode('utf-8') -- cgit v1.2.3 From dc6c370152d063ad4198c747fc12eb06fc1ec0e4 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Sep 2019 21:39:53 -0700 Subject: Extraction: refactor response extraction to work with both mobile & desktop responses, also improve errors --- youtube/playlist.py | 3 +++ 1 file changed, 3 insertions(+) (limited to 'youtube/playlist.py') diff --git a/youtube/playlist.py b/youtube/playlist.py index 2f7abdc..bc2c417 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -91,6 +91,9 @@ def get_playlist_page(): first_page_json, this_page_json = tasks[0].value, tasks[1].value info = yt_data_extract.extract_playlist_info(this_page_json) + if info['error']: + return flask.render_template('error.html', error_message = info['error']) + if page != '1': info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) -- cgit v1.2.3 From 98777ee82561ae205f156a7f8497728aecfa080c Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Dec 2019 19:39:16 -0800 Subject: Extraction: Rewrite item_extraction for better error handling and readability, rename extracted names for more consistency --- youtube/playlist.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'youtube/playlist.py') diff --git a/youtube/playlist.py b/youtube/playlist.py index bc2c417..ced0644 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -98,13 +98,19 @@ def get_playlist_page(): info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) yt_data_extract.prefix_urls(info['metadata']) - for item in info['items']: + for item in info.get('items', ()): yt_data_extract.prefix_urls(item) yt_data_extract.add_extra_html_info(item) + if 'id' in item: + item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg' + + video_count = yt_data_extract.default_multi_get(info, 'metadata', 'video_count') + if video_count is None: + video_count = 40 return 
flask.render_template('playlist.html', - video_list = info['items'], - num_pages = math.ceil(info['metadata']['size']/20), + video_list = info.get('items', []), + num_pages = math.ceil(video_count/20), parameters_dictionary = request.args, **info['metadata'] -- cgit v1.2.3 From f6bf5213a579b16e17e8d72b51b090ffe4bc9bdb Mon Sep 17 00:00:00 2001 From: James Taylor Date: Wed, 18 Dec 2019 19:43:55 -0800 Subject: Extraction: rename multi_get functions to more descriptive names --- youtube/playlist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube/playlist.py') diff --git a/youtube/playlist.py b/youtube/playlist.py index ced0644..5dc8ab7 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -104,7 +104,7 @@ def get_playlist_page(): if 'id' in item: item['thumbnail'] = '/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg' - video_count = yt_data_extract.default_multi_get(info, 'metadata', 'video_count') + video_count = yt_data_extract.deep_get(info, 'metadata', 'video_count') if video_count is None: video_count = 40 -- cgit v1.2.3 From d1d908d5b1aadb0dc75b25df1a47789c021f89e2 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 19 Dec 2019 19:48:53 -0800 Subject: Extraction: Move html post processing stuff from yt_data_extract to util --- youtube/playlist.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'youtube/playlist.py') diff --git a/youtube/playlist.py b/youtube/playlist.py index 5dc8ab7..3ca235a 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -97,10 +97,10 @@ def get_playlist_page(): if page != '1': info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) - yt_data_extract.prefix_urls(info['metadata']) + util.prefix_urls(info['metadata']) for item in info.get('items', ()): - yt_data_extract.prefix_urls(item) - yt_data_extract.add_extra_html_info(item) + util.prefix_urls(item) + util.add_extra_html_info(item) if 'id' in item: item['thumbnail'] = 
'/https://i.ytimg.com/vi/' + item['id'] + '/default.jpg' -- cgit v1.2.3