diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-09-08 18:42:08 -0700 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-09-08 18:42:08 -0700 |
commit | 89e5761f8d9ae4221c4a97eca3c0fce3405a5bc4 (patch) | |
tree | 02aa1b329ca5800992e077f988c19b14165cf5a0 /youtube | |
parent | c362a5e834d88524c154cb010be9dc909dcbe25d (diff) | |
download | yt-local-89e5761f8d9ae4221c4a97eca3c0fce3405a5bc4.tar.lz yt-local-89e5761f8d9ae4221c4a97eca3c0fce3405a5bc4.tar.xz yt-local-89e5761f8d9ae4221c4a97eca3c0fce3405a5bc4.zip |
Extraction: Move playlist extraction to yt_data_extract
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/playlist.py | 28 | ||||
-rw-r--r-- | youtube/templates/playlist.html | 2 | ||||
-rw-r--r-- | youtube/yt_data_extract.py | 27 |
3 files changed, 38 insertions, 19 deletions
diff --git a/youtube/playlist.py b/youtube/playlist.py index 3e5b0d2..2f7abdc 100644 --- a/youtube/playlist.py +++ b/youtube/playlist.py @@ -89,28 +89,20 @@ def get_playlist_page(): ) gevent.joinall(tasks) first_page_json, this_page_json = tasks[0].value, tasks[1].value - - try: # first page - video_list = this_page_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] - except KeyError: # other pages - video_list = this_page_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] - - parsed_video_list = [yt_data_extract.parse_info_prepare_for_html(video_json) for video_json in video_list] - - - metadata = yt_data_extract.renderer_info(first_page_json['response']['header']) - yt_data_extract.prefix_urls(metadata) - if 'description' not in metadata: - metadata['description'] = '' + info = yt_data_extract.extract_playlist_info(this_page_json) + if page != '1': + info['metadata'] = yt_data_extract.extract_playlist_metadata(first_page_json) - video_count = int(metadata['size'].replace(',', '')) - metadata['size'] += ' videos' + yt_data_extract.prefix_urls(info['metadata']) + for item in info['items']: + yt_data_extract.prefix_urls(item) + yt_data_extract.add_extra_html_info(item) return flask.render_template('playlist.html', - video_list = parsed_video_list, - num_pages = math.ceil(video_count/20), + video_list = info['items'], + num_pages = math.ceil(info['metadata']['size']/20), parameters_dictionary = request.args, - **metadata + **info['metadata'] ).encode('utf-8') diff --git a/youtube/templates/playlist.html b/youtube/templates/playlist.html index ab2640f..52c468e 100644 --- a/youtube/templates/playlist.html +++ b/youtube/templates/playlist.html @@ -55,7 +55,7 @@ <a class="playlist-author" href="{{ author_url }}">{{ author }}</a> <div class="playlist-stats"> <div>{{ views }}</div> - <div>{{ size }}</div> + <div>{{ size }} videos</div> </div> <div class="playlist-description">{{ common_elements.text_runs(description) }}</div> </div> diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 95c68bc..e7a2f1e 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -449,3 +449,30 @@ def extract_search_info(polymer_json): return info + +def extract_playlist_metadata(polymer_json): + metadata = renderer_info(polymer_json['response']['header']) + + if 'description' not in metadata: + metadata['description'] = '' + + metadata['size'] = int(metadata['size'].replace(',', '')) + + return metadata + +def extract_playlist_info(polymer_json): + info = {} + try: # first page + video_list = polymer_json['response']['contents']['singleColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['playlistVideoListRenderer']['contents'] + first_page = True + except KeyError: # other pages + video_list = polymer_json['response']['continuationContents']['playlistVideoListContinuation']['contents'] + first_page = False + + info['items'] = [renderer_info(renderer) for renderer in video_list] + + if first_page: + info['metadata'] = extract_playlist_metadata(polymer_json) + + return info + |