From 1156b0998758ee803c7e8ae0cc2beb5181c232a3 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 21 Jun 2019 21:41:41 -0700 Subject: Refactor search page --- youtube/yt_data_extract.py | 113 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 17 deletions(-) (limited to 'youtube/yt_data_extract.py') diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 5483911..a487c57 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -1,4 +1,7 @@ +from youtube import util + import html +import json # videos (all of type str): @@ -138,9 +141,83 @@ dispatch = { } -def renderer_info(renderer): +def ajax_info(item_json): + try: + info = {} + for key, node in item_json.items(): + try: + simple_key, function = dispatch[key] + except KeyError: + continue + info[simple_key] = function(node) + return info + except KeyError: + print(item_json) + raise + + + +def prefix_urls(item): + try: + item['thumbnail'] = '/' + item['thumbnail'].lstrip('/') + except KeyError: + pass + + try: + item['author_url'] = util.URL_ORIGIN + item['author_url'] + except KeyError: + pass + +def add_extra_html_info(item): + if item['type'] == 'video': + item['url'] = util.URL_ORIGIN + '/watch?v=' + item['id'] + + video_info = {} + for key in ('id', 'title', 'author', 'duration'): + try: + video_info[key] = item[key] + except KeyError: + video_info[key] = '' + + item['video_info'] = json.dumps(video_info) + + elif item['type'] == 'playlist': + item['url'] = util.URL_ORIGIN + '/playlist?list=' + item['id'] + elif item['type'] == 'channel': + item['url'] = util.URL_ORIGIN + "/channel/" + item['id'] + + +def renderer_info(renderer, additional_info={}): + type = list(renderer.keys())[0] + renderer = renderer[type] + info = {} + if type == 'itemSectionRenderer': + return renderer_info(renderer['contents'][0], additional_info) + + if type in ('movieRenderer', 'clarificationRenderer'): + info['type'] = 'unsupported' + return info + + info.update(additional_info) + + if type.startswith('compact'): + info['item_size'] = 'small' + else: + info['item_size'] = 'medium' + + if type in ('compactVideoRenderer', 'videoRenderer', 'gridVideoRenderer'): + info['type'] = 'video' + elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer', + 'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer', + 'showRenderer', 'compactShowRenderer', 'gridShowRenderer'): + info['type'] = 'playlist' + elif type == 'channelRenderer': + info['type'] = 'channel' + else: + info['type'] = 'unsupported' + return info + try: - info = {} if 'viewCountText' in renderer: # prefer this one as it contains all the digits info['views'] = get_text(renderer['viewCountText']) elif 'shortViewCountText' in renderer: @@ -183,23 +260,25 @@ def renderer_info(renderer): except KeyError: continue info[simple_key] = function(node) + if info['type'] == 'video' and 'duration' not in info: + info['duration'] = 'Live' + return info except KeyError: print(renderer) raise - -def ajax_info(item_json): - try: - info = {} - for key, node in item_json.items(): - try: - simple_key, function = dispatch[key] - except KeyError: - continue - info[simple_key] = function(node) - return info - except KeyError: - print(item_json) - raise - + + + + #print(renderer) + #raise NotImplementedError('Unknown renderer type: ' + type) + return '' + +def parse_info_prepare_for_html(renderer): + item = renderer_info(renderer) + prefix_urls(item) + add_extra_html_info(item) + + return item + -- cgit v1.2.3 From d105d4520ff0bf529cfb18c9c16a22900ab7f481 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 4 Jul 2019 18:08:14 -0700 Subject: Convert playlist page to flask framework --- youtube/yt_data_extract.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'youtube/yt_data_extract.py') diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index a487c57..a42b6a2 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -200,12 +200,12 @@ def renderer_info(renderer, additional_info={}): info.update(additional_info) - if type.startswith('compact'): + if type.startswith('compact') or type.startswith('playlist') or type.startswith('grid'): info['item_size'] = 'small' else: info['item_size'] = 'medium' - if type in ('compactVideoRenderer', 'videoRenderer', 'gridVideoRenderer'): + if type in ('compactVideoRenderer', 'videoRenderer', 'playlistVideoRenderer', 'gridVideoRenderer'): info['type'] = 'video' elif type in ('playlistRenderer', 'compactPlaylistRenderer', 'gridPlaylistRenderer', 'radioRenderer', 'compactRadioRenderer', 'gridRadioRenderer', @@ -213,6 +213,8 @@ def renderer_info(renderer, additional_info={}): info['type'] = 'playlist' elif type == 'channelRenderer': info['type'] = 'channel' + elif type == 'playlistHeaderRenderer': + info['type'] = 'playlist_metadata' else: info['type'] = 'unsupported' return info -- cgit v1.2.3 From 64434b02ca50c2a6324caa1355559bd881ba687e Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 6 Jul 2019 18:36:09 -0700 Subject: Convert channel page to flask framework --- youtube/yt_data_extract.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) (limited to 'youtube/yt_data_extract.py') diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index a42b6a2..dca5964 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -36,19 +36,11 @@ import json - - def get_plain_text(node): try: - return html.escape(node['simpleText']) + return node['simpleText'] except KeyError: - return unformmated_text_runs(node['runs']) - -def unformmated_text_runs(runs): - result = '' - for text_run in runs: - result += html.escape(text_run["text"]) - return result + return ''.join(text_run['text'] for text_run in node['runs']) def format_text_runs(runs): if isinstance(runs, str): @@ -78,14 +70,19 @@ def get_url(node): def get_text(node): + if node == {}: + return '' try: return node['simpleText'] except KeyError: - pass + pass try: return node['runs'][0]['text'] except IndexError: # empty text runs return '' + except KeyError: + print(node) + raise def get_formatted_text(node): try: @@ -200,7 +197,7 @@ def renderer_info(renderer, additional_info={}): info.update(additional_info) - if type.startswith('compact') or type.startswith('playlist') or type.startswith('grid'): + if type.startswith('compact') or type.startswith('playlist'): info['item_size'] = 'small' else: info['item_size'] = 'medium' @@ -271,13 +268,8 @@ def renderer_info(renderer, additional_info={}): raise - - #print(renderer) - #raise NotImplementedError('Unknown renderer type: ' + type) - return '' - -def parse_info_prepare_for_html(renderer): - item = renderer_info(renderer) +def parse_info_prepare_for_html(renderer, additional_info={}): + item = renderer_info(renderer, additional_info) prefix_urls(item) add_extra_html_info(item) -- cgit v1.2.3 From c0617670f78bf61ccf3aa0c5904091146b630104 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Sat, 6 Jul 2019 19:26:07 -0700 Subject: Fix medium playlist items displaying incorrectly --- youtube/yt_data_extract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'youtube/yt_data_extract.py') diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index dca5964..c236c2f 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -197,7 +197,7 @@ def renderer_info(renderer, additional_info={}): info.update(additional_info) - if type.startswith('compact') or type.startswith('playlist'): + if type.startswith('compact') or (type.startswith('playlist') and type != 'playlistRenderer'): info['item_size'] = 'small' else: info['item_size'] = 'medium' -- cgit v1.2.3