From b027f66738f0038da083d191f84789840e366496 Mon Sep 17 00:00:00 2001 From: James Taylor Date: Thu, 26 Dec 2019 18:43:24 -0800 Subject: yt_data_extract.common: Simplify usage of get functions and remove dead code Change usage of multi_deep_get to multi_get where possible Remove checking of type from calls to get functions (because it's very unlikely Youtube suddenly changes the type without changing the name of the variable or anything, and it takes up unnecessary space) Remove all default=None arguments from get functions, since those are superfluous. Remove list_types constant since it's no longer in use. --- youtube/yt_data_extract/common.py | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index 85eb363..d056e5f 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -284,20 +284,13 @@ def extract_item_info(item, additional_info={}): def extract_response(polymer_json): '''return response, error''' - response = multi_deep_get(polymer_json, [1, 'response'], ['response'], default=None, types=dict) + response = multi_deep_get(polymer_json, [1, 'response'], ['response']) if response is None: return None, 'Failed to extract response' else: return response, None -list_types = { - 'sectionListRenderer', - 'itemSectionRenderer', - 'gridRenderer', - 'playlistVideoListRenderer', -} - item_types = { 'movieRenderer', 'didYouMeanRenderer', @@ -328,17 +321,17 @@ item_types = { } def _traverse_browse_renderer(renderer): - for tab in get(renderer, 'tabs', (), types=(list, tuple)): - tab_renderer = multi_deep_get(tab, ['tabRenderer'], ['expandableTabRenderer'], default=None, types=dict) + for tab in get(renderer, 'tabs', ()): + tab_renderer = multi_get(tab, 'tabRenderer', 'expandableTabRenderer') if tab_renderer is None: continue if tab_renderer.get('selected', False): - return get(tab_renderer, 'content', {}, types=(dict)) + 
return get(tab_renderer, 'content', {}) print('Could not find tab with content') return {} def _traverse_standard_list(renderer): - renderer_list = multi_deep_get(renderer, ['contents'], ['items'], default=(), types=(list, tuple)) + renderer_list = multi_get(renderer, 'contents', 'items', default=()) continuation = deep_get(renderer, 'continuations', 0, 'nextContinuationData', 'continuation') return renderer_list, continuation @@ -346,7 +339,7 @@ def _traverse_standard_list(renderer): nested_renderer_dispatch = { 'singleColumnBrowseResultsRenderer': _traverse_browse_renderer, 'twoColumnBrowseResultsRenderer': _traverse_browse_renderer, - 'twoColumnSearchResultsRenderer': lambda renderer: get(renderer, 'primaryContents', {}, types=dict), + 'twoColumnSearchResultsRenderer': lambda renderer: get(renderer, 'primaryContents', {}), } # these renderers contain a list of renderers inside them @@ -355,17 +348,17 @@ nested_renderer_list_dispatch = { 'itemSectionRenderer': _traverse_standard_list, 'gridRenderer': _traverse_standard_list, 'playlistVideoListRenderer': _traverse_standard_list, - 'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[], types=(list, tuple)), None), + 'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None), } def extract_items(response, item_types=item_types): '''return items, ctoken''' if 'continuationContents' in response: # always has just the one [something]Continuation key, but do this just in case they add some tracking key or something - for key, renderer_continuation in get(response, 'continuationContents', {}, types=dict).items(): + for key, renderer_continuation in get(response, 'continuationContents', {}).items(): if key.endswith('Continuation'): # e.g. 
commentSectionContinuation, playlistVideoListContinuation - items = multi_deep_get(renderer_continuation, ['contents'], ['items'], default=[], types=(list, tuple)) - ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation', default=None, types=str) + items = multi_get(renderer_continuation, 'contents', 'items', default=[]) + ctoken = deep_get(renderer_continuation, 'continuations', 0, 'nextContinuationData', 'continuation') return items, ctoken return [], None elif 'contents' in response: @@ -375,7 +368,7 @@ def extract_items(response, item_types=item_types): iter_stack = collections.deque() current_iter = iter(()) - renderer = get(response, 'contents', {}, types=dict) + renderer = get(response, 'contents', {}) while True: # mode 1: get a new renderer by iterating. -- cgit v1.2.3