diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-12-26 19:02:13 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-12-26 19:02:13 -0800 |
commit | 8e8a1b70b698ce56fe6f4d2be87d91d71001bb11 (patch) | |
tree | bf4c501e650b3f6f5517eacc3fedba01030388c4 /youtube/yt_data_extract | |
parent | b027f66738f0038da083d191f84789840e366496 (diff) | |
download | yt-local-8e8a1b70b698ce56fe6f4d2be87d91d71001bb11.tar.lz yt-local-8e8a1b70b698ce56fe6f4d2be87d91d71001bb11.tar.xz yt-local-8e8a1b70b698ce56fe6f4d2be87d91d71001bb11.zip |
yt_data_extract: Split up extract_items so renderer extraction works independently
extract_items_from_renderer will extract items given just a renderer rather than a whole response
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- | youtube/yt_data_extract/common.py | 95 |
1 file changed, 48 insertions, 47 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index d056e5f..3dda8eb 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -291,7 +291,7 @@ def extract_response(polymer_json): return response, None -item_types = { +_item_types = { 'movieRenderer', 'didYouMeanRenderer', 'showingResultsForRenderer', @@ -350,8 +350,53 @@ nested_renderer_list_dispatch = { 'playlistVideoListRenderer': _traverse_standard_list, 'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None), } +def extract_items_from_renderer(renderer, item_types=_item_types): + ctoken = None + items = [] + + iter_stack = collections.deque() + current_iter = iter(()) + + while True: + # mode 1: get a new renderer by iterating. + # goes down the stack for an iterator if one has been exhausted + if not renderer: + try: + renderer = current_iter.__next__() + except StopIteration: + try: + current_iter = iter_stack.pop() + except IndexError: + return items, ctoken + # Get new renderer or check that the one we got is good before + # proceeding to mode 2 + continue + + + # mode 2: dig into the current renderer + key, value = list(renderer.items())[0] + + # has a list in it, add it to the iter stack + if key in nested_renderer_list_dispatch: + renderer_list, continuation = nested_renderer_list_dispatch[key](value) + if renderer_list: + iter_stack.append(current_iter) + current_iter = iter(renderer_list) + if continuation: + ctoken = continuation -def extract_items(response, item_types=item_types): + # new renderer nested inside this one + elif key in nested_renderer_dispatch: + renderer = nested_renderer_dispatch[key](value) + continue # don't reset renderer to None + + # the renderer is an item + elif key in item_types: + items.append(renderer) + + renderer = None + +def extract_items(response, item_types=_item_types): '''return items, ctoken''' if 'continuationContents' in response: # always has 
just the one [something]Continuation key, but do this just in case they add some tracking key or something @@ -362,51 +407,7 @@ def extract_items(response, item_types=item_types): return items, ctoken return [], None elif 'contents' in response: - ctoken = None - items = [] - - iter_stack = collections.deque() - current_iter = iter(()) - renderer = get(response, 'contents', {}) - - while True: - # mode 1: get a new renderer by iterating. - # goes down the stack for an iterator if one has been exhausted - if not renderer: - try: - renderer = current_iter.__next__() - except StopIteration: - try: - current_iter = iter_stack.pop() - except IndexError: - return items, ctoken - # Get new renderer or check that the one we got is good before - # proceeding to mode 2 - continue - - - # mode 2: dig into the current renderer - key, value = list(renderer.items())[0] - - # has a list in it, add it to the iter stack - if key in nested_renderer_list_dispatch: - renderer_list, continuation = nested_renderer_list_dispatch[key](value) - if renderer_list: - iter_stack.append(current_iter) - current_iter = iter(renderer_list) - if continuation: - ctoken = continuation - - # new renderer nested inside this one - elif key in nested_renderer_dispatch: - renderer = nested_renderer_dispatch[key](value) - continue # don't reset renderer to None - - # the renderer is an item - elif key in item_types: - items.append(renderer) - - renderer = None + return extract_items_from_renderer(renderer, item_types=item_types) else: return [], None |