diff options
author | James Taylor <user234683@users.noreply.github.com> | 2021-07-27 21:35:11 -0700 |
---|---|---|
committer | Jesús <heckyel@hyperbola.info> | 2021-07-28 23:47:41 -0500 |
commit | 54b39f13034fdbcf427a21b3be8d56020516a764 (patch) | |
tree | 1a1cb075eb4d2becdf905bdb53e19bc2d6464694 /youtube/yt_data_extract/common.py | |
parent | f5f9b1c18172327ec310efc9274db30959ba334e (diff) | |
download | yt-local-54b39f13034fdbcf427a21b3be8d56020516a764.tar.lz yt-local-54b39f13034fdbcf427a21b3be8d56020516a764.tar.xz yt-local-54b39f13034fdbcf427a21b3be8d56020516a764.zip |
Fix missing likes, dislikes, & music list due to Youtube changes
Also moves some microformat extraction from
_extract_watch_info_mobile to extract_watch_info where it belongs.
_extract_watch_info_mobile is really only for stuff visible on the
page, and thus specialized for either mobile or desktop.
Signed-off-by: Jesús <heckyel@hyperbola.info>
Diffstat (limited to 'youtube/yt_data_extract/common.py')
-rw-r--r-- | youtube/yt_data_extract/common.py | 30 |
1 files changed, 23 insertions, 7 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index d03bd89..e87808b 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -403,6 +403,7 @@ nested_renderer_dispatch = { 'twoColumnBrowseResultsRenderer': _traverse_browse_renderer, 'twoColumnSearchResultsRenderer': lambda r: get(r, 'primaryContents', {}), 'richItemRenderer': lambda r: get(r, 'content', {}), + 'engagementPanelSectionListRenderer': lambda r: get(r, 'content', {}), } # these renderers contain a list of renderers inside them @@ -412,6 +413,8 @@ nested_renderer_list_dispatch = { 'gridRenderer': _traverse_standard_list, 'richGridRenderer': _traverse_standard_list, 'playlistVideoListRenderer': _traverse_standard_list, + 'structuredDescriptionContentRenderer': _traverse_standard_list, + 'slimVideoMetadataSectionRenderer': _traverse_standard_list, 'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None), } def get_nested_renderer_list_function(key): @@ -475,8 +478,11 @@ def extract_items_from_renderer(renderer, item_types=_item_types): renderer = None -def extract_items(response, item_types=_item_types): +def extract_items(response, item_types=_item_types, + search_engagement_panels=False): '''return items, ctoken''' + items = [] + ctoken = None if 'continuationContents' in response: # sometimes there's another, empty, junk [something]Continuation key # find real one @@ -484,13 +490,23 @@ def extract_items(response, item_types=_item_types): 'continuationContents', {}).items(): # e.g. commentSectionContinuation, playlistVideoListContinuation if key.endswith('Continuation'): - items, cont = extract_items_from_renderer({key: renderer_cont}, + items, ctoken = extract_items_from_renderer( + {key: renderer_cont}, item_types=item_types) if items: - return items, cont - return [], None + break elif 'contents' in response: renderer = get(response, 'contents', {}) - return extract_items_from_renderer(renderer, item_types=item_types) - else: - return [], None + items, ctoken = extract_items_from_renderer( + renderer, + item_types=item_types) + + if search_engagement_panels and 'engagementPanels' in response: + for engagement_renderer in response['engagementPanels']: + additional_items, cont = extract_items_from_renderer( + engagement_renderer, + item_types=item_types) + items += additional_items + if cont and not ctoken: + ctoken = cont + return items, ctoken |