about | summary | refs | log | tree | commit | diff | stats
path: root/youtube/yt_data_extract/common.py
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2021-07-27 21:35:11 -0700
committer: Jesús <heckyel@hyperbola.info> 2021-07-28 23:47:41 -0500
commit: 54b39f13034fdbcf427a21b3be8d56020516a764 (patch)
tree: 1a1cb075eb4d2becdf905bdb53e19bc2d6464694 /youtube/yt_data_extract/common.py
parent: f5f9b1c18172327ec310efc9274db30959ba334e (diff)
download: yt-local-54b39f13034fdbcf427a21b3be8d56020516a764.tar.lz
yt-local-54b39f13034fdbcf427a21b3be8d56020516a764.tar.xz
yt-local-54b39f13034fdbcf427a21b3be8d56020516a764.zip
Fix missing likes, dislikes, & music list due to YouTube changes
Also moves some microformat extraction from _extract_watch_info_mobile to extract_watch_info, where it belongs. _extract_watch_info_mobile is really only for content visible on the page, and is thus specialized for either the mobile or the desktop layout.

Signed-off-by: Jesús <heckyel@hyperbola.info>
Diffstat (limited to 'youtube/yt_data_extract/common.py')
-rw-r--r-- youtube/yt_data_extract/common.py 30
1 file changed, 23 insertions, 7 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index d03bd89..e87808b 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -403,6 +403,7 @@ nested_renderer_dispatch = {
'twoColumnBrowseResultsRenderer': _traverse_browse_renderer,
'twoColumnSearchResultsRenderer': lambda r: get(r, 'primaryContents', {}),
'richItemRenderer': lambda r: get(r, 'content', {}),
+ 'engagementPanelSectionListRenderer': lambda r: get(r, 'content', {}),
}
# these renderers contain a list of renderers inside them
@@ -412,6 +413,8 @@ nested_renderer_list_dispatch = {
'gridRenderer': _traverse_standard_list,
'richGridRenderer': _traverse_standard_list,
'playlistVideoListRenderer': _traverse_standard_list,
+ 'structuredDescriptionContentRenderer': _traverse_standard_list,
+ 'slimVideoMetadataSectionRenderer': _traverse_standard_list,
'singleColumnWatchNextResults': lambda r: (deep_get(r, 'results', 'results', 'contents', default=[]), None),
}
def get_nested_renderer_list_function(key):
@@ -475,8 +478,11 @@ def extract_items_from_renderer(renderer, item_types=_item_types):
renderer = None
-def extract_items(response, item_types=_item_types):
+def extract_items(response, item_types=_item_types,
+ search_engagement_panels=False):
'''return items, ctoken'''
+ items = []
+ ctoken = None
if 'continuationContents' in response:
# sometimes there's another, empty, junk [something]Continuation key
# find real one
@@ -484,13 +490,23 @@ def extract_items(response, item_types=_item_types):
'continuationContents', {}).items():
# e.g. commentSectionContinuation, playlistVideoListContinuation
if key.endswith('Continuation'):
- items, cont = extract_items_from_renderer({key: renderer_cont},
+ items, ctoken = extract_items_from_renderer(
+ {key: renderer_cont},
item_types=item_types)
if items:
- return items, cont
- return [], None
+ break
elif 'contents' in response:
renderer = get(response, 'contents', {})
- return extract_items_from_renderer(renderer, item_types=item_types)
- else:
- return [], None
+ items, ctoken = extract_items_from_renderer(
+ renderer,
+ item_types=item_types)
+
+ if search_engagement_panels and 'engagementPanels' in response:
+ for engagement_renderer in response['engagementPanels']:
+ additional_items, cont = extract_items_from_renderer(
+ engagement_renderer,
+ item_types=item_types)
+ items += additional_items
+ if cont and not ctoken:
+ ctoken = cont
+ return items, ctoken