From 1224dd88a387d23d7a908aa79b5de0debb51c8cd Mon Sep 17 00:00:00 2001 From: James Taylor Date: Fri, 10 Apr 2020 13:09:38 -0700 Subject: Fix related video extraction sometimes failing Youtube added some pointless variation in variable names --- youtube/yt_data_extract/common.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py index 974d981..3b2ebb5 100644 --- a/youtube/yt_data_extract/common.py +++ b/youtube/yt_data_extract/common.py @@ -223,10 +223,14 @@ def extract_item_info(item, additional_info={}): info['type'] = 'playlist' elif primary_type == 'channel': info['type'] = 'channel' + elif type == 'videoWithContextRenderer': # stupid exception + info['type'] = 'video' + primary_type = 'video' else: info['type'] = 'unsupported' - info['title'] = extract_str(item.get('title')) + # videoWithContextRenderer changes it to 'headline' just to be annoying + info['title'] = extract_str(multi_get(item, 'title', 'headline')) if primary_type != 'channel': info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText')) info['author_id'] = extract_str(multi_deep_get(item, @@ -256,7 +260,10 @@ def extract_item_info(item, additional_info={}): info['view_count'] = extract_int(item.get('viewCountText')) # dig into accessibility data to get view_count for videos marked as recommended, and to get time_published - accessibility_label = deep_get(item, 'title', 'accessibility', 'accessibilityData', 'label', default='') + accessibility_label = multi_deep_get(item, + ['title', 'accessibility', 'accessibilityData', 'label'], + ['headline', 'accessibility', 'accessibilityData', 'label'], + default='') timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label) if timestamp: conservative_update(info, 'time_published', timestamp.group(1)) @@ -333,6 +340,7 @@ _item_types = { 'videoRenderer', 'compactVideoRenderer', 'compactAutoplayRenderer', + 'videoWithContextRenderer', 'gridVideoRenderer', 'playlistVideoRenderer', -- cgit v1.2.3