Fix related video extraction sometimes failing

YouTube added some pointless variation in key names: the videoWithContextRenderer item type uses 'headline' instead of 'title'
author: James Taylor <user234683@users.noreply.github.com> 2020-04-10 13:09:38 -0700
committer: James Taylor <user234683@users.noreply.github.com> 2020-04-10 13:09:38 -0700
commit: 1224dd88a387d23d7a908aa79b5de0debb51c8cd (patch)
tree: 4b51729f510806e70b62f5190dfa239d57563949 /youtube/yt_data_extract/common.py
parent: 3e09193eafeb7072c46385b2c91e82fcd275cc25 (diff)
download: yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.tar.lz
yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.tar.xz
yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.zip
1 files changed, 10 insertions, 2 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 974d981..3b2ebb5 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -223,10 +223,14 @@ def extract_item_info(item, additional_info={}):
         info['type'] = 'playlist'
     elif primary_type == 'channel':
         info['type'] = 'channel'
+    elif type == 'videoWithContextRenderer': # stupid exception
+        info['type'] = 'video'
+        primary_type = 'video'
     else:
         info['type'] = 'unsupported'
 
-    info['title'] = extract_str(item.get('title'))
+    # videoWithContextRenderer changes it to 'headline' just to be annoying
+    info['title'] = extract_str(multi_get(item, 'title', 'headline'))
     if primary_type != 'channel':
         info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
         info['author_id'] = extract_str(multi_deep_get(item,
@@ -256,7 +260,10 @@ def extract_item_info(item, additional_info={}):
         info['view_count'] = extract_int(item.get('viewCountText'))
 
         # dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
-        accessibility_label = deep_get(item, 'title', 'accessibility', 'accessibilityData', 'label', default='')
+        accessibility_label = multi_deep_get(item,
+            ['title', 'accessibility', 'accessibilityData', 'label'],
+            ['headline', 'accessibility', 'accessibilityData', 'label'],
+            default='')
         timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
         if timestamp:
             conservative_update(info, 'time_published', timestamp.group(1))
@@ -333,6 +340,7 @@ _item_types = {
     'videoRenderer',
     'compactVideoRenderer',
     'compactAutoplayRenderer',
+    'videoWithContextRenderer',
     'gridVideoRenderer',
     'playlistVideoRenderer',
author	James Taylor <user234683@users.noreply.github.com>	2020-04-10 13:09:38 -0700
committer	James Taylor <user234683@users.noreply.github.com>	2020-04-10 13:09:38 -0700
commit	1224dd88a387d23d7a908aa79b5de0debb51c8cd (patch)
tree	4b51729f510806e70b62f5190dfa239d57563949 /youtube/yt_data_extract/common.py
parent	3e09193eafeb7072c46385b2c91e82fcd275cc25 (diff)
download	yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.tar.lz yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.tar.xz yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.zip