aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/yt_data_extract/common.py
diff options
context:
space:
mode:
author: James Taylor <user234683@users.noreply.github.com> 2020-04-10 13:09:38 -0700
committer: James Taylor <user234683@users.noreply.github.com> 2020-04-10 13:09:38 -0700
commit: 1224dd88a387d23d7a908aa79b5de0debb51c8cd (patch)
tree: 4b51729f510806e70b62f5190dfa239d57563949 /youtube/yt_data_extract/common.py
parent: 3e09193eafeb7072c46385b2c91e82fcd275cc25 (diff)
download: yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.tar.lz
yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.tar.xz
yt-local-1224dd88a387d23d7a908aa79b5de0debb51c8cd.zip
Fix related video extraction sometimes failing
YouTube added some pointless variation in variable names
Diffstat (limited to 'youtube/yt_data_extract/common.py')
-rw-r--r-- youtube/yt_data_extract/common.py 12
1 file changed, 10 insertions, 2 deletions
diff --git a/youtube/yt_data_extract/common.py b/youtube/yt_data_extract/common.py
index 974d981..3b2ebb5 100644
--- a/youtube/yt_data_extract/common.py
+++ b/youtube/yt_data_extract/common.py
@@ -223,10 +223,14 @@ def extract_item_info(item, additional_info={}):
info['type'] = 'playlist'
elif primary_type == 'channel':
info['type'] = 'channel'
+ elif type == 'videoWithContextRenderer': # stupid exception
+ info['type'] = 'video'
+ primary_type = 'video'
else:
info['type'] = 'unsupported'
- info['title'] = extract_str(item.get('title'))
+ # videoWithContextRenderer changes it to 'headline' just to be annoying
+ info['title'] = extract_str(multi_get(item, 'title', 'headline'))
if primary_type != 'channel':
info['author'] = extract_str(multi_get(item, 'longBylineText', 'shortBylineText', 'ownerText'))
info['author_id'] = extract_str(multi_deep_get(item,
@@ -256,7 +260,10 @@ def extract_item_info(item, additional_info={}):
info['view_count'] = extract_int(item.get('viewCountText'))
# dig into accessibility data to get view_count for videos marked as recommended, and to get time_published
- accessibility_label = deep_get(item, 'title', 'accessibility', 'accessibilityData', 'label', default='')
+ accessibility_label = multi_deep_get(item,
+ ['title', 'accessibility', 'accessibilityData', 'label'],
+ ['headline', 'accessibility', 'accessibilityData', 'label'],
+ default='')
timestamp = re.search(r'(\d+ \w+ ago)', accessibility_label)
if timestamp:
conservative_update(info, 'time_published', timestamp.group(1))
@@ -333,6 +340,7 @@ _item_types = {
'videoRenderer',
'compactVideoRenderer',
'compactAutoplayRenderer',
+ 'videoWithContextRenderer',
'gridVideoRenderer',
'playlistVideoRenderer',