aboutsummaryrefslogtreecommitdiffstats
path: root/youtube/yt_data_extract
diff options
context:
space:
mode:
author James Taylor <user234683@users.noreply.github.com> 2020-06-28 13:18:54 -0700
committer James Taylor <user234683@users.noreply.github.com> 2020-06-28 13:18:54 -0700
commit 6e14a8547d05cf02ad72e8415f70072bdf599212 (patch)
tree 2de0b9a76106d89445aac4af0bdfa7bd26110494 /youtube/yt_data_extract
parent 0b5d6fe1ed96d6899ed6379275cb18c48ae25688 (diff)
download yt-local-6e14a8547d05cf02ad72e8415f70072bdf599212.tar.lz
yt-local-6e14a8547d05cf02ad72e8415f70072bdf599212.tar.xz
yt-local-6e14a8547d05cf02ad72e8415f70072bdf599212.zip
Handle case where embedded player response missing
Change so it extracts other stuff from the regular playerResponse. Extract formats from the embedded player response, but fall back to the regular one if that doesn't work. Sometimes there is no 'player' at top_level and the URLs are in the regular playerResponse.
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- youtube/yt_data_extract/watch_extraction.py 12
1 files changed, 10 insertions, 2 deletions
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 0970125..9dbb252 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -415,8 +415,14 @@ def extract_watch_info(polymer_json):
if error:
info['playability_error'] = error
+ player_response = top_level.get('playerResponse', {})
+
+ # usually, only the embedded one has the urls
player_args = deep_get(top_level, 'player', 'args', default={})
- player_response = json.loads(player_args['player_response']) if 'player_response' in player_args else {}
+ if 'player_response' in player_args:
+ embedded_player_response = json.loads(player_args['player_response'])
+ else:
+ embedded_player_response = {}
# captions
info['automatic_caption_languages'] = []
@@ -446,7 +452,9 @@ def extract_watch_info(polymer_json):
print('WARNING: Found non-translatable caption language')
# formats
- _extract_formats(info, player_response)
+ _extract_formats(info, embedded_player_response)
+ if not info['formats']:
+ _extract_formats(info, player_response)
# playability errors
_extract_playability_error(info, player_response)