diff options
author | James Taylor <user234683@users.noreply.github.com> | 2020-06-28 13:18:54 -0700 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2020-06-28 13:18:54 -0700 |
commit | 6e14a8547d05cf02ad72e8415f70072bdf599212 (patch) | |
tree | 2de0b9a76106d89445aac4af0bdfa7bd26110494 /youtube/yt_data_extract | |
parent | 0b5d6fe1ed96d6899ed6379275cb18c48ae25688 (diff) | |
download | yt-local-6e14a8547d05cf02ad72e8415f70072bdf599212.tar.lz yt-local-6e14a8547d05cf02ad72e8415f70072bdf599212.tar.xz yt-local-6e14a8547d05cf02ad72e8415f70072bdf599212.zip |
Handle case where the embedded player response is missing
Change so that data other than the formats is extracted from the regular playerResponse
Extract formats from the embedded player response, but fall back to
the regular one if that doesn't work.
Sometimes there is no 'player' in top_level, and the URLs are in
the regular playerResponse instead
Diffstat (limited to 'youtube/yt_data_extract')
-rw-r--r-- | youtube/yt_data_extract/watch_extraction.py | 12 |
1 files changed, 10 insertions, 2 deletions
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py index 0970125..9dbb252 100644 --- a/youtube/yt_data_extract/watch_extraction.py +++ b/youtube/yt_data_extract/watch_extraction.py @@ -415,8 +415,14 @@ def extract_watch_info(polymer_json): if error: info['playability_error'] = error + player_response = top_level.get('playerResponse', {}) + + # usually, only the embedded one has the urls player_args = deep_get(top_level, 'player', 'args', default={}) - player_response = json.loads(player_args['player_response']) if 'player_response' in player_args else {} + if 'player_response' in player_args: + embedded_player_response = json.loads(player_args['player_response']) + else: + embedded_player_response = {} # captions info['automatic_caption_languages'] = [] @@ -446,7 +452,9 @@ def extract_watch_info(polymer_json): print('WARNING: Found non-translatable caption language') # formats - _extract_formats(info, player_response) + _extract_formats(info, embedded_player_response) + if not info['formats']: + _extract_formats(info, player_response) # playability errors _extract_playability_error(info, player_response) |