diff options
Diffstat (limited to 'youtube/yt_data_extract.py')
-rw-r--r-- | youtube/yt_data_extract.py | 99 |
1 files changed, 69 insertions, 30 deletions
diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 7c2b717..8c5c63d 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -943,6 +943,11 @@ def extract_watch_info_mobile(top_level): info = {} microformat = default_multi_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={}) + family_safe = microformat.get('isFamilySafe') + if family_safe is None: + info['age_restricted'] = None + else: + info['age_restricted'] = not family_safe info['allowed_countries'] = microformat.get('availableCountries', []) info['published_date'] = microformat.get('publishDate') @@ -1055,6 +1060,34 @@ def get_caption_url(info, language, format, automatic=False, translation_languag url += '&tlang=' + translation_language return url +def extract_formats(info, player_response): + streaming_data = player_response.get('streamingData', {}) + yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', []) + + info['formats'] = [] + + for yt_fmt in yt_formats: + fmt = {} + fmt['ext'] = None + fmt['audio_bitrate'] = None + fmt['acodec'] = None + fmt['vcodec'] = None + fmt['width'] = yt_fmt.get('width') + fmt['height'] = yt_fmt.get('height') + fmt['file_size'] = yt_fmt.get('contentLength') + fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate') + fmt['fps'] = yt_fmt.get('fps') + cipher = dict(urllib.parse.parse_qsl(yt_fmt.get('cipher', ''))) + if cipher: + fmt['url'] = cipher.get('url') + else: + fmt['url'] = yt_fmt.get('url') + fmt['s'] = cipher.get('s') + fmt['sp'] = cipher.get('sp') + fmt.update(_formats.get(str(yt_fmt.get('itag')), {})) + + info['formats'].append(fmt) + SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') def extract_watch_info(polymer_json): info = {'playability_error': None, 'error': None} @@ -1080,10 +1113,6 @@ def extract_watch_info(polymer_json): player_args = default_multi_get(top_level, 'player', 'args', default={}) player_response = json.loads(player_args['player_response']) if 'player_response' in player_args else {} - playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None) - playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default='Unknown error') - if playability_status not in (None, 'OK'): - info['playability_error'] = playability_reason # captions info['automatic_caption_languages'] = [] @@ -1106,35 +1135,19 @@ def extract_watch_info(polymer_json): print('WARNING: Found non-translatable caption language') # formats - streaming_data = player_response.get('streamingData', {}) - yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', []) - - info['formats'] = [] - - for yt_fmt in yt_formats: - fmt = {} - fmt['ext'] = None - fmt['audio_bitrate'] = None - fmt['acodec'] = None - fmt['vcodec'] = None - fmt['width'] = yt_fmt.get('width') - fmt['height'] = yt_fmt.get('height') - fmt['file_size'] = yt_fmt.get('contentLength') - fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate') - fmt['fps'] = yt_fmt.get('fps') - cipher = dict(urllib.parse.parse_qsl(yt_fmt.get('cipher', ''))) - if cipher: - fmt['url'] = cipher.get('url') + extract_formats(info, player_response) + playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None) + playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default='Could not find playability error') + if not info['formats']: + if playability_status not in (None, 'OK'): + info['playability_error'] = playability_reason else: - fmt['url'] = yt_fmt.get('url') - fmt['s'] = cipher.get('s') - fmt['sp'] = cipher.get('sp') - fmt.update(_formats.get(str(yt_fmt.get('itag')), {})) + info['playability_error'] = 'Unknown playability error' - info['formats'].append(fmt) - if info['formats']: - info['playability_error'] = None # in case they lie + # check age-restriction + info['age_restricted'] = (playability_status == 'LOGIN_REQUIRED' and playability_reason and ' age' in playability_reason) + # base_js (for decryption of signatures) info['base_js'] = default_multi_get(top_level, 'player', 'assets', 'js') if info['base_js']: info['base_js'] = normalize_url(info['base_js']) @@ -1162,3 +1175,29 @@ def extract_watch_info(polymer_json): # other stuff info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None return info + +def update_with_age_restricted_info(info, video_info_page): + ERROR_PREFIX = 'Error bypassing age-restriction: ' + + video_info = urllib.parse.parse_qs(video_info_page) + player_response = default_multi_get(video_info, 'player_response', 0) + if player_response is None: + info['playability_error'] = ERROR_PREFIX + 'Could not find player_response in video_info_page' + return + try: + player_response = json.loads(player_response) + except json.decoder.JSONDecodeError: + traceback.print_exc() + info['playability_error'] = ERROR_PREFIX + 'Failed to parse json response' + return + + extract_formats(info, player_response) + if info['formats']: + info['playability_error'] = None + else: + playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None) + playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default=ERROR_PREFIX + 'Could not find playability error') + if playability_status not in (None, 'OK'): + info['playability_error'] = ERROR_PREFIX + playability_reason + else: + info['playability_error'] = ERROR_PREFIX + 'Unknown playability error' |