diff options
author | James Taylor <user234683@users.noreply.github.com> | 2019-12-12 22:13:17 -0800 |
---|---|---|
committer | James Taylor <user234683@users.noreply.github.com> | 2019-12-12 22:13:17 -0800 |
commit | 26f37521babbb2fc4b86ad59354e8c69da1f3897 (patch) | |
tree | 7fd6eb51bc09ce84b18a9193ab99f23cb481af55 | |
parent | 205ad29cb0763dd263a5940cdcb3059d189bbfe7 (diff) | |
download | yt-local-26f37521babbb2fc4b86ad59354e8c69da1f3897.tar.lz yt-local-26f37521babbb2fc4b86ad59354e8c69da1f3897.tar.xz yt-local-26f37521babbb2fc4b86ad59354e8c69da1f3897.zip |
Extraction: Bypass age-restriction
-rw-r--r-- | youtube/watch.py | 26 | ||||
-rw-r--r-- | youtube/yt_data_extract.py | 99 |
2 files changed, 90 insertions, 35 deletions
diff --git a/youtube/watch.py b/youtube/watch.py index fa697ba..4575c1e 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -275,17 +275,32 @@ headers = ( ) + util.mobile_ua def extract_info(video_id): - polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1', headers=headers, debug_name='watch') + polymer_json = util.fetch_url('https://m.youtube.com/watch?v=' + video_id + '&pbj=1', headers=headers, debug_name='watch').decode('utf-8') + # TODO: Decide whether this should be done in yt_data_extract.extract_watch_info try: polymer_json = json.loads(polymer_json) except json.decoder.JSONDecodeError: traceback.print_exc() return {'error': 'Failed to parse json response'} info = yt_data_extract.extract_watch_info(polymer_json) - error = decrypt_signatures(info) - if error: - print('Error decrypting url signatures: ' + error) - info['playability_error'] = error + + # age restriction bypass + if info['age_restricted']: + print('Fetching age restriction bypass page') + data = { + 'video_id': video_id, + 'eurl': 'https://youtube.googleapis.com/v/' + video_id, + } + url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data) + video_info_page = util.fetch_url(url, debug_name='get_video_info', report_text='Fetched age restriction bypass page').decode('utf-8') + yt_data_extract.update_with_age_restricted_info(info, video_info_page) + + # signature decryption + decryption_error = decrypt_signatures(info) + if decryption_error: + decryption_error = 'Error decrypting url signatures: ' + decryption_error + info['playability_error'] = decryption_error + return info def video_quality_string(format): @@ -410,6 +425,7 @@ def get_watch_page(): uploader = info['author'], description = info['description'], unlisted = info['unlisted'], + age_restricted = info['age_restricted'], playability_error = info['playability_error'], ) diff --git a/youtube/yt_data_extract.py b/youtube/yt_data_extract.py index 7c2b717..8c5c63d 100644 --- a/youtube/yt_data_extract.py +++ b/youtube/yt_data_extract.py @@ -943,6 +943,11 @@ def extract_watch_info_mobile(top_level): info = {} microformat = default_multi_get(top_level, 'playerResponse', 'microformat', 'playerMicroformatRenderer', default={}) + family_safe = microformat.get('isFamilySafe') + if family_safe is None: + info['age_restricted'] = None + else: + info['age_restricted'] = not family_safe info['allowed_countries'] = microformat.get('availableCountries', []) info['published_date'] = microformat.get('publishDate') @@ -1055,6 +1060,34 @@ def get_caption_url(info, language, format, automatic=False, translation_languag url += '&tlang=' + translation_language return url +def extract_formats(info, player_response): + streaming_data = player_response.get('streamingData', {}) + yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', []) + + info['formats'] = [] + + for yt_fmt in yt_formats: + fmt = {} + fmt['ext'] = None + fmt['audio_bitrate'] = None + fmt['acodec'] = None + fmt['vcodec'] = None + fmt['width'] = yt_fmt.get('width') + fmt['height'] = yt_fmt.get('height') + fmt['file_size'] = yt_fmt.get('contentLength') + fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate') + fmt['fps'] = yt_fmt.get('fps') + cipher = dict(urllib.parse.parse_qsl(yt_fmt.get('cipher', ''))) + if cipher: + fmt['url'] = cipher.get('url') + else: + fmt['url'] = yt_fmt.get('url') + fmt['s'] = cipher.get('s') + fmt['sp'] = cipher.get('sp') + fmt.update(_formats.get(str(yt_fmt.get('itag')), {})) + + info['formats'].append(fmt) + SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt') def extract_watch_info(polymer_json): info = {'playability_error': None, 'error': None} @@ -1080,10 +1113,6 @@ def extract_watch_info(polymer_json): player_args = default_multi_get(top_level, 'player', 'args', default={}) player_response = json.loads(player_args['player_response']) if 'player_response' in player_args else {} - playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None) - playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default='Unknown error') - if playability_status not in (None, 'OK'): - info['playability_error'] = playability_reason # captions info['automatic_caption_languages'] = [] @@ -1106,35 +1135,19 @@ def extract_watch_info(polymer_json): print('WARNING: Found non-translatable caption language') # formats - streaming_data = player_response.get('streamingData', {}) - yt_formats = streaming_data.get('formats', []) + streaming_data.get('adaptiveFormats', []) - - info['formats'] = [] - - for yt_fmt in yt_formats: - fmt = {} - fmt['ext'] = None - fmt['audio_bitrate'] = None - fmt['acodec'] = None - fmt['vcodec'] = None - fmt['width'] = yt_fmt.get('width') - fmt['height'] = yt_fmt.get('height') - fmt['file_size'] = yt_fmt.get('contentLength') - fmt['audio_sample_rate'] = yt_fmt.get('audioSampleRate') - fmt['fps'] = yt_fmt.get('fps') - cipher = dict(urllib.parse.parse_qsl(yt_fmt.get('cipher', ''))) - if cipher: - fmt['url'] = cipher.get('url') + extract_formats(info, player_response) + playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None) + playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default='Could not find playability error') + if not info['formats']: + if playability_status not in (None, 'OK'): + info['playability_error'] = playability_reason else: - fmt['url'] = yt_fmt.get('url') - fmt['s'] = cipher.get('s') - fmt['sp'] = cipher.get('sp') - fmt.update(_formats.get(str(yt_fmt.get('itag')), {})) + info['playability_error'] = 'Unknown playability error' - info['formats'].append(fmt) - if info['formats']: - info['playability_error'] = None # in case they lie + # check age-restriction + info['age_restricted'] = (playability_status == 'LOGIN_REQUIRED' and playability_reason and ' age' in playability_reason) + # base_js (for decryption of signatures) info['base_js'] = default_multi_get(top_level, 'player', 'assets', 'js') if info['base_js']: info['base_js'] = normalize_url(info['base_js']) @@ -1162,3 +1175,29 @@ def extract_watch_info(polymer_json): # other stuff info['author_url'] = 'https://www.youtube.com/channel/' + info['author_id'] if info['author_id'] else None return info + +def update_with_age_restricted_info(info, video_info_page): + ERROR_PREFIX = 'Error bypassing age-restriction: ' + + video_info = urllib.parse.parse_qs(video_info_page) + player_response = default_multi_get(video_info, 'player_response', 0) + if player_response is None: + info['playability_error'] = ERROR_PREFIX + 'Could not find player_response in video_info_page' + return + try: + player_response = json.loads(player_response) + except json.decoder.JSONDecodeError: + traceback.print_exc() + info['playability_error'] = ERROR_PREFIX + 'Failed to parse json response' + return + + extract_formats(info, player_response) + if info['formats']: + info['playability_error'] = None + else: + playability_status = default_multi_get(player_response, 'playabilityStatus', 'status', default=None) + playability_reason = default_multi_get(player_response, 'playabilityStatus', 'reason', default=ERROR_PREFIX + 'Could not find playability error') + if playability_status not in (None, 'OK'): + info['playability_error'] = ERROR_PREFIX + playability_reason + else: + info['playability_error'] = ERROR_PREFIX + 'Unknown playability error' |