diff options
Diffstat (limited to 'youtube')
-rw-r--r-- | youtube/watch.py | 184 | ||||
-rw-r--r-- | youtube/yt_data_extract/__init__.py | 2 | ||||
-rw-r--r-- | youtube/yt_data_extract/watch_extraction.py | 2 |
3 files changed, 119 insertions, 69 deletions
diff --git a/youtube/watch.py b/youtube/watch.py index c18347e..c38a3f5 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -19,6 +19,46 @@ from urllib.parse import parse_qs, urlencode from types import SimpleNamespace from math import ceil +# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72 +INNERTUBE_CLIENTS = { + 'android': { + 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'ANDROID', + 'clientVersion': '17.31.35', + 'androidSdkVersion': 31, + 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip' + }, + # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287 + 'thirdParty': { + 'embedUrl': 'https://google.com', # Can be any valid URL + } + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, + 'REQUIRE_JS_PLAYER': False, + }, + + # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) + # See: https://github.com/zerodytrash/YouTube-Internal-Clients + 'tv_embedded': { + 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', + 'clientVersion': '2.0', + }, + # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287 + 'thirdParty': { + 'embedUrl': 'https://google.com', # Can be any valid URL + } + + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, + 'REQUIRE_JS_PLAYER': True, + }, +} + try: with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f: decrypt_cache = json.loads(f.read())['decrypt_cache'] @@ -49,6 +89,8 @@ def get_video_sources(info, target_resolution): video_only_sources = {} uni_sources = [] pair_sources = [] + + for fmt in info['formats']: if not all(fmt[attr] for attr in ('ext', 'url', 'itag')): continue @@ -74,7 +116,6 @@ def get_video_sources(info, target_resolution): fmt['audio_bitrate'] = int(fmt['bitrate']/1000) source = { 'type': 'audio/' + fmt['ext'], - 'bitrate': fmt['audio_bitrate'], 'quality_string': audio_quality_string(fmt), } source.update(fmt) @@ -308,14 +349,6 @@ def save_decrypt_cache(): f.close() -watch_headers = ( - ('Accept', '*/*'), - ('Accept-Language', 'en-US,en;q=0.5'), - ('X-YouTube-Client-Name', '2'), - ('X-YouTube-Client-Version', '2.20180830'), -) + util.mobile_ua - - def decrypt_signatures(info, video_id): '''return error string, or False if no errors''' if not yt_data_extract.requires_decryption(info): @@ -345,8 +378,28 @@ def _add_to_error(info, key, additional_message): else: info[key] = additional_message +def fetch_player_response(client, video_id): + client_params = INNERTUBE_CLIENTS[client] + context = client_params['INNERTUBE_CONTEXT'] + key = client_params['INNERTUBE_API_KEY'] + host = client_params.get('INNERTUBE_HOST') or 'youtubei.googleapis.com' + user_agent = context['client'].get('userAgent') or util.mobile_user_agent -def extract_info(video_id, use_invidious, playlist_id=None, index=None): + url = 'https://' + host + '/youtubei/v1/player?key=' + key + data = { + 'videoId': video_id, + 'context': context, + } + data = json.dumps(data) + headers = (('Content-Type', 'application/json'),('User-Agent', user_agent)) + player_response = util.fetch_url( + url, data=data, headers=headers, + debug_name='youtubei_player_' + client, + report_text='Fetched ' + client + ' youtubei player' + ).decode('utf-8') + return player_response + +def fetch_watch_page_info(video_id, playlist_id, index): # bpctr=9999999999 will bypass are-you-sure dialogs for controversial # videos url = 'https://m.youtube.com/embed/' + video_id + '?bpctr=9999999999' @@ -354,52 +407,46 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None): url += '&list=' + playlist_id if index: url += '&index=' + index - watch_page = util.fetch_url(url, headers=watch_headers, + + headers = ( + ('Accept', '*/*'), + ('Accept-Language', 'en-US,en;q=0.5'), + ('X-YouTube-Client-Name', '2'), + ('X-YouTube-Client-Version', '2.20180830'), + ) + util.mobile_ua + + watch_page = util.fetch_url(url, headers=headers, debug_name='watch') watch_page = watch_page.decode('utf-8') - info = yt_data_extract.extract_watch_info_from_html(watch_page) - - context = { - 'client': { - 'clientName': 'ANDROID', - 'clientVersion': '17.29.35', - 'androidSdkVersion': '31', - 'gl': 'US', - 'hl': 'en', - }, - # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287 - 'thirdParty': { - 'embedUrl': 'https://google.com', # Can be any valid URL - } - } + return yt_data_extract.extract_watch_info_from_html(watch_page) + +def extract_info(video_id, use_invidious, playlist_id=None, index=None): + tasks = ( + # Get video metadata from here + gevent.spawn(fetch_watch_page_info, video_id, playlist_id, index), + + # Get video URLs by spoofing as android client because its urls don't + # require decryption + # The URLs returned with WEB for videos requiring decryption + # couldn't be decrypted with the base.js from the web page for some + # reason + # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136 + gevent.spawn(fetch_player_response, 'android', video_id) + ) + gevent.joinall(tasks) + util.check_gevent_exceptions(*tasks) + info, player_response = tasks[0].value, tasks[1].value + + yt_data_extract.update_with_new_urls(info, player_response) + + # Age restricted video, retry if info['age_restricted'] or info['player_urls_missing']: if info['age_restricted']: - print('Age restricted video. Fetching /youtubei/v1/player page') + print('Age restricted video, retrying') else: - print('Missing player. Fetching /youtubei/v1/player page') - context['client']['clientScreen'] = 'EMBED' - else: - print('Fetching /youtubei/v1/player page') - - # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136 - # ANDROID is used instead because its urls don't require decryption - # The URLs returned with WEB for videos requiring decryption - # couldn't be decrypted with the base.js from the web page for some - # reason - url ='https://youtubei.googleapis.com/youtubei/v1/player' - url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8' - data = { - 'videoId': video_id, - 'context': context, - } - data = json.dumps(data) - content_header = (('Content-Type', 'application/json'),) - player_response = util.fetch_url( - url, data=data, headers=util.mobile_ua + content_header, - debug_name='youtubei_player', - report_text='Fetched youtubei player page').decode('utf-8') - - yt_data_extract.update_with_age_restricted_info(info, player_response) + print('Player urls missing, retrying') + player_response = fetch_player_response('tv_embedded', video_id) + yt_data_extract.update_with_new_urls(info, player_response) # signature decryption decryption_error = decrypt_signatures(info, video_id) @@ -422,8 +469,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None): if (info['hls_manifest_url'] and (info['live'] or not info['formats'] or not info['urls_ready']) ): - manifest = util.fetch_url( - info['hls_manifest_url'], + manifest = util.fetch_url(info['hls_manifest_url'], debug_name='hls_manifest.m3u8', report_text='Fetched hls manifest' ).decode('utf-8') @@ -439,6 +485,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None): # check for 403. Unnecessary for tor video routing b/c ip address is same info['invidious_used'] = False info['invidious_reload_button'] = False + info['tor_bypass_used'] = False if (settings.route_tor == 1 and info['formats'] and info['formats'][0]['url']): try: @@ -452,6 +499,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None): if response.status == 403: print('Access denied (403) for video urls.') print('Routing video through Tor') + info['tor_bypass_used'] = True for fmt in info['formats']: fmt['url'] += '&use_tor=1' elif 300 <= response.status < 400: @@ -682,20 +730,20 @@ def get_watch_page(video_id=None): 'codecs': codecs_string, }) - target_resolution = settings.default_resolution + if (settings.route_tor == 2) or info['tor_bypass_used']: + target_resolution = 240 + else: + target_resolution = settings.default_resolution + source_info = get_video_sources(info, target_resolution) uni_sources = source_info['uni_sources'] pair_sources = source_info['pair_sources'] uni_idx, pair_idx = source_info['uni_idx'], source_info['pair_idx'] - video_height = yt_data_extract.deep_get(source_info, 'uni_sources', - uni_idx, 'height', - default=360) - video_width = yt_data_extract.deep_get(source_info, 'uni_sources', - uni_idx, 'width', - default=640) + pair_quality = yt_data_extract.deep_get(pair_sources, pair_idx, 'quality') uni_quality = yt_data_extract.deep_get(uni_sources, uni_idx, 'quality') + pair_error = abs((pair_quality or 360) - target_resolution) uni_error = abs((uni_quality or 360) - target_resolution) if uni_error == pair_error: @@ -705,6 +753,7 @@ def get_watch_page(video_id=None): closer_to_target = 'uni' else: closer_to_target = 'pair' + using_pair_sources = ( bool(pair_sources) and (not uni_sources or closer_to_target == 'pair') ) @@ -719,6 +768,8 @@ def get_watch_page(video_id=None): uni_sources, uni_idx, 'width', default=640 ) + + # 1 second per pixel, or the actual video width theater_video_target_width = max(640, info['duration'] or 0, video_width) @@ -751,14 +802,13 @@ def get_watch_page(video_id=None): template_name = 'embed.html' else: template_name = 'watch.html' - return flask.render_template( - template_name, - header_playlist_names = local_playlist.get_playlist_names(), - uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '', - time_published = info['time_published'], - time_published_utc=time_utc_isoformat(info['time_published']), + return flask.render_template(template_name, + header_playlist_names = local_playlist.get_playlist_names(), + uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '', + time_published = info['time_published'], view_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)), like_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)), + dislike_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)), download_formats = download_formats, other_downloads = other_downloads, video_info = json.dumps(video_info), @@ -807,7 +857,7 @@ def get_watch_page(video_id=None): 'related': info['related_videos'], 'playability_error': info['playability_error'], }, - font_family=youtube.font_choices[settings.font], + font_family=youtube.font_choices[settings.font], # for embed page **source_info, using_pair_sources = using_pair_sources, ) diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py index 9016810..de1812d 100644 --- a/youtube/yt_data_extract/__init__.py +++ b/youtube/yt_data_extract/__init__.py @@ -7,7 +7,7 @@ from .everything_else import (extract_channel_info, extract_search_info, extract_playlist_metadata, extract_playlist_info, extract_comments_info) from .watch_extraction import (extract_watch_info, get_caption_url, - update_with_age_restricted_info, requires_decryption, + update_with_new_urls, requires_decryption, extract_decryption_function, decrypt_signatures, _formats, update_format_with_type_info, extract_hls_formats, extract_watch_info_from_html, captions_available) diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py index 31127e3..4f9ec30 100644 --- a/youtube/yt_data_extract/watch_extraction.py +++ b/youtube/yt_data_extract/watch_extraction.py @@ -791,7 +791,7 @@ def get_caption_url(info, language, format, automatic=False, translation_languag url += '&tlang=' + translation_language return url -def update_with_age_restricted_info(info, player_response): +def update_with_new_urls(info, player_response): '''Inserts urls from player_response json''' ERROR_PREFIX = 'Error getting missing player or bypassing age-restriction: ' |