diff options
Diffstat (limited to 'youtube/watch.py')
-rw-r--r-- | youtube/watch.py | 481 |
1 files changed, 379 insertions, 102 deletions
diff --git a/youtube/watch.py b/youtube/watch.py index c3c90bc..0274cd0 100644 --- a/youtube/watch.py +++ b/youtube/watch.py @@ -8,7 +8,6 @@ from flask import request import flask import json -import html import gevent import os import math @@ -16,6 +15,10 @@ import traceback import urllib import re import urllib3.exceptions +from urllib.parse import parse_qs, urlencode +from types import SimpleNamespace +from math import ceil + try: with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f: @@ -24,29 +27,148 @@ except FileNotFoundError: decrypt_cache = {} -def get_video_sources(info, tor_bypass=False): - video_sources = [] - if (not settings.theater_mode) or (settings.route_tor == 2) or tor_bypass: - max_resolution = 360 +def codec_name(vcodec): + if vcodec.startswith('avc'): + return 'h264' + elif vcodec.startswith('av01'): + return 'av1' + elif vcodec.startswith('vp'): + return 'vp' else: - max_resolution = settings.default_resolution + return 'unknown' + + +def get_video_sources(info, target_resolution): + '''return dict with organized sources: { + 'uni_sources': [{}, ...], # video and audio in one file + 'uni_idx': int, # default unified source index + 'pair_sources': [{video: {}, audio: {}, quality: ..., ...}, ...], + 'pair_idx': int, # default pair source index + } + ''' + audio_sources = [] + video_only_sources = {} + uni_sources = [] + pair_sources = [] + + for fmt in info['formats']: - if not all(fmt[attr] for attr in ('quality', 'width', 'ext', 'url')): + if not all(fmt[attr] for attr in ('ext', 'url', 'itag')): continue - if fmt['acodec'] and fmt['vcodec'] and fmt['height'] <= max_resolution: - video_sources.append({ - 'src': fmt['url'], + + # unified source + if fmt['acodec'] and fmt['vcodec']: + source = { 'type': 'video/' + fmt['ext'], - 'quality': fmt['quality'], - 'height': fmt['height'], - 'width': fmt['width'], - }) + 'quality_string': short_video_quality_string(fmt), + } + source['quality_string'] += ' (integrated)' + source.update(fmt) + uni_sources.append(source) + continue - #### order the videos sources so the preferred resolution is first ### + if not (fmt['init_range'] and fmt['index_range']): + continue - video_sources.sort(key=lambda source: source['quality'], reverse=True) + # audio source + if fmt['acodec'] and not fmt['vcodec'] and ( + fmt['audio_bitrate'] or fmt['bitrate']): + if fmt['bitrate']: # prefer this one, more accurate right now + fmt['audio_bitrate'] = int(fmt['bitrate']/1000) + source = { + 'type': 'audio/' + fmt['ext'], + 'quality_string': audio_quality_string(fmt), + } + source.update(fmt) + source['mime_codec'] = (source['type'] + '; codecs="' + + source['acodec'] + '"') + audio_sources.append(source) + # video-only source + elif all(fmt[attr] for attr in ('vcodec', 'quality', 'width', 'fps', + 'file_size')): + if codec_name(fmt['vcodec']) == 'unknown': + continue + source = { + 'type': 'video/' + fmt['ext'], + 'quality_string': short_video_quality_string(fmt), + } + source.update(fmt) + source['mime_codec'] = (source['type'] + '; codecs="' + + source['vcodec'] + '"') + quality = str(fmt['quality']) + 'p' + str(fmt['fps']) + if quality in video_only_sources: + video_only_sources[quality].append(source) + else: + video_only_sources[quality] = [source] + + audio_sources.sort(key=lambda source: source['audio_bitrate']) + uni_sources.sort(key=lambda src: src['quality']) + + webm_audios = [a for a in audio_sources if a['ext'] == 'webm'] + mp4_audios = [a for a in audio_sources if a['ext'] == 'mp4'] + + for quality_string, sources in video_only_sources.items(): + # choose an audio source to go with it + # 0.5 is semiarbitrary empirical constant to spread audio sources + # between 144p and 1080p. Use something better eventually. + quality, fps = map(int, quality_string.split('p')) + target_audio_bitrate = quality*fps/30*0.5 + pair_info = { + 'quality_string': quality_string, + 'quality': quality, + 'height': sources[0]['height'], + 'width': sources[0]['width'], + 'fps': fps, + 'videos': sources, + 'audios': [], + } + for audio_choices in (webm_audios, mp4_audios): + if not audio_choices: + continue + closest_audio_source = audio_choices[0] + best_err = target_audio_bitrate - audio_choices[0]['audio_bitrate'] + best_err = abs(best_err) + for audio_source in audio_choices[1:]: + err = abs(audio_source['audio_bitrate'] - target_audio_bitrate) + # once err gets worse we have passed the closest one + if err > best_err: + break + best_err = err + closest_audio_source = audio_source + pair_info['audios'].append(closest_audio_source) + + if not pair_info['audios']: + continue - return video_sources + def video_rank(src): + ''' Sort by settings preference. Use file size as tiebreaker ''' + setting_name = 'codec_rank_' + codec_name(src['vcodec']) + return (settings.current_settings_dict[setting_name], + src['file_size']) + pair_info['videos'].sort(key=video_rank) + + pair_sources.append(pair_info) + + pair_sources.sort(key=lambda src: src['quality']) + + uni_idx = 0 if uni_sources else None + for i, source in enumerate(uni_sources): + if source['quality'] > target_resolution: + break + uni_idx = i + + pair_idx = 0 if pair_sources else None + for i, pair_info in enumerate(pair_sources): + if pair_info['quality'] > target_resolution: + break + pair_idx = i + + return { + 'uni_sources': uni_sources, + 'uni_idx': uni_idx, + 'pair_sources': pair_sources, + 'pair_idx': pair_idx, + } def make_caption_src(info, lang, auto=False, trans_lang=None): @@ -56,7 +178,7 @@ def make_caption_src(info, lang, auto=False, trans_lang=None): if trans_lang: label += ' -> ' + trans_lang return { - 'url': '/' + yt_data_extract.get_caption_url(info, lang, 'vtt', auto, trans_lang), + 'url': util.prefix_url(yt_data_extract.get_caption_url(info, lang, 'vtt', auto, trans_lang)), 'label': label, 'srclang': trans_lang[0:2] if trans_lang else lang[0:2], 'on': False, @@ -82,7 +204,7 @@ def lang_eq(lang1, lang2): def equiv_lang_in(lang, sequence): '''Extracts a language in sequence which is equivalent to lang. e.g. if lang is en, extracts en-GB from sequence. - Necessary because if only a specific variant like en-GB is available, can't ask Youtube for simply en. Need to get the available variant.''' + Necessary because if only a specific variant like en-GB is available, can't ask YouTube for simply en. Need to get the available variant.''' lang = lang[0:2] for l in sequence: if l[0:2] == lang: @@ -100,6 +222,8 @@ def get_subtitle_sources(info): pref_lang (Automatic) pref_lang (Manual)''' sources = [] + if not yt_data_extract.captions_available(info): + return [] pref_lang = settings.subtitles_language native_video_lang = None if info['automatic_caption_languages']: @@ -186,14 +310,6 @@ def save_decrypt_cache(): f.close() -watch_headers = ( - ('Accept', '*/*'), - ('Accept-Language', 'en-US,en;q=0.5'), - ('X-YouTube-Client-Name', '2'), - ('X-YouTube-Client-Version', '2.20180830'), -) + util.mobile_ua - - def decrypt_signatures(info, video_id): '''return error string, or False if no errors''' if not yt_data_extract.requires_decryption(info): @@ -217,7 +333,20 @@ def decrypt_signatures(info, video_id): return err -def extract_info(video_id, use_invidious, playlist_id=None, index=None): +def _add_to_error(info, key, additional_message): + if key in info and info[key]: + info[key] += additional_message + else: + info[key] = additional_message + + +def fetch_player_response(client, video_id): + return util.call_youtube_api(client, 'player', { + 'videoId': video_id, + }) + + +def fetch_watch_page_info(video_id, playlist_id, index): # bpctr=9999999999 will bypass are-you-sure dialogs for controversial # videos url = 'https://m.youtube.com/embed/' + video_id + '?bpctr=9999999999' @@ -225,33 +354,55 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None): url += '&list=' + playlist_id if index: url += '&index=' + index - watch_page = util.fetch_url(url, headers=watch_headers, + + headers = ( + ('Accept', '*/*'), + ('Accept-Language', 'en-US,en;q=0.5'), + ('X-YouTube-Client-Name', '2'), + ('X-YouTube-Client-Version', '2.20180830'), + ) + util.mobile_ua + + watch_page = util.fetch_url(url, headers=headers, debug_name='watch') watch_page = watch_page.decode('utf-8') - info = yt_data_extract.extract_watch_info_from_html(watch_page) + return yt_data_extract.extract_watch_info_from_html(watch_page) - # request player urls if it's missing - # see https://github.com/user234683/youtube-local/issues/22#issuecomment-706395160 - if info['age_restricted'] or info['player_urls_missing']: - if info['age_restricted']: - print('Age restricted video. Fetching get_video_info page') - else: - print('Missing player. Fetching get_video_info page') - data = { - 'video_id': video_id, - 'eurl': 'https://youtube.googleapis.com/v/' + video_id, - } - url = 'https://www.youtube.com/get_video_info?' + urllib.parse.urlencode(data) - video_info_page = util.fetch_url( - url, headers=watch_headers, debug_name='get_video_info', - report_text='Fetched get_video_info page').decode('utf-8') - yt_data_extract.update_with_age_restricted_info(info, video_info_page) + +def extract_info(video_id, use_invidious, playlist_id=None, index=None): + primary_client = 'android_vr' + fallback_client = 'ios' + last_resort_client = 'tv_embedded' + + tasks = ( + # Get video metadata from here + gevent.spawn(fetch_watch_page_info, video_id, playlist_id, index), + gevent.spawn(fetch_player_response, primary_client, video_id) + ) + gevent.joinall(tasks) + util.check_gevent_exceptions(*tasks) + + info = tasks[0].value or {} + player_response = tasks[1].value or {} + + yt_data_extract.update_with_new_urls(info, player_response) + + # Fallback to 'ios' if no valid URLs are found + if not info.get('formats') or info.get('player_urls_missing'): + print(f"No URLs found in '{primary_client}', attempting with '{fallback_client}'.") + player_response = fetch_player_response(fallback_client, video_id) or {} + yt_data_extract.update_with_new_urls(info, player_response) + + # Final attempt with 'tv_embedded' if there are still no URLs + if not info.get('formats') or info.get('player_urls_missing'): + print(f"No URLs found in '{fallback_client}', attempting with '{last_resort_client}'") + player_response = fetch_player_response(last_resort_client, video_id) or {} + yt_data_extract.update_with_new_urls(info, player_response) # signature decryption - decryption_error = decrypt_signatures(info, video_id) - if decryption_error: - decryption_error = 'Error decrypting url signatures: ' + decryption_error - info['playability_error'] = decryption_error + if info.get('formats'): + decryption_error = decrypt_signatures(info, video_id) + if decryption_error: + info['playability_error'] = 'Error decrypting url signatures: ' + decryption_error # check if urls ready (non-live format) in former livestream # urls not ready if all of them have no filesize @@ -265,22 +416,21 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None): # livestream urls # sometimes only the livestream urls work soon after the livestream is over - if (info['hls_manifest_url'] - and (info['live'] or not info['formats'] or not info['urls_ready']) - ): - manifest = util.fetch_url( - info['hls_manifest_url'], - debug_name='hls_manifest.m3u8', - report_text='Fetched hls manifest' - ).decode('utf-8') - - info['hls_formats'], err = yt_data_extract.extract_hls_formats(manifest) - if not err: - info['playability_error'] = None - for fmt in info['hls_formats']: - fmt['video_quality'] = video_quality_string(fmt) - else: - info['hls_formats'] = [] + info['hls_formats'] = [] + if info.get('hls_manifest_url') and (info.get('live') or not info.get('formats') or not info['urls_ready']): + try: + manifest = util.fetch_url(info['hls_manifest_url'], + debug_name='hls_manifest.m3u8', + report_text='Fetched hls manifest' + ).decode('utf-8') + info['hls_formats'], err = yt_data_extract.extract_hls_formats(manifest) + if not err: + info['playability_error'] = None + for fmt in info['hls_formats']: + fmt['video_quality'] = video_quality_string(fmt) + except Exception as e: + print(f"Error obteniendo HLS manifest: {e}") + info['hls_formats'] = [] # check for 403. Unnecessary for tor video routing b/c ip address is same info['invidious_used'] = False @@ -319,15 +469,30 @@ def video_quality_string(format): return '?' -def audio_quality_string(format): - if format['acodec']: - result = str(format['audio_bitrate'] or '?') + 'k' - if format['audio_sample_rate']: - result += ' ' + str(format['audio_sample_rate']) + ' Hz' +def short_video_quality_string(fmt): + result = str(fmt['quality'] or '?') + 'p' + if fmt['fps']: + result += str(fmt['fps']) + if fmt['vcodec'].startswith('av01'): + result += ' AV1' + elif fmt['vcodec'].startswith('avc'): + result += ' h264' + else: + result += ' ' + fmt['vcodec'] + return result + + +def audio_quality_string(fmt): + if fmt['acodec']: + if fmt['audio_bitrate']: + result = '%d' % fmt['audio_bitrate'] + 'k' + else: + result = '?k' + if fmt['audio_sample_rate']: + result += ' ' + '%.3G' % (fmt['audio_sample_rate']/1000) + 'kHz' return result - elif format['vcodec']: + elif fmt['vcodec']: return 'video only' - return '?' @@ -346,12 +511,71 @@ def format_bytes(bytes): return '%.2f%s' % (converted, suffix) -time_table = {'h': 3600, 'm': 60, 's': 1} +@yt_app.route('/ytl-api/storyboard.vtt') +def get_storyboard_vtt(): + """ + See: + https://github.com/iv-org/invidious/blob/9a8b81fcbe49ff8d88f197b7f731d6bf79fc8087/src/invidious.cr#L3603 + https://github.com/iv-org/invidious/blob/3bb7fbb2f119790ee6675076b31cd990f75f64bb/src/invidious/videos.cr#L623 + """ + + spec_url = request.args.get('spec_url') + url, *boards = spec_url.split('|') + base_url, q = url.split('?') + q = parse_qs(q) # for url query + + storyboard = None + wanted_height = 90 + + for i, board in enumerate(boards): + *t, _, sigh = board.split("#") + width, height, count, width_cnt, height_cnt, interval = map(int, t) + if height != wanted_height: continue + q['sigh'] = [sigh] + url = f"{base_url}?{urlencode(q, doseq=True)}" + storyboard = SimpleNamespace( + url = url.replace("$L", str(i)).replace("$N", "M$M"), + width = width, + height = height, + interval = interval, + width_cnt = width_cnt, + height_cnt = height_cnt, + storyboard_count = ceil(count / (width_cnt * height_cnt)) + ) + + if not storyboard: + flask.abort(404) + + def to_ts(ms): + s, ms = divmod(ms, 1000) + h, s = divmod(s, 3600) + m, s = divmod(s, 60) + return f"{h:02}:{m:02}:{s:02}.{ms:03}" + + r = "WEBVTT" # result + ts = 0 # current timestamp + + for i in range(storyboard.storyboard_count): + url = '/' + storyboard.url.replace("$M", str(i)) + interval = storyboard.interval + w, h = storyboard.width, storyboard.height + w_cnt, h_cnt = storyboard.width_cnt, storyboard.height_cnt + for j in range(h_cnt): + for k in range(w_cnt): + r += f"{to_ts(ts)} --> {to_ts(ts+interval)}\n" + r += f"{url}#xywh={w * k},{h * j},{w},{h}\n\n" + ts += interval + return flask.Response(r, mimetype='text/vtt') + + +time_table = {'h': 3600, 'm': 60, 's': 1} @yt_app.route('/watch') @yt_app.route('/embed') @yt_app.route('/embed/<video_id>') +@yt_app.route('/shorts') +@yt_app.route('/shorts/<video_id>') def get_watch_page(video_id=None): video_id = request.args.get('v') or video_id if not video_id: @@ -392,16 +616,20 @@ def get_watch_page(video_id=None): return flask.render_template('error.html', error_message=info['error']) video_info = { - "duration": util.seconds_to_timestamp(info["duration"] or 0), - "id": info['id'], - "title": info['title'], - "author": info['author'], + 'duration': util.seconds_to_timestamp(info['duration'] or 0), + 'id': info['id'], + 'title': info['title'], + 'author': info['author'], + 'author_id': info['author_id'], } # prefix urls, and other post-processing not handled by yt_data_extract for item in info['related_videos']: + item['thumbnail'] = "https://i.ytimg.com/vi/{}/hqdefault.jpg".format(item['id']) # set HQ relateds thumbnail videos util.prefix_urls(item) util.add_extra_html_info(item) + for song in info['music_list']: + song['url'] = util.prefix_url(song['url']) if info['playlist']: playlist_id = info['playlist']['id'] for item in info['playlist']['items']: @@ -413,10 +641,11 @@ def get_watch_page(video_id=None): item['url'] += '&index=' + str(item['index']) info['playlist']['author_url'] = util.prefix_url( info['playlist']['author_url']) - # Don't prefix hls_formats for now because the urls inside the manifest - # would need to be prefixed as well. - for fmt in info['formats']: - fmt['url'] = util.prefix_url(fmt['url']) + if settings.img_prefix: + # Don't prefix hls_formats for now because the urls inside the manifest + # would need to be prefixed as well. + for fmt in info['formats']: + fmt['url'] = util.prefix_url(fmt['url']) # Add video title to end of url path so it has a filename other than just # "videoplayback" when downloaded @@ -430,12 +659,6 @@ def get_watch_page(video_id=None): '/videoplayback', '/videoplayback/name/' + filename) - if settings.gather_googlevideo_domains: - with open(os.path.join(settings.data_dir, 'googlevideo-domains.txt'), 'a+', encoding='utf-8') as f: - url = info['formats'][0]['url'] - subdomain = url[0:url.find(".googlevideo.com")] - f.write(subdomain + "\n") - download_formats = [] for format in (info['formats'] + info['hls_formats']): @@ -452,9 +675,53 @@ def get_watch_page(video_id=None): 'codecs': codecs_string, }) - video_sources = get_video_sources(info, tor_bypass=info['tor_bypass_used']) - video_height = yt_data_extract.deep_get(video_sources, 0, 'height', default=360) - video_width = yt_data_extract.deep_get(video_sources, 0, 'width', default=640) + if (settings.route_tor == 2) or info['tor_bypass_used']: + target_resolution = 240 + else: + target_resolution = settings.default_resolution + + source_info = get_video_sources(info, target_resolution) + uni_sources = source_info['uni_sources'] + pair_sources = source_info['pair_sources'] + uni_idx, pair_idx = source_info['uni_idx'], source_info['pair_idx'] + + pair_quality = yt_data_extract.deep_get(pair_sources, pair_idx, 'quality') + uni_quality = yt_data_extract.deep_get(uni_sources, uni_idx, 'quality') + + pair_error = abs((pair_quality or 360) - target_resolution) + uni_error = abs((uni_quality or 360) - target_resolution) + if uni_error == pair_error: + # use settings.prefer_uni_sources as a tiebreaker + closer_to_target = 'uni' if settings.prefer_uni_sources else 'pair' + elif uni_error < pair_error: + closer_to_target = 'uni' + else: + closer_to_target = 'pair' + + if settings.prefer_uni_sources == 2: + # Use uni sources unless there's no choice. + using_pair_sources = ( + bool(pair_sources) and (not uni_sources) + ) + else: + # Use the pair sources if they're closer to the desired resolution + using_pair_sources = ( + bool(pair_sources) + and (not uni_sources or closer_to_target == 'pair') + ) + if using_pair_sources: + video_height = pair_sources[pair_idx]['height'] + video_width = pair_sources[pair_idx]['width'] + else: + video_height = yt_data_extract.deep_get( + uni_sources, uni_idx, 'height', default=360 + ) + video_width = yt_data_extract.deep_get( + uni_sources, uni_idx, 'width', default=640 + ) + + + # 1 second per pixel, or the actual video width theater_video_target_width = max(640, info['duration'] or 0, video_width) @@ -487,19 +754,16 @@ def get_watch_page(video_id=None): template_name = 'embed.html' else: template_name = 'watch.html' - return flask.render_template( - template_name, - header_playlist_names = local_playlist.get_playlist_names(), - uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '', - time_published = info['time_published'], - time_published_utc=time_utc_isoformat(info['time_published']), + return flask.render_template(template_name, + header_playlist_names = local_playlist.get_playlist_names(), + uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '', + time_published = info['time_published'], view_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)), like_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)), dislike_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)), download_formats = download_formats, other_downloads = other_downloads, video_info = json.dumps(video_info), - video_sources = video_sources, hls_formats = info['hls_formats'], subtitle_sources = subtitle_sources, related = info['related_videos'], @@ -528,13 +792,26 @@ def get_watch_page(video_id=None): invidious_used = info['invidious_used'], invidious_reload_button = info['invidious_reload_button'], video_url = util.URL_ORIGIN + '/watch?v=' + video_id, - time_start = time_start, + video_id = video_id, + storyboard_url = (util.URL_ORIGIN + '/ytl-api/storyboard.vtt?' + + urlencode([('spec_url', info['storyboard_spec_url'])]) + if info['storyboard_spec_url'] else None), js_data = { - 'video_id': video_info['id'], + 'video_id': info['id'], + 'video_duration': info['duration'], + 'settings': settings.current_settings_dict, + 'has_manual_captions': any(s.get('on') for s in subtitle_sources), + **source_info, + 'using_pair_sources': using_pair_sources, + 'time_start': time_start, + 'playlist': info['playlist'], + 'related': info['related_videos'], + 'playability_error': info['playability_error'], }, - # for embed page - font_family=youtube.font_choices[settings.font], + font_family = youtube.font_choices[settings.font], # for embed page + **source_info, + using_pair_sources = using_pair_sources, ) |