aboutsummaryrefslogtreecommitdiffstats
path: root/youtube
diff options
context:
space:
mode:
author: Jesus E <heckyel@riseup.net>, 2023-05-28 21:30:51 -0400
committer: Jesus E <heckyel@riseup.net>, 2023-05-28 21:30:51 -0400
commit: e54596f3e9bac1b5ef328b25e44604aa11479f83 (patch)
tree: 5af65503f7b47037a0c5fbf7d70ffadd6c9c04bb /youtube
parent: c6e1b366b5cf4e1d7e52631e4dedc935ea021154 (diff)
download: yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.tar.lz
yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.tar.xz
yt-local-e54596f3e9bac1b5ef328b25e44604aa11479f83.zip
Partially fix age-restricted videos
This does not work for videos that require decryption, because decryption is not working (it gives a 403) for some reason.
Related Invidious issue for decryption not working: https://github.com/iv-org/invidious/issues/3245
Partial fix for #146.
Diffstat (limited to 'youtube')
-rw-r--r-- youtube/watch.py 184
-rw-r--r-- youtube/yt_data_extract/__init__.py 2
-rw-r--r-- youtube/yt_data_extract/watch_extraction.py 2
3 files changed, 119 insertions, 69 deletions
diff --git a/youtube/watch.py b/youtube/watch.py
index c18347e..c38a3f5 100644
--- a/youtube/watch.py
+++ b/youtube/watch.py
@@ -19,6 +19,46 @@ from urllib.parse import parse_qs, urlencode
from types import SimpleNamespace
from math import ceil
+# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72
+INNERTUBE_CLIENTS = {
+ 'android': {
+ 'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'ANDROID',
+ 'clientVersion': '17.31.35',
+ 'androidSdkVersion': 31,
+ 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
+ },
+ # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
+ 'thirdParty': {
+ 'embedUrl': 'https://google.com', # Can be any valid URL
+ }
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
+ 'REQUIRE_JS_PLAYER': False,
+ },
+
+ # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
+ # See: https://github.com/zerodytrash/YouTube-Internal-Clients
+ 'tv_embedded': {
+ 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8',
+ 'INNERTUBE_CONTEXT': {
+ 'client': {
+ 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER',
+ 'clientVersion': '2.0',
+ },
+ # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
+ 'thirdParty': {
+ 'embedUrl': 'https://google.com', # Can be any valid URL
+ }
+
+ },
+ 'INNERTUBE_CONTEXT_CLIENT_NAME': 85,
+ 'REQUIRE_JS_PLAYER': True,
+ },
+}
+
try:
with open(os.path.join(settings.data_dir, 'decrypt_function_cache.json'), 'r') as f:
decrypt_cache = json.loads(f.read())['decrypt_cache']
@@ -49,6 +89,8 @@ def get_video_sources(info, target_resolution):
video_only_sources = {}
uni_sources = []
pair_sources = []
+
+
for fmt in info['formats']:
if not all(fmt[attr] for attr in ('ext', 'url', 'itag')):
continue
@@ -74,7 +116,6 @@ def get_video_sources(info, target_resolution):
fmt['audio_bitrate'] = int(fmt['bitrate']/1000)
source = {
'type': 'audio/' + fmt['ext'],
- 'bitrate': fmt['audio_bitrate'],
'quality_string': audio_quality_string(fmt),
}
source.update(fmt)
@@ -308,14 +349,6 @@ def save_decrypt_cache():
f.close()
-watch_headers = (
- ('Accept', '*/*'),
- ('Accept-Language', 'en-US,en;q=0.5'),
- ('X-YouTube-Client-Name', '2'),
- ('X-YouTube-Client-Version', '2.20180830'),
-) + util.mobile_ua
-
-
def decrypt_signatures(info, video_id):
'''return error string, or False if no errors'''
if not yt_data_extract.requires_decryption(info):
@@ -345,8 +378,28 @@ def _add_to_error(info, key, additional_message):
else:
info[key] = additional_message
+def fetch_player_response(client, video_id):
+ client_params = INNERTUBE_CLIENTS[client]
+ context = client_params['INNERTUBE_CONTEXT']
+ key = client_params['INNERTUBE_API_KEY']
+ host = client_params.get('INNERTUBE_HOST') or 'youtubei.googleapis.com'
+ user_agent = context['client'].get('userAgent') or util.mobile_user_agent
-def extract_info(video_id, use_invidious, playlist_id=None, index=None):
+ url = 'https://' + host + '/youtubei/v1/player?key=' + key
+ data = {
+ 'videoId': video_id,
+ 'context': context,
+ }
+ data = json.dumps(data)
+ headers = (('Content-Type', 'application/json'),('User-Agent', user_agent))
+ player_response = util.fetch_url(
+ url, data=data, headers=headers,
+ debug_name='youtubei_player_' + client,
+ report_text='Fetched ' + client + ' youtubei player'
+ ).decode('utf-8')
+ return player_response
+
+def fetch_watch_page_info(video_id, playlist_id, index):
# bpctr=9999999999 will bypass are-you-sure dialogs for controversial
# videos
url = 'https://m.youtube.com/embed/' + video_id + '?bpctr=9999999999'
@@ -354,52 +407,46 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
url += '&list=' + playlist_id
if index:
url += '&index=' + index
- watch_page = util.fetch_url(url, headers=watch_headers,
+
+ headers = (
+ ('Accept', '*/*'),
+ ('Accept-Language', 'en-US,en;q=0.5'),
+ ('X-YouTube-Client-Name', '2'),
+ ('X-YouTube-Client-Version', '2.20180830'),
+ ) + util.mobile_ua
+
+ watch_page = util.fetch_url(url, headers=headers,
debug_name='watch')
watch_page = watch_page.decode('utf-8')
- info = yt_data_extract.extract_watch_info_from_html(watch_page)
-
- context = {
- 'client': {
- 'clientName': 'ANDROID',
- 'clientVersion': '17.29.35',
- 'androidSdkVersion': '31',
- 'gl': 'US',
- 'hl': 'en',
- },
- # https://github.com/yt-dlp/yt-dlp/pull/575#issuecomment-887739287
- 'thirdParty': {
- 'embedUrl': 'https://google.com', # Can be any valid URL
- }
- }
+ return yt_data_extract.extract_watch_info_from_html(watch_page)
+
+def extract_info(video_id, use_invidious, playlist_id=None, index=None):
+ tasks = (
+ # Get video metadata from here
+ gevent.spawn(fetch_watch_page_info, video_id, playlist_id, index),
+
+ # Get video URLs by spoofing as android client because its urls don't
+ # require decryption
+ # The URLs returned with WEB for videos requiring decryption
+ # couldn't be decrypted with the base.js from the web page for some
+ # reason
+ # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136
+ gevent.spawn(fetch_player_response, 'android', video_id)
+ )
+ gevent.joinall(tasks)
+ util.check_gevent_exceptions(*tasks)
+ info, player_response = tasks[0].value, tasks[1].value
+
+ yt_data_extract.update_with_new_urls(info, player_response)
+
+ # Age restricted video, retry
if info['age_restricted'] or info['player_urls_missing']:
if info['age_restricted']:
- print('Age restricted video. Fetching /youtubei/v1/player page')
+ print('Age restricted video, retrying')
else:
- print('Missing player. Fetching /youtubei/v1/player page')
- context['client']['clientScreen'] = 'EMBED'
- else:
- print('Fetching /youtubei/v1/player page')
-
- # https://github.com/yt-dlp/yt-dlp/issues/574#issuecomment-887171136
- # ANDROID is used instead because its urls don't require decryption
- # The URLs returned with WEB for videos requiring decryption
- # couldn't be decrypted with the base.js from the web page for some
- # reason
- url ='https://youtubei.googleapis.com/youtubei/v1/player'
- url += '?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
- data = {
- 'videoId': video_id,
- 'context': context,
- }
- data = json.dumps(data)
- content_header = (('Content-Type', 'application/json'),)
- player_response = util.fetch_url(
- url, data=data, headers=util.mobile_ua + content_header,
- debug_name='youtubei_player',
- report_text='Fetched youtubei player page').decode('utf-8')
-
- yt_data_extract.update_with_age_restricted_info(info, player_response)
+ print('Player urls missing, retrying')
+ player_response = fetch_player_response('tv_embedded', video_id)
+ yt_data_extract.update_with_new_urls(info, player_response)
# signature decryption
decryption_error = decrypt_signatures(info, video_id)
@@ -422,8 +469,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
if (info['hls_manifest_url']
and (info['live'] or not info['formats'] or not info['urls_ready'])
):
- manifest = util.fetch_url(
- info['hls_manifest_url'],
+ manifest = util.fetch_url(info['hls_manifest_url'],
debug_name='hls_manifest.m3u8',
report_text='Fetched hls manifest'
).decode('utf-8')
@@ -439,6 +485,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
# check for 403. Unnecessary for tor video routing b/c ip address is same
info['invidious_used'] = False
info['invidious_reload_button'] = False
+ info['tor_bypass_used'] = False
if (settings.route_tor == 1
and info['formats'] and info['formats'][0]['url']):
try:
@@ -452,6 +499,7 @@ def extract_info(video_id, use_invidious, playlist_id=None, index=None):
if response.status == 403:
print('Access denied (403) for video urls.')
print('Routing video through Tor')
+ info['tor_bypass_used'] = True
for fmt in info['formats']:
fmt['url'] += '&use_tor=1'
elif 300 <= response.status < 400:
@@ -682,20 +730,20 @@ def get_watch_page(video_id=None):
'codecs': codecs_string,
})
- target_resolution = settings.default_resolution
+ if (settings.route_tor == 2) or info['tor_bypass_used']:
+ target_resolution = 240
+ else:
+ target_resolution = settings.default_resolution
+
source_info = get_video_sources(info, target_resolution)
uni_sources = source_info['uni_sources']
pair_sources = source_info['pair_sources']
uni_idx, pair_idx = source_info['uni_idx'], source_info['pair_idx']
- video_height = yt_data_extract.deep_get(source_info, 'uni_sources',
- uni_idx, 'height',
- default=360)
- video_width = yt_data_extract.deep_get(source_info, 'uni_sources',
- uni_idx, 'width',
- default=640)
+
pair_quality = yt_data_extract.deep_get(pair_sources, pair_idx, 'quality')
uni_quality = yt_data_extract.deep_get(uni_sources, uni_idx, 'quality')
+
pair_error = abs((pair_quality or 360) - target_resolution)
uni_error = abs((uni_quality or 360) - target_resolution)
if uni_error == pair_error:
@@ -705,6 +753,7 @@ def get_watch_page(video_id=None):
closer_to_target = 'uni'
else:
closer_to_target = 'pair'
+
using_pair_sources = (
bool(pair_sources) and (not uni_sources or closer_to_target == 'pair')
)
@@ -719,6 +768,8 @@ def get_watch_page(video_id=None):
uni_sources, uni_idx, 'width', default=640
)
+
+
# 1 second per pixel, or the actual video width
theater_video_target_width = max(640, info['duration'] or 0, video_width)
@@ -751,14 +802,13 @@ def get_watch_page(video_id=None):
template_name = 'embed.html'
else:
template_name = 'watch.html'
- return flask.render_template(
- template_name,
- header_playlist_names = local_playlist.get_playlist_names(),
- uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '',
- time_published = info['time_published'],
- time_published_utc=time_utc_isoformat(info['time_published']),
+ return flask.render_template(template_name,
+ header_playlist_names = local_playlist.get_playlist_names(),
+ uploader_channel_url = ('/' + info['author_url']) if info['author_url'] else '',
+ time_published = info['time_published'],
view_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("view_count", None)),
like_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("like_count", None)),
+ dislike_count = (lambda x: '{:,}'.format(x) if x is not None else "")(info.get("dislike_count", None)),
download_formats = download_formats,
other_downloads = other_downloads,
video_info = json.dumps(video_info),
@@ -807,7 +857,7 @@ def get_watch_page(video_id=None):
'related': info['related_videos'],
'playability_error': info['playability_error'],
},
- font_family=youtube.font_choices[settings.font],
+ font_family=youtube.font_choices[settings.font], # for embed page
**source_info,
using_pair_sources = using_pair_sources,
)
diff --git a/youtube/yt_data_extract/__init__.py b/youtube/yt_data_extract/__init__.py
index 9016810..de1812d 100644
--- a/youtube/yt_data_extract/__init__.py
+++ b/youtube/yt_data_extract/__init__.py
@@ -7,7 +7,7 @@ from .everything_else import (extract_channel_info, extract_search_info,
extract_playlist_metadata, extract_playlist_info, extract_comments_info)
from .watch_extraction import (extract_watch_info, get_caption_url,
- update_with_age_restricted_info, requires_decryption,
+ update_with_new_urls, requires_decryption,
extract_decryption_function, decrypt_signatures, _formats,
update_format_with_type_info, extract_hls_formats,
extract_watch_info_from_html, captions_available)
diff --git a/youtube/yt_data_extract/watch_extraction.py b/youtube/yt_data_extract/watch_extraction.py
index 31127e3..4f9ec30 100644
--- a/youtube/yt_data_extract/watch_extraction.py
+++ b/youtube/yt_data_extract/watch_extraction.py
@@ -791,7 +791,7 @@ def get_caption_url(info, language, format, automatic=False, translation_languag
url += '&tlang=' + translation_language
return url
-def update_with_age_restricted_info(info, player_response):
+def update_with_new_urls(info, player_response):
'''Inserts urls from player_response json'''
ERROR_PREFIX = 'Error getting missing player or bypassing age-restriction: '